35 KiB
35 KiB
import pandas as pd
import numpy as np
from gensim.models import Word2Vec
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
def load_train_data(file_path):
    """Read a tab-separated training file into a DataFrame.

    Every line is cut at its first tab into a label part and a text part.
    Lines that contain no tab are skipped. The label is converted with
    ``int()`` and the text has surrounding whitespace stripped.

    Args:
        file_path: Path of the UTF-8 encoded file to read.

    Returns:
        A ``pandas.DataFrame`` with the columns ``label`` and ``text``.

    Raises:
        ValueError: If a label part cannot be parsed by ``int()``.
    """
    rows = []
    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            label_part, tab, text_part = raw_line.partition('\t')
            if not tab:
                # No tab on this line, so there is no label/text pair.
                continue
            rows.append((int(label_part), text_part.strip()))
    return pd.DataFrame({
        'label': [label for label, _ in rows],
        'text': [text for _, text in rows],
    })
def load_data(file_path):
    """Read a text file into a one-column DataFrame, one row per line.

    Args:
        file_path: Path of the UTF-8 encoded file to read.

    Returns:
        A ``pandas.DataFrame`` with a single ``text`` column holding each
        line with leading and trailing whitespace removed.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        stripped_lines = [raw_line.strip() for raw_line in handle]
    return pd.DataFrame({'text': stripped_lines})
def load_labels(file_path):
    """Read a file holding one integer label per line.

    Args:
        file_path: Path of the UTF-8 encoded file to read.

    Returns:
        A ``pandas.DataFrame`` with a single ``label`` column of ints.

    Raises:
        ValueError: If a stripped line cannot be parsed by ``int()``.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        parsed = [int(raw_line.strip()) for raw_line in handle]
    return pd.DataFrame({'label': parsed})
def get_average_word2vec(tokens_list, model, k=None):
    """Return the mean embedding of the tokens known to ``model``.

    Tokens missing from ``model.wv`` are ignored. When no token is known
    (including an empty ``tokens_list``) the result is an all-zero vector.

    Args:
        tokens_list: Iterable of token strings.
        model: Object exposing ``model.wv`` that supports ``in`` and ``[]``
            lookups by token (e.g. a trained gensim ``Word2Vec`` model).
        k: Length of the returned vector. When omitted it is taken from
            ``model.wv.vector_size`` so the accumulator always matches the
            embedding size instead of relying on a hard-coded 100.

    Returns:
        A 1-D ``numpy.ndarray`` of length ``k``.
    """
    if k is None:
        # Derive the dimensionality from the embeddings themselves; a fixed
        # default breaks on ``vec += ...`` for any model that is not 100-d.
        k = model.wv.vector_size
    vec = np.zeros(k)
    count = 0
    for word in tokens_list:
        if word in model.wv:
            vec += model.wv[word]
            count += 1
    if count != 0:
        vec /= count
    return vec
def preprocess_data(file_path, word2vec_model):
    """Turn a text file into a matrix of averaged word vectors.

    The file is read with ``load_data``; each text is split on whitespace
    and passed to ``get_average_word2vec``.

    Args:
        file_path: Path of the file to read, one text per line.
        word2vec_model: Embedding model forwarded to
            ``get_average_word2vec``.

    Returns:
        A ``numpy.ndarray`` built from one averaged vector per input line.
    """
    frame = load_data(file_path)
    averaged = []
    for text in frame['text']:
        averaged.append(get_average_word2vec(text.split(), word2vec_model))
    return np.array(averaged)
# --- Training data and embeddings -------------------------------------------
train_data = load_train_data('train/train.tsv')
sentences = [text.split() for text in train_data['text']]
word2vec_model = Word2Vec(
    sentences,
    vector_size=100,
    window=5,
    min_count=1,
    workers=4,
)

# The tokenised sentences are reused here rather than splitting each text again.
X_train = np.array(
    [get_average_word2vec(tokens, word2vec_model) for tokens in sentences]
)
y_train = train_data['label'].to_numpy()

# --- Dev split, used as validation data during fitting ------------------------
X_dev = preprocess_data('dev-0/in.tsv', word2vec_model)
dev_labels = load_labels('dev-0/expected.tsv')

# --- Feed-forward binary classifier over the averaged embeddings --------------
model = Sequential([
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_dev, dev_labels),
)
Epoch 1/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 880us/step - accuracy: 0.9494 - loss: 0.1326 - val_accuracy: 0.9718 - val_loss: 0.0791 Epoch 2/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 879us/step - accuracy: 0.9693 - loss: 0.0806 - val_accuracy: 0.9714 - val_loss: 0.0764 Epoch 3/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 874us/step - accuracy: 0.9710 - loss: 0.0749 - val_accuracy: 0.9727 - val_loss: 0.0743 Epoch 4/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 872us/step - accuracy: 0.9720 - loss: 0.0740 - val_accuracy: 0.9725 - val_loss: 0.0725 Epoch 5/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 854us/step - accuracy: 0.9723 - loss: 0.0718 - val_accuracy: 0.9732 - val_loss: 0.0709 Epoch 6/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 855us/step - accuracy: 0.9737 - loss: 0.0687 - val_accuracy: 0.9685 - val_loss: 0.0921 Epoch 7/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 859us/step - accuracy: 0.9734 - loss: 0.0670 - val_accuracy: 0.9723 - val_loss: 0.0737 Epoch 8/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 867us/step - accuracy: 0.9755 - loss: 0.0636 - val_accuracy: 0.9730 - val_loss: 0.0725 Epoch 9/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 854us/step - accuracy: 0.9757 - loss: 0.0625 - val_accuracy: 0.9719 - val_loss: 0.0731 Epoch 10/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 872us/step - accuracy: 0.9766 - loss: 0.0604 - val_accuracy: 0.9718 - val_loss: 0.0751 Epoch 11/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 832us/step - accuracy: 0.9769 - loss: 0.0595 - val_accuracy: 0.9729 - val_loss: 0.0736 Epoch 12/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 909us/step - accuracy: 0.9785 - loss: 0.0571 - val_accuracy: 0.9723 - val_loss: 0.0735 Epoch 13/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 825us/step - 
accuracy: 0.9787 - loss: 0.0560 - val_accuracy: 0.9723 - val_loss: 0.0735 Epoch 14/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 819us/step - accuracy: 0.9787 - loss: 0.0543 - val_accuracy: 0.9727 - val_loss: 0.0741 Epoch 15/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 817us/step - accuracy: 0.9790 - loss: 0.0544 - val_accuracy: 0.9719 - val_loss: 0.0740 Epoch 16/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 829us/step - accuracy: 0.9788 - loss: 0.0539 - val_accuracy: 0.9729 - val_loss: 0.0748 Epoch 17/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 817us/step - accuracy: 0.9798 - loss: 0.0524 - val_accuracy: 0.9729 - val_loss: 0.0727 Epoch 18/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 818us/step - accuracy: 0.9810 - loss: 0.0503 - val_accuracy: 0.9710 - val_loss: 0.0782 Epoch 19/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 829us/step - accuracy: 0.9788 - loss: 0.0530 - val_accuracy: 0.9699 - val_loss: 0.0773 Epoch 20/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 838us/step - accuracy: 0.9803 - loss: 0.0512 - val_accuracy: 0.9714 - val_loss: 0.0747 Epoch 21/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 824us/step - accuracy: 0.9802 - loss: 0.0513 - val_accuracy: 0.9723 - val_loss: 0.0795 Epoch 22/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 824us/step - accuracy: 0.9810 - loss: 0.0483 - val_accuracy: 0.9727 - val_loss: 0.0775 Epoch 23/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 819us/step - accuracy: 0.9814 - loss: 0.0473 - val_accuracy: 0.9716 - val_loss: 0.0835 Epoch 24/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 825us/step - accuracy: 0.9810 - loss: 0.0480 - val_accuracy: 0.9710 - val_loss: 0.0767 Epoch 25/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 824us/step - accuracy: 0.9815 - loss: 0.0471 - val_accuracy: 0.9712 - val_loss: 0.0803 Epoch 
26/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 826us/step - accuracy: 0.9829 - loss: 0.0449 - val_accuracy: 0.9707 - val_loss: 0.0811 Epoch 27/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 821us/step - accuracy: 0.9823 - loss: 0.0449 - val_accuracy: 0.9697 - val_loss: 0.0813 Epoch 28/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 821us/step - accuracy: 0.9829 - loss: 0.0432 - val_accuracy: 0.9719 - val_loss: 0.0803 Epoch 29/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 826us/step - accuracy: 0.9828 - loss: 0.0433 - val_accuracy: 0.9705 - val_loss: 0.0884 Epoch 30/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 873us/step - accuracy: 0.9832 - loss: 0.0425 - val_accuracy: 0.9707 - val_loss: 0.0855 Epoch 31/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 866us/step - accuracy: 0.9829 - loss: 0.0433 - val_accuracy: 0.9707 - val_loss: 0.0845 Epoch 32/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 857us/step - accuracy: 0.9830 - loss: 0.0430 - val_accuracy: 0.9727 - val_loss: 0.0840 Epoch 33/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 870us/step - accuracy: 0.9835 - loss: 0.0406 - val_accuracy: 0.9661 - val_loss: 0.0911 Epoch 34/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 874us/step - accuracy: 0.9840 - loss: 0.0407 - val_accuracy: 0.9707 - val_loss: 0.0866 Epoch 35/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 897us/step - accuracy: 0.9841 - loss: 0.0400 - val_accuracy: 0.9718 - val_loss: 0.0807 Epoch 36/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 859us/step - accuracy: 0.9841 - loss: 0.0399 - val_accuracy: 0.9696 - val_loss: 0.0841 Epoch 37/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 873us/step - accuracy: 0.9850 - loss: 0.0390 - val_accuracy: 0.9734 - val_loss: 0.0892 Epoch 38/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 866us/step - 
accuracy: 0.9847 - loss: 0.0378 - val_accuracy: 0.9690 - val_loss: 0.0956 Epoch 39/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 864us/step - accuracy: 0.9851 - loss: 0.0377 - val_accuracy: 0.9708 - val_loss: 0.0889 Epoch 40/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 863us/step - accuracy: 0.9852 - loss: 0.0377 - val_accuracy: 0.9725 - val_loss: 0.0888 Epoch 41/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 857us/step - accuracy: 0.9858 - loss: 0.0360 - val_accuracy: 0.9718 - val_loss: 0.0914 Epoch 42/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 859us/step - accuracy: 0.9844 - loss: 0.0376 - val_accuracy: 0.9699 - val_loss: 0.0980 Epoch 43/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 866us/step - accuracy: 0.9857 - loss: 0.0362 - val_accuracy: 0.9699 - val_loss: 0.0922 Epoch 44/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 860us/step - accuracy: 0.9858 - loss: 0.0368 - val_accuracy: 0.9701 - val_loss: 0.0956 Epoch 45/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 852us/step - accuracy: 0.9862 - loss: 0.0354 - val_accuracy: 0.9690 - val_loss: 0.0942 Epoch 46/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 862us/step - accuracy: 0.9869 - loss: 0.0331 - val_accuracy: 0.9690 - val_loss: 0.0977 Epoch 47/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 885us/step - accuracy: 0.9865 - loss: 0.0334 - val_accuracy: 0.9712 - val_loss: 0.0947 Epoch 48/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 870us/step - accuracy: 0.9871 - loss: 0.0338 - val_accuracy: 0.9699 - val_loss: 0.0983 Epoch 49/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 855us/step - accuracy: 0.9865 - loss: 0.0335 - val_accuracy: 0.9708 - val_loss: 0.1039 Epoch 50/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 864us/step - accuracy: 0.9865 - loss: 0.0338 - val_accuracy: 0.9705 - val_loss: 0.1021 Epoch 
51/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 859us/step - accuracy: 0.9867 - loss: 0.0336 - val_accuracy: 0.9705 - val_loss: 0.1011 Epoch 52/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 854us/step - accuracy: 0.9871 - loss: 0.0321 - val_accuracy: 0.9692 - val_loss: 0.1045 Epoch 53/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 885us/step - accuracy: 0.9878 - loss: 0.0310 - val_accuracy: 0.9686 - val_loss: 0.1098 Epoch 54/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 877us/step - accuracy: 0.9870 - loss: 0.0318 - val_accuracy: 0.9701 - val_loss: 0.1042 Epoch 55/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 864us/step - accuracy: 0.9883 - loss: 0.0290 - val_accuracy: 0.9690 - val_loss: 0.1131 Epoch 56/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 866us/step - accuracy: 0.9884 - loss: 0.0298 - val_accuracy: 0.9697 - val_loss: 0.1078 Epoch 57/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 859us/step - accuracy: 0.9879 - loss: 0.0296 - val_accuracy: 0.9683 - val_loss: 0.1089 Epoch 58/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 891us/step - accuracy: 0.9881 - loss: 0.0302 - val_accuracy: 0.9707 - val_loss: 0.1103 Epoch 59/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 869us/step - accuracy: 0.9878 - loss: 0.0307 - val_accuracy: 0.9690 - val_loss: 0.1105 Epoch 60/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 859us/step - accuracy: 0.9873 - loss: 0.0317 - val_accuracy: 0.9685 - val_loss: 0.1166 Epoch 61/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 867us/step - accuracy: 0.9879 - loss: 0.0291 - val_accuracy: 0.9710 - val_loss: 0.1139 Epoch 62/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 853us/step - accuracy: 0.9878 - loss: 0.0287 - val_accuracy: 0.9705 - val_loss: 0.1148 Epoch 63/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 859us/step - 
accuracy: 0.9886 - loss: 0.0283 - val_accuracy: 0.9679 - val_loss: 0.1263 Epoch 64/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 861us/step - accuracy: 0.9884 - loss: 0.0283 - val_accuracy: 0.9701 - val_loss: 0.1200 Epoch 65/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 869us/step - accuracy: 0.9886 - loss: 0.0283 - val_accuracy: 0.9692 - val_loss: 0.1217 Epoch 66/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 858us/step - accuracy: 0.9895 - loss: 0.0262 - val_accuracy: 0.9701 - val_loss: 0.1157 Epoch 67/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 851us/step - accuracy: 0.9890 - loss: 0.0259 - val_accuracy: 0.9683 - val_loss: 0.1164 Epoch 68/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 852us/step - accuracy: 0.9891 - loss: 0.0265 - val_accuracy: 0.9685 - val_loss: 0.1275 Epoch 69/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 870us/step - accuracy: 0.9888 - loss: 0.0268 - val_accuracy: 0.9679 - val_loss: 0.1218 Epoch 70/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 862us/step - accuracy: 0.9892 - loss: 0.0268 - val_accuracy: 0.9694 - val_loss: 0.1320 Epoch 71/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 855us/step - accuracy: 0.9895 - loss: 0.0254 - val_accuracy: 0.9694 - val_loss: 0.1236 Epoch 72/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 856us/step - accuracy: 0.9895 - loss: 0.0251 - val_accuracy: 0.9708 - val_loss: 0.1271 Epoch 73/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 860us/step - accuracy: 0.9897 - loss: 0.0254 - val_accuracy: 0.9703 - val_loss: 0.1363 Epoch 74/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 861us/step - accuracy: 0.9895 - loss: 0.0257 - val_accuracy: 0.9705 - val_loss: 0.1315 Epoch 75/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 869us/step - accuracy: 0.9891 - loss: 0.0254 - val_accuracy: 0.9683 - val_loss: 0.1385 Epoch 
76/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 886us/step - accuracy: 0.9899 - loss: 0.0247 - val_accuracy: 0.9681 - val_loss: 0.1324 Epoch 77/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 851us/step - accuracy: 0.9896 - loss: 0.0247 - val_accuracy: 0.9697 - val_loss: 0.1502 Epoch 78/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 860us/step - accuracy: 0.9901 - loss: 0.0252 - val_accuracy: 0.9701 - val_loss: 0.1304 Epoch 79/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 853us/step - accuracy: 0.9896 - loss: 0.0246 - val_accuracy: 0.9688 - val_loss: 0.1537 Epoch 80/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 882us/step - accuracy: 0.9901 - loss: 0.0239 - val_accuracy: 0.9705 - val_loss: 0.1453 Epoch 81/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 851us/step - accuracy: 0.9902 - loss: 0.0236 - val_accuracy: 0.9696 - val_loss: 0.1482 Epoch 82/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 862us/step - accuracy: 0.9901 - loss: 0.0237 - val_accuracy: 0.9672 - val_loss: 0.1538 Epoch 83/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 853us/step - accuracy: 0.9904 - loss: 0.0234 - val_accuracy: 0.9679 - val_loss: 0.1426 Epoch 84/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 858us/step - accuracy: 0.9903 - loss: 0.0234 - val_accuracy: 0.9692 - val_loss: 0.1495 Epoch 85/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 868us/step - accuracy: 0.9905 - loss: 0.0232 - val_accuracy: 0.9690 - val_loss: 0.1374 Epoch 86/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 861us/step - accuracy: 0.9903 - loss: 0.0232 - val_accuracy: 0.9644 - val_loss: 0.1494 Epoch 87/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 860us/step - accuracy: 0.9907 - loss: 0.0232 - val_accuracy: 0.9675 - val_loss: 0.1575 Epoch 88/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 860us/step - 
accuracy: 0.9908 - loss: 0.0215 - val_accuracy: 0.9685 - val_loss: 0.1655 Epoch 89/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 853us/step - accuracy: 0.9910 - loss: 0.0213 - val_accuracy: 0.9668 - val_loss: 0.1522 Epoch 90/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 890us/step - accuracy: 0.9909 - loss: 0.0213 - val_accuracy: 0.9670 - val_loss: 0.1697 Epoch 91/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 854us/step - accuracy: 0.9910 - loss: 0.0208 - val_accuracy: 0.9679 - val_loss: 0.1665 Epoch 92/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 861us/step - accuracy: 0.9904 - loss: 0.0224 - val_accuracy: 0.9685 - val_loss: 0.1520 Epoch 93/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 878us/step - accuracy: 0.9906 - loss: 0.0217 - val_accuracy: 0.9674 - val_loss: 0.1594 Epoch 94/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 848us/step - accuracy: 0.9917 - loss: 0.0205 - val_accuracy: 0.9655 - val_loss: 0.1640 Epoch 95/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 866us/step - accuracy: 0.9912 - loss: 0.0214 - val_accuracy: 0.9677 - val_loss: 0.1560 Epoch 96/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 851us/step - accuracy: 0.9912 - loss: 0.0213 - val_accuracy: 0.9679 - val_loss: 0.1666 Epoch 97/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 869us/step - accuracy: 0.9917 - loss: 0.0209 - val_accuracy: 0.9675 - val_loss: 0.1539 Epoch 98/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 886us/step - accuracy: 0.9915 - loss: 0.0206 - val_accuracy: 0.9683 - val_loss: 0.1764 Epoch 99/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 899us/step - accuracy: 0.9918 - loss: 0.0199 - val_accuracy: 0.9677 - val_loss: 0.1634 Epoch 100/100 [1m3067/3067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 854us/step - accuracy: 0.9914 - loss: 0.0197 - val_accuracy: 0.9659 - val_loss: 0.1713
<keras.src.callbacks.history.History at 0x2a5e3e7da50>
# Score the fitted model on the dev split; the model was compiled with a
# single 'accuracy' metric, so evaluate() yields the loss and that metric.
loss, accuracy = model.evaluate(x=X_dev, y=dev_labels)
print(f"Accuracy: {accuracy}")
171/171 ━━━━━━━━━━━━━━━━━━━━ 0s 660us/step - accuracy: 0.9641 - loss: 0.1876 Accuracy: 0.9658840894699097
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
dev0_pred = (model.predict(X_dev) > 0.5).astype(int)
171/171 ━━━━━━━━━━━━━━━━━━━━ 0s 794us/step
# Write the dev predictions, one value per row, without index or header.
dev0_pred = pd.DataFrame(data=dev0_pred)
dev0_pred.to_csv("dev-0/out.tsv", header=False, index=False)

# Embed the test-A inputs with the same Word2Vec model and threshold the
# sigmoid outputs at 0.5, as for the dev split.
X_testA = preprocess_data('test-A/in.tsv', word2vec_model)
testA_pred = (model.predict(X_testA) > 0.5).astype(int)
171/171 ━━━━━━━━━━━━━━━━━━━━ 0s 630us/step
# Write the test-A predictions, one value per row, without index or header.
testA_pred = pd.DataFrame(data=testA_pred)
testA_pred.to_csv("test-A/out.tsv", header=False, index=False)