word2vec/word2vec.ipynb

35 KiB
Raw Permalink Blame History

import pandas as pd
import numpy as np
from gensim.models import Word2Vec
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
def load_train_data(file_path):
    labels = []
    texts = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.split('\t', 1)
            if len(parts) == 2:
                label, text = parts
                labels.append(int(label))
                texts.append(text.strip())
    return pd.DataFrame({'label': labels, 'text': texts})
def load_data(file_path):
    texts = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            texts.append(line.strip())
    return pd.DataFrame({'text': texts})
def load_labels(file_path):
    labels = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            labels.append(int(line.strip()))
    return pd.DataFrame({'label': labels})
def get_average_word2vec(tokens_list, model, k=100):
    vec = np.zeros(k)
    count = 0
    for word in tokens_list:
        if word in model.wv:
            vec += model.wv[word]
            count += 1
    if count != 0:
        vec /= count
    return vec
def preprocess_data(file_path, word2vec_model):
    data = load_data(file_path)
    X = np.array([get_average_word2vec(text.split(), word2vec_model) for text in data['text']])
    return X
train_data = load_train_data('train/train.tsv')
sentences = [text.split() for text in train_data['text']]
word2vec_model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)
X_train = np.array([get_average_word2vec(text.split(), word2vec_model) for text in train_data['text']])
y_train = np.array(train_data['label'])
X_dev = preprocess_data('dev-0/in.tsv', word2vec_model)
dev_labels = load_labels('dev-0/expected.tsv')
model = Sequential()
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_dev, dev_labels))
Epoch 1/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 4s 880us/step - accuracy: 0.9494 - loss: 0.1326 - val_accuracy: 0.9718 - val_loss: 0.0791
Epoch 2/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 879us/step - accuracy: 0.9693 - loss: 0.0806 - val_accuracy: 0.9714 - val_loss: 0.0764
Epoch 3/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 874us/step - accuracy: 0.9710 - loss: 0.0749 - val_accuracy: 0.9727 - val_loss: 0.0743
Epoch 4/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 872us/step - accuracy: 0.9720 - loss: 0.0740 - val_accuracy: 0.9725 - val_loss: 0.0725
Epoch 5/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 854us/step - accuracy: 0.9723 - loss: 0.0718 - val_accuracy: 0.9732 - val_loss: 0.0709
Epoch 6/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 855us/step - accuracy: 0.9737 - loss: 0.0687 - val_accuracy: 0.9685 - val_loss: 0.0921
Epoch 7/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 859us/step - accuracy: 0.9734 - loss: 0.0670 - val_accuracy: 0.9723 - val_loss: 0.0737
Epoch 8/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 867us/step - accuracy: 0.9755 - loss: 0.0636 - val_accuracy: 0.9730 - val_loss: 0.0725
Epoch 9/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 854us/step - accuracy: 0.9757 - loss: 0.0625 - val_accuracy: 0.9719 - val_loss: 0.0731
Epoch 10/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 872us/step - accuracy: 0.9766 - loss: 0.0604 - val_accuracy: 0.9718 - val_loss: 0.0751
Epoch 11/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 832us/step - accuracy: 0.9769 - loss: 0.0595 - val_accuracy: 0.9729 - val_loss: 0.0736
Epoch 12/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 909us/step - accuracy: 0.9785 - loss: 0.0571 - val_accuracy: 0.9723 - val_loss: 0.0735
Epoch 13/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 825us/step - accuracy: 0.9787 - loss: 0.0560 - val_accuracy: 0.9723 - val_loss: 0.0735
Epoch 14/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 819us/step - accuracy: 0.9787 - loss: 0.0543 - val_accuracy: 0.9727 - val_loss: 0.0741
Epoch 15/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 817us/step - accuracy: 0.9790 - loss: 0.0544 - val_accuracy: 0.9719 - val_loss: 0.0740
Epoch 16/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 829us/step - accuracy: 0.9788 - loss: 0.0539 - val_accuracy: 0.9729 - val_loss: 0.0748
Epoch 17/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 817us/step - accuracy: 0.9798 - loss: 0.0524 - val_accuracy: 0.9729 - val_loss: 0.0727
Epoch 18/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 818us/step - accuracy: 0.9810 - loss: 0.0503 - val_accuracy: 0.9710 - val_loss: 0.0782
Epoch 19/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 829us/step - accuracy: 0.9788 - loss: 0.0530 - val_accuracy: 0.9699 - val_loss: 0.0773
Epoch 20/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 838us/step - accuracy: 0.9803 - loss: 0.0512 - val_accuracy: 0.9714 - val_loss: 0.0747
Epoch 21/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 824us/step - accuracy: 0.9802 - loss: 0.0513 - val_accuracy: 0.9723 - val_loss: 0.0795
Epoch 22/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 824us/step - accuracy: 0.9810 - loss: 0.0483 - val_accuracy: 0.9727 - val_loss: 0.0775
Epoch 23/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 819us/step - accuracy: 0.9814 - loss: 0.0473 - val_accuracy: 0.9716 - val_loss: 0.0835
Epoch 24/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 825us/step - accuracy: 0.9810 - loss: 0.0480 - val_accuracy: 0.9710 - val_loss: 0.0767
Epoch 25/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 824us/step - accuracy: 0.9815 - loss: 0.0471 - val_accuracy: 0.9712 - val_loss: 0.0803
Epoch 26/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 826us/step - accuracy: 0.9829 - loss: 0.0449 - val_accuracy: 0.9707 - val_loss: 0.0811
Epoch 27/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 821us/step - accuracy: 0.9823 - loss: 0.0449 - val_accuracy: 0.9697 - val_loss: 0.0813
Epoch 28/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 821us/step - accuracy: 0.9829 - loss: 0.0432 - val_accuracy: 0.9719 - val_loss: 0.0803
Epoch 29/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 826us/step - accuracy: 0.9828 - loss: 0.0433 - val_accuracy: 0.9705 - val_loss: 0.0884
Epoch 30/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 873us/step - accuracy: 0.9832 - loss: 0.0425 - val_accuracy: 0.9707 - val_loss: 0.0855
Epoch 31/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 866us/step - accuracy: 0.9829 - loss: 0.0433 - val_accuracy: 0.9707 - val_loss: 0.0845
Epoch 32/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 857us/step - accuracy: 0.9830 - loss: 0.0430 - val_accuracy: 0.9727 - val_loss: 0.0840
Epoch 33/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 870us/step - accuracy: 0.9835 - loss: 0.0406 - val_accuracy: 0.9661 - val_loss: 0.0911
Epoch 34/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 874us/step - accuracy: 0.9840 - loss: 0.0407 - val_accuracy: 0.9707 - val_loss: 0.0866
Epoch 35/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 897us/step - accuracy: 0.9841 - loss: 0.0400 - val_accuracy: 0.9718 - val_loss: 0.0807
Epoch 36/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 859us/step - accuracy: 0.9841 - loss: 0.0399 - val_accuracy: 0.9696 - val_loss: 0.0841
Epoch 37/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 873us/step - accuracy: 0.9850 - loss: 0.0390 - val_accuracy: 0.9734 - val_loss: 0.0892
Epoch 38/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 866us/step - accuracy: 0.9847 - loss: 0.0378 - val_accuracy: 0.9690 - val_loss: 0.0956
Epoch 39/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 864us/step - accuracy: 0.9851 - loss: 0.0377 - val_accuracy: 0.9708 - val_loss: 0.0889
Epoch 40/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 863us/step - accuracy: 0.9852 - loss: 0.0377 - val_accuracy: 0.9725 - val_loss: 0.0888
Epoch 41/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 857us/step - accuracy: 0.9858 - loss: 0.0360 - val_accuracy: 0.9718 - val_loss: 0.0914
Epoch 42/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 859us/step - accuracy: 0.9844 - loss: 0.0376 - val_accuracy: 0.9699 - val_loss: 0.0980
Epoch 43/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 866us/step - accuracy: 0.9857 - loss: 0.0362 - val_accuracy: 0.9699 - val_loss: 0.0922
Epoch 44/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 860us/step - accuracy: 0.9858 - loss: 0.0368 - val_accuracy: 0.9701 - val_loss: 0.0956
Epoch 45/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 852us/step - accuracy: 0.9862 - loss: 0.0354 - val_accuracy: 0.9690 - val_loss: 0.0942
Epoch 46/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 862us/step - accuracy: 0.9869 - loss: 0.0331 - val_accuracy: 0.9690 - val_loss: 0.0977
Epoch 47/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 885us/step - accuracy: 0.9865 - loss: 0.0334 - val_accuracy: 0.9712 - val_loss: 0.0947
Epoch 48/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 870us/step - accuracy: 0.9871 - loss: 0.0338 - val_accuracy: 0.9699 - val_loss: 0.0983
Epoch 49/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 855us/step - accuracy: 0.9865 - loss: 0.0335 - val_accuracy: 0.9708 - val_loss: 0.1039
Epoch 50/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 864us/step - accuracy: 0.9865 - loss: 0.0338 - val_accuracy: 0.9705 - val_loss: 0.1021
Epoch 51/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 859us/step - accuracy: 0.9867 - loss: 0.0336 - val_accuracy: 0.9705 - val_loss: 0.1011
Epoch 52/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 854us/step - accuracy: 0.9871 - loss: 0.0321 - val_accuracy: 0.9692 - val_loss: 0.1045
Epoch 53/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 885us/step - accuracy: 0.9878 - loss: 0.0310 - val_accuracy: 0.9686 - val_loss: 0.1098
Epoch 54/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 877us/step - accuracy: 0.9870 - loss: 0.0318 - val_accuracy: 0.9701 - val_loss: 0.1042
Epoch 55/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 864us/step - accuracy: 0.9883 - loss: 0.0290 - val_accuracy: 0.9690 - val_loss: 0.1131
Epoch 56/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 866us/step - accuracy: 0.9884 - loss: 0.0298 - val_accuracy: 0.9697 - val_loss: 0.1078
Epoch 57/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 859us/step - accuracy: 0.9879 - loss: 0.0296 - val_accuracy: 0.9683 - val_loss: 0.1089
Epoch 58/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 891us/step - accuracy: 0.9881 - loss: 0.0302 - val_accuracy: 0.9707 - val_loss: 0.1103
Epoch 59/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 869us/step - accuracy: 0.9878 - loss: 0.0307 - val_accuracy: 0.9690 - val_loss: 0.1105
Epoch 60/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 859us/step - accuracy: 0.9873 - loss: 0.0317 - val_accuracy: 0.9685 - val_loss: 0.1166
Epoch 61/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 867us/step - accuracy: 0.9879 - loss: 0.0291 - val_accuracy: 0.9710 - val_loss: 0.1139
Epoch 62/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 853us/step - accuracy: 0.9878 - loss: 0.0287 - val_accuracy: 0.9705 - val_loss: 0.1148
Epoch 63/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 859us/step - accuracy: 0.9886 - loss: 0.0283 - val_accuracy: 0.9679 - val_loss: 0.1263
Epoch 64/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 861us/step - accuracy: 0.9884 - loss: 0.0283 - val_accuracy: 0.9701 - val_loss: 0.1200
Epoch 65/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 869us/step - accuracy: 0.9886 - loss: 0.0283 - val_accuracy: 0.9692 - val_loss: 0.1217
Epoch 66/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 858us/step - accuracy: 0.9895 - loss: 0.0262 - val_accuracy: 0.9701 - val_loss: 0.1157
Epoch 67/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 851us/step - accuracy: 0.9890 - loss: 0.0259 - val_accuracy: 0.9683 - val_loss: 0.1164
Epoch 68/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 852us/step - accuracy: 0.9891 - loss: 0.0265 - val_accuracy: 0.9685 - val_loss: 0.1275
Epoch 69/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 870us/step - accuracy: 0.9888 - loss: 0.0268 - val_accuracy: 0.9679 - val_loss: 0.1218
Epoch 70/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 862us/step - accuracy: 0.9892 - loss: 0.0268 - val_accuracy: 0.9694 - val_loss: 0.1320
Epoch 71/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 855us/step - accuracy: 0.9895 - loss: 0.0254 - val_accuracy: 0.9694 - val_loss: 0.1236
Epoch 72/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 856us/step - accuracy: 0.9895 - loss: 0.0251 - val_accuracy: 0.9708 - val_loss: 0.1271
Epoch 73/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 860us/step - accuracy: 0.9897 - loss: 0.0254 - val_accuracy: 0.9703 - val_loss: 0.1363
Epoch 74/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 861us/step - accuracy: 0.9895 - loss: 0.0257 - val_accuracy: 0.9705 - val_loss: 0.1315
Epoch 75/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 869us/step - accuracy: 0.9891 - loss: 0.0254 - val_accuracy: 0.9683 - val_loss: 0.1385
Epoch 76/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 886us/step - accuracy: 0.9899 - loss: 0.0247 - val_accuracy: 0.9681 - val_loss: 0.1324
Epoch 77/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 851us/step - accuracy: 0.9896 - loss: 0.0247 - val_accuracy: 0.9697 - val_loss: 0.1502
Epoch 78/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 860us/step - accuracy: 0.9901 - loss: 0.0252 - val_accuracy: 0.9701 - val_loss: 0.1304
Epoch 79/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 853us/step - accuracy: 0.9896 - loss: 0.0246 - val_accuracy: 0.9688 - val_loss: 0.1537
Epoch 80/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 882us/step - accuracy: 0.9901 - loss: 0.0239 - val_accuracy: 0.9705 - val_loss: 0.1453
Epoch 81/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 851us/step - accuracy: 0.9902 - loss: 0.0236 - val_accuracy: 0.9696 - val_loss: 0.1482
Epoch 82/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 862us/step - accuracy: 0.9901 - loss: 0.0237 - val_accuracy: 0.9672 - val_loss: 0.1538
Epoch 83/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 853us/step - accuracy: 0.9904 - loss: 0.0234 - val_accuracy: 0.9679 - val_loss: 0.1426
Epoch 84/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 858us/step - accuracy: 0.9903 - loss: 0.0234 - val_accuracy: 0.9692 - val_loss: 0.1495
Epoch 85/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 868us/step - accuracy: 0.9905 - loss: 0.0232 - val_accuracy: 0.9690 - val_loss: 0.1374
Epoch 86/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 861us/step - accuracy: 0.9903 - loss: 0.0232 - val_accuracy: 0.9644 - val_loss: 0.1494
Epoch 87/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 860us/step - accuracy: 0.9907 - loss: 0.0232 - val_accuracy: 0.9675 - val_loss: 0.1575
Epoch 88/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 860us/step - accuracy: 0.9908 - loss: 0.0215 - val_accuracy: 0.9685 - val_loss: 0.1655
Epoch 89/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 853us/step - accuracy: 0.9910 - loss: 0.0213 - val_accuracy: 0.9668 - val_loss: 0.1522
Epoch 90/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 890us/step - accuracy: 0.9909 - loss: 0.0213 - val_accuracy: 0.9670 - val_loss: 0.1697
Epoch 91/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 854us/step - accuracy: 0.9910 - loss: 0.0208 - val_accuracy: 0.9679 - val_loss: 0.1665
Epoch 92/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 861us/step - accuracy: 0.9904 - loss: 0.0224 - val_accuracy: 0.9685 - val_loss: 0.1520
Epoch 93/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 878us/step - accuracy: 0.9906 - loss: 0.0217 - val_accuracy: 0.9674 - val_loss: 0.1594
Epoch 94/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 848us/step - accuracy: 0.9917 - loss: 0.0205 - val_accuracy: 0.9655 - val_loss: 0.1640
Epoch 95/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 866us/step - accuracy: 0.9912 - loss: 0.0214 - val_accuracy: 0.9677 - val_loss: 0.1560
Epoch 96/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 851us/step - accuracy: 0.9912 - loss: 0.0213 - val_accuracy: 0.9679 - val_loss: 0.1666
Epoch 97/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 869us/step - accuracy: 0.9917 - loss: 0.0209 - val_accuracy: 0.9675 - val_loss: 0.1539
Epoch 98/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 886us/step - accuracy: 0.9915 - loss: 0.0206 - val_accuracy: 0.9683 - val_loss: 0.1764
Epoch 99/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 899us/step - accuracy: 0.9918 - loss: 0.0199 - val_accuracy: 0.9677 - val_loss: 0.1634
Epoch 100/100
3067/3067 ━━━━━━━━━━━━━━━━━━━━ 3s 854us/step - accuracy: 0.9914 - loss: 0.0197 - val_accuracy: 0.9659 - val_loss: 0.1713
<keras.src.callbacks.history.History at 0x2a5e3e7da50>
loss, accuracy = model.evaluate(X_dev, dev_labels)
print(f"Accuracy: {accuracy}")
171/171 ━━━━━━━━━━━━━━━━━━━━ 0s 660us/step - accuracy: 0.9641 - loss: 0.1876
Accuracy: 0.9658840894699097
dev0_pred = model.predict(X_dev)
dev0_pred = (dev0_pred > 0.5).astype(int)
171/171 ━━━━━━━━━━━━━━━━━━━━ 0s 794us/step
dev0_pred = pd.DataFrame(dev0_pred)
dev0_pred.to_csv("dev-0/out.tsv", index=False, header=False)
X_testA = preprocess_data('test-A/in.tsv', word2vec_model)
testA_pred = model.predict(X_testA)
testA_pred = (testA_pred > 0.5).astype(int)
171/171 ━━━━━━━━━━━━━━━━━━━━ 0s 630us/step
testA_pred = pd.DataFrame(testA_pred)
testA_pred.to_csv("test-A/out.tsv", index=False, header=False)