{ "cells": [ { "cell_type": "code", "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2024-05-19T22:00:19.457327Z", "start_time": "2024-05-19T22:00:16.498540Z" } }, "source": [ "import pandas as pd\n", "import numpy as np\n", "from gensim.models import Word2Vec\n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Dense" ], "outputs": [], "execution_count": 1 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:00:19.463374Z", "start_time": "2024-05-19T22:00:19.458329Z" } }, "cell_type": "code", "source": [ "def load_train_data(file_path):\n", " labels = []\n", " texts = []\n", " with open(file_path, 'r', encoding='utf-8') as file:\n", " for line in file:\n", " parts = line.split('\\t', 1)\n", " if len(parts) == 2:\n", " label, text = parts\n", " labels.append(int(label))\n", " texts.append(text.strip())\n", " return pd.DataFrame({'label': labels, 'text': texts})" ], "id": "3224192629baf09c", "outputs": [], "execution_count": 2 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:00:19.472523Z", "start_time": "2024-05-19T22:00:19.464342Z" } }, "cell_type": "code", "source": [ "def load_data(file_path):\n", " texts = []\n", " with open(file_path, 'r', encoding='utf-8') as file:\n", " for line in file:\n", " texts.append(line.strip())\n", " return pd.DataFrame({'text': texts})" ], "id": "1a8254758b45c975", "outputs": [], "execution_count": 3 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:00:19.477364Z", "start_time": "2024-05-19T22:00:19.473525Z" } }, "cell_type": "code", "source": [ "def load_labels(file_path):\n", " labels = []\n", " with open(file_path, 'r', encoding='utf-8') as file:\n", " for line in file:\n", " labels.append(int(line.strip()))\n", " return pd.DataFrame({'label': labels})" ], "id": "f2aaa6dba99b1ff6", "outputs": [], "execution_count": 4 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:00:19.483099Z", "start_time": "2024-05-19T22:00:19.479367Z" } }, "cell_type": "code", "source": [ "def get_average_word2vec(tokens_list, model, k=100):\n", " vec = np.zeros(k)\n", " count = 0\n", " for word in tokens_list:\n", " if word in model.wv:\n", " vec += model.wv[word]\n", " count += 1\n", " if count != 0:\n", " vec /= count\n", " return vec" ], "id": "139f942641e876f", "outputs": [], "execution_count": 5 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:00:19.487397Z", "start_time": "2024-05-19T22:00:19.484101Z" } }, "cell_type": "code", "source": [ "def preprocess_data(file_path, word2vec_model):\n", " data = load_data(file_path)\n", " X = np.array([get_average_word2vec(text.split(), word2vec_model) for text in data['text']])\n", " return X" ], "id": "98797563dcad610b", "outputs": [], "execution_count": 6 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:00:33.384817Z", "start_time": "2024-05-19T22:00:19.488400Z" } }, "cell_type": "code", "source": [ "train_data = load_train_data('train/train.tsv')\n", "sentences = [text.split() for text in train_data['text']]\n", "word2vec_model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)" ], "id": "a821ea109f28215b", "outputs": [], "execution_count": 7 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:00:42.574909Z", "start_time": "2024-05-19T22:00:33.385840Z" } }, "cell_type": "code", "source": [ "X_train = np.array([get_average_word2vec(text.split(), word2vec_model) for text in train_data['text']])\n", "y_train = np.array(train_data['label'])\n", "X_dev = preprocess_data('dev-0/in.tsv', word2vec_model)\n", "dev_labels = load_labels('dev-0/expected.tsv')" ], "id": "3cabe99193dd82ad", "outputs": [], "execution_count": 8 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:05:09.554039Z", "start_time": "2024-05-19T22:00:42.575910Z" } }, "cell_type": "code", "source": [ "model = Sequential()\n", "model.add(Dense(64, activation='relu'))\n", "model.add(Dense(32, activation='relu'))\n", "model.add(Dense(1, activation='sigmoid'))\n", "\n", "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n", "model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_dev, dev_labels))" ], "id": "31e13e484c35a433", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m4s\u001B[0m 880us/step - accuracy: 0.9494 - loss: 0.1326 - val_accuracy: 0.9718 - val_loss: 0.0791\n", "Epoch 2/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 879us/step - accuracy: 0.9693 - loss: 0.0806 - val_accuracy: 0.9714 - val_loss: 0.0764\n", "Epoch 3/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 874us/step - accuracy: 0.9710 - loss: 0.0749 - val_accuracy: 0.9727 - val_loss: 0.0743\n", "Epoch 4/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 872us/step - accuracy: 0.9720 - loss: 0.0740 - val_accuracy: 0.9725 - val_loss: 0.0725\n", "Epoch 5/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 854us/step - accuracy: 0.9723 - loss: 0.0718 - val_accuracy: 0.9732 - val_loss: 0.0709\n", "Epoch 6/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 855us/step - accuracy: 0.9737 - loss: 0.0687 - val_accuracy: 0.9685 - val_loss: 0.0921\n", "Epoch 7/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 859us/step - accuracy: 0.9734 - loss: 0.0670 - val_accuracy: 0.9723 - val_loss: 0.0737\n", "Epoch 8/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 867us/step - accuracy: 0.9755 - loss: 0.0636 - val_accuracy: 0.9730 - val_loss: 0.0725\n", "Epoch 9/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 854us/step - accuracy: 0.9757 - loss: 0.0625 - val_accuracy: 0.9719 - val_loss: 0.0731\n", "Epoch 10/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 872us/step - accuracy: 0.9766 - loss: 0.0604 - val_accuracy: 0.9718 - val_loss: 0.0751\n", "Epoch 11/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 832us/step - accuracy: 0.9769 - loss: 0.0595 - val_accuracy: 0.9729 - val_loss: 0.0736\n", "Epoch 12/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 909us/step - accuracy: 0.9785 - loss: 0.0571 - val_accuracy: 0.9723 - val_loss: 0.0735\n", "Epoch 13/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 825us/step - accuracy: 0.9787 - loss: 0.0560 - val_accuracy: 0.9723 - val_loss: 0.0735\n", "Epoch 14/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 819us/step - accuracy: 0.9787 - loss: 0.0543 - val_accuracy: 0.9727 - val_loss: 0.0741\n", "Epoch 15/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 817us/step - accuracy: 0.9790 - loss: 0.0544 - val_accuracy: 0.9719 - val_loss: 0.0740\n", "Epoch 16/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 829us/step - accuracy: 0.9788 - loss: 0.0539 - val_accuracy: 0.9729 - val_loss: 0.0748\n", "Epoch 17/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 817us/step - accuracy: 0.9798 - loss: 0.0524 - val_accuracy: 0.9729 - val_loss: 0.0727\n", "Epoch 18/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 818us/step - accuracy: 0.9810 - loss: 0.0503 - val_accuracy: 0.9710 - val_loss: 0.0782\n", "Epoch 19/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 829us/step - accuracy: 0.9788 - loss: 0.0530 - val_accuracy: 0.9699 - val_loss: 0.0773\n", "Epoch 20/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 838us/step - accuracy: 0.9803 - loss: 0.0512 - val_accuracy: 0.9714 - val_loss: 0.0747\n", "Epoch 21/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 824us/step - accuracy: 0.9802 - loss: 0.0513 - val_accuracy: 0.9723 - val_loss: 0.0795\n", "Epoch 22/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 824us/step - accuracy: 0.9810 - loss: 0.0483 - val_accuracy: 0.9727 - val_loss: 0.0775\n", "Epoch 23/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 819us/step - accuracy: 0.9814 - loss: 0.0473 - val_accuracy: 0.9716 - val_loss: 0.0835\n", "Epoch 24/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 825us/step - accuracy: 0.9810 - loss: 0.0480 - val_accuracy: 0.9710 - val_loss: 0.0767\n", "Epoch 25/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 824us/step - accuracy: 0.9815 - loss: 0.0471 - val_accuracy: 0.9712 - val_loss: 0.0803\n", "Epoch 26/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 826us/step - accuracy: 0.9829 - loss: 0.0449 - val_accuracy: 0.9707 - val_loss: 0.0811\n", "Epoch 27/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 821us/step - accuracy: 0.9823 - loss: 0.0449 - val_accuracy: 0.9697 - val_loss: 0.0813\n", "Epoch 28/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 821us/step - accuracy: 0.9829 - loss: 0.0432 - val_accuracy: 0.9719 - val_loss: 0.0803\n", "Epoch 29/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 826us/step - accuracy: 0.9828 - loss: 0.0433 - val_accuracy: 0.9705 - val_loss: 0.0884\n", "Epoch 30/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 873us/step - accuracy: 0.9832 - loss: 0.0425 - val_accuracy: 0.9707 - val_loss: 0.0855\n", "Epoch 31/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 866us/step - accuracy: 0.9829 - loss: 0.0433 - val_accuracy: 0.9707 - val_loss: 0.0845\n", "Epoch 32/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 857us/step - accuracy: 0.9830 - loss: 0.0430 - val_accuracy: 0.9727 - val_loss: 0.0840\n", "Epoch 33/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 870us/step - accuracy: 0.9835 - loss: 0.0406 - val_accuracy: 0.9661 - val_loss: 0.0911\n", "Epoch 34/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 874us/step - accuracy: 0.9840 - loss: 0.0407 - val_accuracy: 0.9707 - val_loss: 0.0866\n", "Epoch 35/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 897us/step - accuracy: 0.9841 - loss: 0.0400 - val_accuracy: 0.9718 - val_loss: 0.0807\n", "Epoch 36/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 859us/step - accuracy: 0.9841 - loss: 0.0399 - val_accuracy: 0.9696 - val_loss: 0.0841\n", "Epoch 37/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 873us/step - accuracy: 0.9850 - loss: 0.0390 - val_accuracy: 0.9734 - val_loss: 0.0892\n", "Epoch 38/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 866us/step - accuracy: 0.9847 - loss: 0.0378 - val_accuracy: 0.9690 - val_loss: 0.0956\n", "Epoch 39/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 864us/step - accuracy: 0.9851 - loss: 0.0377 - val_accuracy: 0.9708 - val_loss: 0.0889\n", "Epoch 40/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 863us/step - accuracy: 0.9852 - loss: 0.0377 - val_accuracy: 0.9725 - val_loss: 0.0888\n", "Epoch 41/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 857us/step - accuracy: 0.9858 - loss: 0.0360 - val_accuracy: 0.9718 - val_loss: 0.0914\n", "Epoch 42/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 859us/step - accuracy: 0.9844 - loss: 0.0376 - val_accuracy: 0.9699 - val_loss: 0.0980\n", "Epoch 43/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 866us/step - accuracy: 0.9857 - loss: 0.0362 - val_accuracy: 0.9699 - val_loss: 0.0922\n", "Epoch 44/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 860us/step - accuracy: 0.9858 - loss: 0.0368 - val_accuracy: 0.9701 - val_loss: 0.0956\n", "Epoch 45/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 852us/step - accuracy: 0.9862 - loss: 0.0354 - val_accuracy: 0.9690 - val_loss: 0.0942\n", "Epoch 46/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 862us/step - accuracy: 0.9869 - loss: 0.0331 - val_accuracy: 0.9690 - val_loss: 0.0977\n", "Epoch 47/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 885us/step - accuracy: 0.9865 - loss: 0.0334 - val_accuracy: 0.9712 - val_loss: 0.0947\n", "Epoch 48/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 870us/step - accuracy: 0.9871 - loss: 0.0338 - val_accuracy: 0.9699 - val_loss: 0.0983\n", "Epoch 49/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 855us/step - accuracy: 0.9865 - loss: 0.0335 - val_accuracy: 0.9708 - val_loss: 0.1039\n", "Epoch 50/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 864us/step - accuracy: 0.9865 - loss: 0.0338 - val_accuracy: 0.9705 - val_loss: 0.1021\n", "Epoch 51/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 859us/step - accuracy: 0.9867 - loss: 0.0336 - val_accuracy: 0.9705 - val_loss: 0.1011\n", "Epoch 52/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 854us/step - accuracy: 0.9871 - loss: 0.0321 - val_accuracy: 0.9692 - val_loss: 0.1045\n", "Epoch 53/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 885us/step - accuracy: 0.9878 - loss: 0.0310 - val_accuracy: 0.9686 - val_loss: 0.1098\n", "Epoch 54/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 877us/step - accuracy: 0.9870 - loss: 0.0318 - val_accuracy: 0.9701 - val_loss: 0.1042\n", "Epoch 55/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 864us/step - accuracy: 0.9883 - loss: 0.0290 - val_accuracy: 0.9690 - val_loss: 0.1131\n", "Epoch 56/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 866us/step - accuracy: 0.9884 - loss: 0.0298 - val_accuracy: 0.9697 - val_loss: 0.1078\n", "Epoch 57/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 859us/step - accuracy: 0.9879 - loss: 0.0296 - val_accuracy: 0.9683 - val_loss: 0.1089\n", "Epoch 58/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 891us/step - accuracy: 0.9881 - loss: 0.0302 - val_accuracy: 0.9707 - val_loss: 0.1103\n", "Epoch 59/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 869us/step - accuracy: 0.9878 - loss: 0.0307 - val_accuracy: 0.9690 - val_loss: 0.1105\n", "Epoch 60/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 859us/step - accuracy: 0.9873 - loss: 0.0317 - val_accuracy: 0.9685 - val_loss: 0.1166\n", "Epoch 61/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 867us/step - accuracy: 0.9879 - loss: 0.0291 - val_accuracy: 0.9710 - val_loss: 0.1139\n", "Epoch 62/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 853us/step - accuracy: 0.9878 - loss: 0.0287 - val_accuracy: 0.9705 - val_loss: 0.1148\n", "Epoch 63/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 859us/step - accuracy: 0.9886 - loss: 0.0283 - val_accuracy: 0.9679 - val_loss: 0.1263\n", "Epoch 64/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 861us/step - accuracy: 0.9884 - loss: 0.0283 - val_accuracy: 0.9701 - val_loss: 0.1200\n", "Epoch 65/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 869us/step - accuracy: 0.9886 - loss: 0.0283 - val_accuracy: 0.9692 - val_loss: 0.1217\n", "Epoch 66/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 858us/step - accuracy: 0.9895 - loss: 0.0262 - val_accuracy: 0.9701 - val_loss: 0.1157\n", "Epoch 67/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 851us/step - accuracy: 0.9890 - loss: 0.0259 - val_accuracy: 0.9683 - val_loss: 0.1164\n", "Epoch 68/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 852us/step - accuracy: 0.9891 - loss: 0.0265 - val_accuracy: 0.9685 - val_loss: 0.1275\n", "Epoch 69/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 870us/step - accuracy: 0.9888 - loss: 0.0268 - val_accuracy: 0.9679 - val_loss: 0.1218\n", "Epoch 70/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 862us/step - accuracy: 0.9892 - loss: 0.0268 - val_accuracy: 0.9694 - val_loss: 0.1320\n", "Epoch 71/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 855us/step - accuracy: 0.9895 - loss: 0.0254 - val_accuracy: 0.9694 - val_loss: 0.1236\n", "Epoch 72/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 856us/step - accuracy: 0.9895 - loss: 0.0251 - val_accuracy: 0.9708 - val_loss: 0.1271\n", "Epoch 73/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 860us/step - accuracy: 0.9897 - loss: 0.0254 - val_accuracy: 0.9703 - val_loss: 0.1363\n", "Epoch 74/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 861us/step - accuracy: 0.9895 - loss: 0.0257 - val_accuracy: 0.9705 - val_loss: 0.1315\n", "Epoch 75/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 869us/step - accuracy: 0.9891 - loss: 0.0254 - val_accuracy: 0.9683 - val_loss: 0.1385\n", "Epoch 76/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 886us/step - accuracy: 0.9899 - loss: 0.0247 - val_accuracy: 0.9681 - val_loss: 0.1324\n", "Epoch 77/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 851us/step - accuracy: 0.9896 - loss: 0.0247 - val_accuracy: 0.9697 - val_loss: 0.1502\n", "Epoch 78/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 860us/step - accuracy: 0.9901 - loss: 0.0252 - val_accuracy: 0.9701 - val_loss: 0.1304\n", "Epoch 79/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 853us/step - accuracy: 0.9896 - loss: 0.0246 - val_accuracy: 0.9688 - val_loss: 0.1537\n", "Epoch 80/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 882us/step - accuracy: 0.9901 - loss: 0.0239 - val_accuracy: 0.9705 - val_loss: 0.1453\n", "Epoch 81/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 851us/step - accuracy: 0.9902 - loss: 0.0236 - val_accuracy: 0.9696 - val_loss: 0.1482\n", "Epoch 82/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 862us/step - accuracy: 0.9901 - loss: 0.0237 - val_accuracy: 0.9672 - val_loss: 0.1538\n", "Epoch 83/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 853us/step - accuracy: 0.9904 - loss: 0.0234 - val_accuracy: 0.9679 - val_loss: 0.1426\n", "Epoch 84/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 858us/step - accuracy: 0.9903 - loss: 0.0234 - val_accuracy: 0.9692 - val_loss: 0.1495\n", "Epoch 85/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 868us/step - accuracy: 0.9905 - loss: 0.0232 - val_accuracy: 0.9690 - val_loss: 0.1374\n", "Epoch 86/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 861us/step - accuracy: 0.9903 - loss: 0.0232 - val_accuracy: 0.9644 - val_loss: 0.1494\n", "Epoch 87/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 860us/step - accuracy: 0.9907 - loss: 0.0232 - val_accuracy: 0.9675 - val_loss: 0.1575\n", "Epoch 88/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 860us/step - accuracy: 0.9908 - loss: 0.0215 - val_accuracy: 0.9685 - val_loss: 0.1655\n", "Epoch 89/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 853us/step - accuracy: 0.9910 - loss: 0.0213 - val_accuracy: 0.9668 - val_loss: 0.1522\n", "Epoch 90/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 890us/step - accuracy: 0.9909 - loss: 0.0213 - val_accuracy: 0.9670 - val_loss: 0.1697\n", "Epoch 91/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 854us/step - accuracy: 0.9910 - loss: 0.0208 - val_accuracy: 0.9679 - val_loss: 0.1665\n", "Epoch 92/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 861us/step - accuracy: 0.9904 - loss: 0.0224 - val_accuracy: 0.9685 - val_loss: 0.1520\n", "Epoch 93/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 878us/step - accuracy: 0.9906 - loss: 0.0217 - val_accuracy: 0.9674 - val_loss: 0.1594\n", "Epoch 94/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 848us/step - accuracy: 0.9917 - loss: 0.0205 - val_accuracy: 0.9655 - val_loss: 0.1640\n", "Epoch 95/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 866us/step - accuracy: 0.9912 - loss: 0.0214 - val_accuracy: 0.9677 - val_loss: 0.1560\n", "Epoch 96/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 851us/step - accuracy: 0.9912 - loss: 0.0213 - val_accuracy: 0.9679 - val_loss: 0.1666\n", "Epoch 97/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 869us/step - accuracy: 0.9917 - loss: 0.0209 - val_accuracy: 0.9675 - val_loss: 0.1539\n", "Epoch 98/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 886us/step - accuracy: 0.9915 - loss: 0.0206 - val_accuracy: 0.9683 - val_loss: 0.1764\n", "Epoch 99/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 899us/step - accuracy: 0.9918 - loss: 0.0199 - val_accuracy: 0.9677 - val_loss: 0.1634\n", "Epoch 100/100\n", "\u001B[1m3067/3067\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m3s\u001B[0m 854us/step - accuracy: 0.9914 - loss: 0.0197 - val_accuracy: 0.9659 - val_loss: 0.1713\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 9 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:05:09.728675Z", "start_time": "2024-05-19T22:05:09.554039Z" } }, "cell_type": "code", "source": [ "loss, accuracy = model.evaluate(X_dev, dev_labels)\n", "print(f\"Accuracy: {accuracy}\")\n" ], "id": "cf8b0f801768560b", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001B[1m171/171\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 660us/step - accuracy: 0.9641 - loss: 0.1876\n", "Accuracy: 0.9658840894699097\n" ] } ], "execution_count": 10 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:05:10.010233Z", "start_time": "2024-05-19T22:05:09.728675Z" } }, "cell_type": "code", "source": [ "dev0_pred = model.predict(X_dev)\n", "dev0_pred = (dev0_pred > 0.5).astype(int)" ], "id": "bf1843efc9fbcc41", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001B[1m171/171\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 794us/step\n" ] } ], "execution_count": 11 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:05:10.018255Z", "start_time": "2024-05-19T22:05:10.010233Z" } }, "cell_type": "code", "source": [ "dev0_pred = pd.DataFrame(dev0_pred)\n", "dev0_pred.to_csv(\"dev-0/out.tsv\", index=False, header=False)" ], "id": "5c5a511249859def", "outputs": [], "execution_count": 12 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:05:10.724357Z", "start_time": "2024-05-19T22:05:10.019265Z" } }, "cell_type": "code", "source": [ "X_testA = preprocess_data('test-A/in.tsv', word2vec_model)\n", "testA_pred = model.predict(X_testA)\n", "testA_pred = (testA_pred > 0.5).astype(int)" ], "id": "3a736eea3080da17", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001B[1m171/171\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 630us/step\n" ] } ], "execution_count": 13 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-19T22:05:10.732380Z", "start_time": "2024-05-19T22:05:10.726373Z" } }, "cell_type": "code", "source": [ "testA_pred = pd.DataFrame(testA_pred)\n", "testA_pred.to_csv(\"test-A/out.tsv\", index=False, header=False)" ], "id": "87af989ea8614b3c", "outputs": [], "execution_count": 14 } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }