korne 2022-06-14 23:36:56 +02:00
parent 756ef4277a
commit 8a69cabc52
13 changed files with 888077 additions and 0 deletions

@@ -0,0 +1,262 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "74100403-147c-42cd-8285-e30778c0fb66",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import gensim\n",
"import torch\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.metrics import accuracy_score"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "bf211ece-e27a-4119-a1b9-9a9a610cfb46",
"metadata": {},
"outputs": [],
"source": [
"def predict_year(x, path_out, model):\n",
" results = model.predict(x)\n",
" with open(path_out, 'wt') as file:\n",
" for r in results:\n",
" file.write(str(r) + '\\n') "
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1ec57d97-a852-490e-8da4-d1e4c9676cd6",
"metadata": {},
"outputs": [],
"source": [
"def read_file(filename):\n",
" result = []\n",
" with open(filename, 'r', encoding=\"utf-8\") as file:\n",
" for line in file:\n",
" text = line.split(\"\\t\")[0].strip()\n",
" result.append(text)\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "86fbfb79-76e7-49f5-b722-2827f93cb03f",
"metadata": {},
"outputs": [],
"source": [
"with open('train/in.tsv', 'r', encoding='utf8') as file:\n",
" train = pd.read_csv(file, sep='\\t', header=None)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8960c975-f756-4e36-a1ce-e9fd5fdf8fe3",
"metadata": {},
"outputs": [],
"source": [
"with open('train/expected.tsv', 'r', encoding='utf8') as file:\n",
" train_y = pd.read_csv(file, sep='\\t', header=None)\n",
"train_y = train_y[0:10000]\n",
"train_y = train_y[0]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "07ae7b22-e95d-4614-9757-15660a9834b6",
"metadata": {},
"outputs": [],
"source": [
"train = train[0:10000]\n",
"train_x = train[0]\n",
"train_x = [gensim.utils.simple_preprocess(x) for x in train_x]\n",
"#train_x"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "fde71cd8-f682-4793-bce9-0f9a9d8c176c",
"metadata": {},
"outputs": [],
"source": [
"from gensim.test.utils import common_texts\n",
"from gensim.models import Word2Vec\n",
"\n",
"model = Word2Vec(sentences=train_x, vector_size=100, window=5, min_count=1, workers=4)\n",
"#data, min_count = 1, vector_size = 100, window = 5, sg = 1"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9a4c8066-f985-478e-8944-dd45b73d9795",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\korne\\AppData\\Local\\Temp\\ipykernel_3520\\3800840358.py:2: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
" train_x_vec = np.array([np.array([model.wv[i] for i in x if i in words]) for x in train_x])\n"
]
}
],
"source": [
"words = set(model.wv.index_to_key)\n",
"train_x_vec = np.array([np.array([model.wv[i] for i in x if i in words]) for x in train_x])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b52269f9-f143-483d-9669-ce8f5972d6bb",
"metadata": {},
"outputs": [],
"source": [
"FEATURES = 100\n",
"\n",
"class NeuralNetworkModel(torch.nn.Module):\n",
" def __init__(self):\n",
" super(NeuralNetworkModel, self).__init__()\n",
" self.fc1 = torch.nn.Linear(FEATURES,500)\n",
" self.fc2 = torch.nn.Linear(500,1)\n",
"\n",
" def forward(self, x):\n",
" x = self.fc1(x)\n",
" x = torch.relu(x)\n",
" x = self.fc2(x)\n",
" x = torch.sigmoid(x)\n",
" return x\n",
"\n",
"nn_model = NeuralNetworkModel()\n",
"BATCH_SIZE = 40\n",
"criterion = torch.nn.BCELoss()\n",
"optimizer = torch.optim.SGD(nn_model.parameters(), lr = 0.1)\n",
"\n",
"def get_loss_acc(model, data_x, data_y):\n",
" loss_score = 0\n",
" acc_score = 0\n",
" items_total = 0\n",
" model.eval()\n",
" for i in range(0, data_y.shape[0], BATCH_SIZE):\n",
" X = data_x[i:i+BATCH_SIZE]\n",
" X = torch.tensor(X.astype(np.float32))\n",
" Y = data_y[i:i+BATCH_SIZE]\n",
" Y = torch.tensor(Y.astype(np.float32)).reshape(-1,1)\n",
" Y_predictions = model(X)\n",
" acc_score += torch.sum((Y_predictions > 0.5) == Y).item()\n",
" items_total += Y.shape[0]\n",
"\n",
" loss = criterion(Y_predictions, Y)\n",
"\n",
" loss_score += loss.item() * Y.shape[0]\n",
" return (loss_score / items_total), (acc_score / items_total)\n",
"\n",
"\n",
"for epoch in range(5):\n",
" loss_score = 0\n",
" acc_score = 0\n",
" items_total = 0\n",
" nn_model.train()\n",
" for i in range(0, train_y.shape[0] - 42, BATCH_SIZE):\n",
" X = train_x_vec[i:i+BATCH_SIZE]\n",
" X = torch.tensor(X.astype(np.float32))\n",
" Y = train_y[i:i+BATCH_SIZE]\n",
" Y = torch.tensor(Y.astype(np.float32)).reshape(-1,1)\n",
" Y_predictions = nn_model(X)\n",
" acc_score += torch.sum((Y_predictions > 0.5) == Y).item()\n",
" items_total += Y.shape[0]\n",
"\n",
" optimizer.zero_grad()\n",
" loss = criterion(Y_predictions, Y)\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
"\n",
" loss_score += loss.item() * Y.shape[0]\n",
"\n",
" display(epoch)\n",
" display(get_loss_acc(model, train_x_vect, train_y))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1482f342-f2ea-4c9d-b221-5ef451e3a6b3",
"metadata": {},
"outputs": [],
"source": [
"#print('trenowanie modelu')\n",
"model = NeuralNetworkModel()\n",
"BATCH_SIZE = 5\n",
"criterion = torch.nn.BCELoss()\n",
"optimizer = torch.optim.SGD(model.parameters(), lr=0.01)\n",
"\n",
"for epoch in range(BATCH_SIZE):\n",
" model.train()\n",
" for i in range(0, y_train.shape[0], BATCH_SIZE):\n",
" X = x_train[i:i + BATCH_SIZE]\n",
" X = torch.tensor(X)\n",
" y = y_train[i:i + BATCH_SIZE]\n",
" y = torch.tensor(y.astype(np.float32).to_numpy()).reshape(-1, 1)\n",
" optimizer.zero_grad()\n",
" outputs = model(X.float())\n",
" loss = criterion(outputs, y)\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
"#print('predykcja wynikow')\n",
"y_dev = []\n",
"y_test = []\n",
"model.eval()\n",
"\n",
"with torch.no_grad():\n",
" for i in range(0, len(x_dev), BATCH_SIZE):\n",
" X = x_dev[i:i + BATCH_SIZE]\n",
" X = torch.tensor(X)\n",
" outputs = model(X.float())\n",
" prediction = (outputs > 0.5)\n",
" y_dev += prediction.tolist()\n",
"\n",
" for i in range(0, len(x_test), BATCH_SIZE):\n",
" X = x_test[i:i + BATCH_SIZE]\n",
" X = torch.tensor(X)\n",
" outputs = model(X.float())\n",
" y = (outputs >= 0.5)\n",
" y_test += prediction.tolist()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
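
One caveat in the notebook above: train_x_vec stores a (num_tokens, 100) array per comment, so the batches are ragged (hence the VisibleDeprecationWarning) and cannot be fed to Linear(FEATURES, 500) directly. A minimal sketch of one common fix, mean-pooling each document's word vectors into a single 100-dimensional feature; it assumes the trained Word2Vec model and the tokenized train_x from the cells above:

import numpy as np

def doc_vector(tokens, w2v, dim=100):
    # average the vectors of in-vocabulary tokens; zero vector for empty documents
    vecs = [w2v.wv[t] for t in tokens if t in w2v.wv]
    return np.mean(vecs, axis=0) if vecs else np.zeros(dim, dtype=np.float32)

train_x_vec = np.stack([doc_vector(doc, model) for doc in train_x])  # shape (N, 100)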

@@ -0,0 +1,223 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "equal-singles",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/lib/python3/dist-packages/sklearn/utils/validation.py:37: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n",
" LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'\n",
"/usr/lib/python3/dist-packages/sklearn/feature_extraction/image.py:167: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" dtype=np.int):\n",
"/usr/lib/python3/dist-packages/sklearn/linear_model/least_angle.py:35: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" eps=np.finfo(np.float).eps,\n",
"/usr/lib/python3/dist-packages/sklearn/linear_model/least_angle.py:597: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,\n",
"/usr/lib/python3/dist-packages/sklearn/linear_model/least_angle.py:836: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,\n",
"/usr/lib/python3/dist-packages/sklearn/linear_model/least_angle.py:862: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" eps=np.finfo(np.float).eps, positive=False):\n",
"/usr/lib/python3/dist-packages/sklearn/linear_model/least_angle.py:1097: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" max_n_alphas=1000, n_jobs=None, eps=np.finfo(np.float).eps,\n",
"/usr/lib/python3/dist-packages/sklearn/linear_model/least_angle.py:1344: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" max_n_alphas=1000, n_jobs=None, eps=np.finfo(np.float).eps,\n",
"/usr/lib/python3/dist-packages/sklearn/linear_model/least_angle.py:1480: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" eps=np.finfo(np.float).eps, copy_X=True, positive=False):\n",
"/usr/lib/python3/dist-packages/sklearn/linear_model/randomized_l1.py:152: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" precompute=False, eps=np.finfo(np.float).eps,\n",
"/usr/lib/python3/dist-packages/sklearn/linear_model/randomized_l1.py:320: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" eps=np.finfo(np.float).eps, random_state=None,\n",
"/usr/lib/python3/dist-packages/sklearn/linear_model/randomized_l1.py:580: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" eps=4 * np.finfo(np.float).eps, n_jobs=None,\n"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import torch\n",
"import csv\n",
"import lzma\n",
"import gensim.downloader\n",
"from nltk import word_tokenize"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "involved-understanding",
"metadata": {},
"outputs": [],
"source": [
"x_train = pd.read_table('in.tsv', sep='\\t', header=None, quoting=3)\n",
"y_train = pd.read_table('expected.tsv', sep='\\t', header=None, quoting=3)\n",
"#x_dev = pd.read_table('dev-0/in.tsv.xz', compression='xz', sep='\\t', header=None, quoting=3)\n",
"#x_test = pd.read_table('test-A/in.tsv.xz', compression='xz', sep='\\t', header=None, quoting=3)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "collaborative-cincinnati",
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "module 'torch' has no attribute 'nn'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-5-11c9482004ae>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m#print('inicjalizacja modelu')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mclass\u001b[0m \u001b[0mNeuralNetworkModel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mModule\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mNeuralNetworkModel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ml01\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m300\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m300\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: module 'torch' has no attribute 'nn'"
]
}
],
"source": [
"#print('inicjalizacja modelu')\n",
"class NeuralNetworkModel(torch.nn.Module):\n",
" def __init__(self):\n",
" super(NeuralNetworkModel, self).__init__()\n",
" self.l01 = torch.nn.Linear(300, 300)\n",
" self.l02 = torch.nn.Linear(300, 1)\n",
"\n",
" def forward(self, x):\n",
" x = self.l01(x)\n",
" x = torch.relu(x)\n",
" x = self.l02(x)\n",
" x = torch.sigmoid(x)\n",
" return x"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "hydraulic-business",
"metadata": {},
"outputs": [],
"source": [
"#print('przygotowanie danych')\n",
"\n",
"x_train = x_train[0].str.lower()\n",
"y_train = y_train[0]\n",
"x_dev = x_dev[0].str.lower()\n",
"x_test = x_test[0].str.lower()\n",
"\n",
"x_train = [word_tokenize(x) for x in x_train]\n",
"x_dev = [word_tokenize(x) for x in x_dev]\n",
"x_test = [word_tokenize(x) for x in x_test]\n",
"\n",
"word2vec = gensim.downloader.load('word2vec-google-news-300')\n",
"x_train = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in x_train]\n",
"x_dev = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in x_dev]\n",
"x_test = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in x_test]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "heavy-sandwich",
"metadata": {},
"outputs": [],
"source": [
"#print('trenowanie modelu')\n",
"model = NeuralNetworkModel()\n",
"BATCH_SIZE = 5\n",
"criterion = torch.nn.BCELoss()\n",
"optimizer = torch.optim.SGD(model.parameters(), lr=0.01)\n",
"\n",
"for epoch in range(BATCH_SIZE):\n",
" model.train()\n",
" for i in range(0, y_train.shape[0], BATCH_SIZE):\n",
" X = x_train[i:i + BATCH_SIZE]\n",
" X = torch.tensor(X)\n",
" y = y_train[i:i + BATCH_SIZE]\n",
" y = torch.tensor(y.astype(np.float32).to_numpy()).reshape(-1, 1)\n",
" optimizer.zero_grad()\n",
" outputs = model(X.float())\n",
" loss = criterion(outputs, y)\n",
" loss.backward()\n",
" optimizer.step()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "small-pavilion",
"metadata": {},
"outputs": [],
"source": [
"#print('predykcja wynikow')\n",
"y_dev = []\n",
"y_test = []\n",
"model.eval()\n",
"\n",
"with torch.no_grad():\n",
" for i in range(0, len(x_dev), BATCH_SIZE):\n",
" X = x_dev[i:i + BATCH_SIZE]\n",
" X = torch.tensor(X)\n",
" outputs = model(X.float())\n",
" prediction = (outputs > 0.5)\n",
" y_dev += prediction.tolist()\n",
"\n",
" for i in range(0, len(x_test), BATCH_SIZE):\n",
" X = x_test[i:i + BATCH_SIZE]\n",
" X = torch.tensor(X)\n",
" outputs = model(X.float())\n",
" y = (outputs >= 0.5)\n",
" y_test += prediction.tolist()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "toxic-pendant",
"metadata": {},
"outputs": [],
"source": [
"# print('eksportowanie do plików')\n",
"y_dev = np.asarray(y_dev, dtype=np.int32)\n",
"y_test = np.asarray(y_test, dtype=np.int32)\n",
"y_dev.tofile('./dev-0/out.tsv', sep='\\n')\n",
"y_test.tofile('./test-A/out.tsv', sep='\\n')\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
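
The AttributeError captured above (module 'torch' has no attribute 'nn') usually points at a broken or shadowed torch installation rather than at the model code. A quick diagnostic sketch; the checks are assumptions about the environment, not part of the original notebook:

import torch
import torch.nn as nn  # an explicit submodule import fails loudly if the install is incomplete

print(torch.__version__)
print(torch.__file__)  # a stray local file or folder named 'torch' would shadow the real package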

5272  dev-0/in.tsv  Normal file

File diff suppressed because one or more lines are too long

5272  dev-0/out.tsv  Normal file

File diff suppressed because it is too large

726  run.ipynb  Normal file
@@ -0,0 +1,726 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "74100403-147c-42cd-8285-e30778c0fb66",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import torch\n",
"import csv\n",
"import lzma\n",
"import gensim.downloader\n",
"from nltk import word_tokenize"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "bf211ece-e27a-4119-a1b9-9a9a610cfb46",
"metadata": {},
"outputs": [],
"source": [
"def predict_year(x, path_out, model):\n",
" results = model.predict(x)\n",
" with open(path_out, 'wt') as file:\n",
" for r in results:\n",
" file.write(str(r) + '\\n') "
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1ec57d97-a852-490e-8da4-d1e4c9676cd6",
"metadata": {},
"outputs": [],
"source": [
"def read_file(filename):\n",
" result = []\n",
" with open(filename, 'r', encoding=\"utf-8\") as file:\n",
" for line in file:\n",
" text = line.split(\"\\t\")[0].strip()\n",
" result.append(text)\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "86fbfb79-76e7-49f5-b722-2827f93cb03f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>have you had an medical issues recently?</td>\n",
" <td>1335187994</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>It's supposedly aluminum, barium, and strontiu...</td>\n",
" <td>1346187161</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Nobel prizes don't make you rich.</td>\n",
" <td>1337160218</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>I came for the article, I stayed for the doctor.</td>\n",
" <td>1277674344</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>you resorted to insults AND got owned directly...</td>\n",
" <td>1348538535</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199995</th>\n",
" <td>It's really sad. My sister used to believe tha...</td>\n",
" <td>1334111989</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199996</th>\n",
" <td>I don't mean it in a dickish way, I'm being se...</td>\n",
" <td>1322700456</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199997</th>\n",
" <td>Fair enough, I stand corrected.</td>\n",
" <td>1354646212</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199998</th>\n",
" <td>Right. Scientists tend to think and conclude l...</td>\n",
" <td>1348777201</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199999</th>\n",
" <td>Because they are illiterate</td>\n",
" <td>1249579722</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>200000 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1\n",
"0 have you had an medical issues recently? 1335187994\n",
"1 It's supposedly aluminum, barium, and strontiu... 1346187161\n",
"2 Nobel prizes don't make you rich. 1337160218\n",
"3 I came for the article, I stayed for the doctor. 1277674344\n",
"4 you resorted to insults AND got owned directly... 1348538535\n",
"... ... ...\n",
"199995 It's really sad. My sister used to believe tha... 1334111989\n",
"199996 I don't mean it in a dickish way, I'm being se... 1322700456\n",
"199997 Fair enough, I stand corrected. 1354646212\n",
"199998 Right. Scientists tend to think and conclude l... 1348777201\n",
"199999 Because they are illiterate 1249579722\n",
"\n",
"[200000 rows x 2 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x_train = pd.read_table('train/in.tsv', sep='\\t', header=None, quoting=3)\n",
"x_train = x_train[0:200000]\n",
"x_train"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8960c975-f756-4e36-a1ce-e9fd5fdf8fe3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199995</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199996</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199997</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199998</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199999</th>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>200000 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" 0\n",
"0 1\n",
"1 0\n",
"2 0\n",
"3 0\n",
"4 0\n",
"... ..\n",
"199995 0\n",
"199996 0\n",
"199997 1\n",
"199998 1\n",
"199999 0\n",
"\n",
"[200000 rows x 1 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open('train/expected.tsv', 'r', encoding='utf8') as file:\n",
" y_train = pd.read_csv(file, sep='\\t', header=None)\n",
"y_train = y_train[0:200000]\n",
"y_train"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "6b27e6ce-e9fd-41a1-aacf-53a5fde0a7c1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>In which case, tell them I'm in work, or dead,...</td>\n",
" <td>1328302967</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Put me down as another for Mysterious Universe...</td>\n",
" <td>1347836881</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>The military of any country would never admit ...</td>\n",
" <td>1331905826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>An example would have been more productive tha...</td>\n",
" <td>1315584834</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>sorry, but the authors of this article admit t...</td>\n",
" <td>1347389166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5267</th>\n",
" <td>Your fault for going at all. That's how we get...</td>\n",
" <td>1308176634</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5268</th>\n",
" <td>EVP....that's a shot in the GH drinking game.</td>\n",
" <td>1354408646</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5269</th>\n",
" <td>i think a good hard massage is good for you. t...</td>\n",
" <td>1305726318</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5270</th>\n",
" <td>Interesting theory. Makes my imagination run w...</td>\n",
" <td>1339839088</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5271</th>\n",
" <td>Tampering of candy? More like cooking somethin...</td>\n",
" <td>1320262659</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5272 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1\n",
"0 In which case, tell them I'm in work, or dead,... 1328302967\n",
"1 Put me down as another for Mysterious Universe... 1347836881\n",
"2 The military of any country would never admit ... 1331905826\n",
"3 An example would have been more productive tha... 1315584834\n",
"4 sorry, but the authors of this article admit t... 1347389166\n",
"... ... ...\n",
"5267 Your fault for going at all. That's how we get... 1308176634\n",
"5268 EVP....that's a shot in the GH drinking game. 1354408646\n",
"5269 i think a good hard massage is good for you. t... 1305726318\n",
"5270 Interesting theory. Makes my imagination run w... 1339839088\n",
"5271 Tampering of candy? More like cooking somethin... 1320262659\n",
"\n",
"[5272 rows x 2 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open('dev-0/in.tsv', 'r', encoding='utf8') as file:\n",
" x_dev = pd.read_csv(file, sep='\\t', header=None)\n",
"x_dev"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "99ae526d-9b7c-493f-be4f-f95b1c8f4b81",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Gentleman, I believe we can agree that this is...</td>\n",
" <td>1304170330</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>The problem is that it will just turn it r/nos...</td>\n",
" <td>1353763204</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Well, according to some Christian apologists, ...</td>\n",
" <td>1336314173</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Don't know if this is what you are looking for...</td>\n",
" <td>1348860314</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>I respect what you're saying completely. I jus...</td>\n",
" <td>1341285952</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5147</th>\n",
" <td>GAMBIT</td>\n",
" <td>1326441107</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5148</th>\n",
" <td>&amp;gt;Joe Rogan is no snake oil salesman.\\n\\nHe ...</td>\n",
" <td>1319464245</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5149</th>\n",
" <td>Reading further, Sagan does seem to agree with...</td>\n",
" <td>1322126150</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5150</th>\n",
" <td>Notice that they never invoke god, or any othe...</td>\n",
" <td>1307679295</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5151</th>\n",
" <td>They might co-ordinate an anniversary attack o...</td>\n",
" <td>1342409261</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5152 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1\n",
"0 Gentleman, I believe we can agree that this is... 1304170330\n",
"1 The problem is that it will just turn it r/nos... 1353763204\n",
"2 Well, according to some Christian apologists, ... 1336314173\n",
"3 Don't know if this is what you are looking for... 1348860314\n",
"4 I respect what you're saying completely. I jus... 1341285952\n",
"... ... ...\n",
"5147 GAMBIT 1326441107\n",
"5148 &gt;Joe Rogan is no snake oil salesman.\\n\\nHe ... 1319464245\n",
"5149 Reading further, Sagan does seem to agree with... 1322126150\n",
"5150 Notice that they never invoke god, or any othe... 1307679295\n",
"5151 They might co-ordinate an anniversary attack o... 1342409261\n",
"\n",
"[5152 rows x 2 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open('test-A/in.tsv', 'r', encoding='utf8') as file:\n",
" x_test = pd.read_csv(file, sep='\\t', header=None)\n",
"x_test"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "dba17668-971f-47f8-99ce-fc840b5cb74a",
"metadata": {},
"outputs": [],
"source": [
"class NeuralNetworkModel(torch.nn.Module):\n",
" def __init__(self):\n",
" super(NeuralNetworkModel, self).__init__()\n",
" self.l01 = torch.nn.Linear(300, 300)\n",
" self.l02 = torch.nn.Linear(300, 1)\n",
"\n",
" def forward(self, x):\n",
" x = self.l01(x)\n",
" x = torch.relu(x)\n",
" x = self.l02(x)\n",
" x = torch.sigmoid(x)\n",
" return x\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1a275c1d-75bc-4290-9332-56396d16a0f2",
"metadata": {},
"outputs": [],
"source": [
"x_train = x_train[0].str.lower()\n",
"y_train = y_train[0]\n",
"x_dev = x_dev[0].str.lower()\n",
"x_test = x_test[0].str.lower()\n",
"\n",
"x_train = [word_tokenize(x) for x in x_train]\n",
"x_dev = [word_tokenize(x) for x in x_dev]\n",
"x_test = [word_tokenize(x) for x in x_test]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "031a3670-3be7-4146-97b4-0dacd4f9ae58",
"metadata": {},
"outputs": [],
"source": [
"from gensim.test.utils import common_texts\n",
"from gensim.models import Word2Vec\n",
"\n",
"word2vec = gensim.downloader.load('word2vec-google-news-300')\n",
"x_train = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in x_train]\n",
"x_dev = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in x_dev]\n",
"x_test = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in x_test]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "b7defd18-e281-4cf6-9941-cee560749677",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\korne\\AppData\\Local\\Temp\\ipykernel_22024\\3484013121.py:10: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\builder\\windows\\pytorch\\torch\\csrc\\utils\\tensor_new.cpp:210.)\n",
" X = torch.tensor(X)\n"
]
}
],
"source": [
"model = NeuralNetworkModel()\n",
"BATCH_SIZE = 5\n",
"criterion = torch.nn.BCELoss()\n",
"optimizer = torch.optim.SGD(model.parameters(), lr=0.01)\n",
"\n",
"for epoch in range(BATCH_SIZE):\n",
" model.train()\n",
" for i in range(0, y_train.shape[0], BATCH_SIZE):\n",
" X = x_train[i:i + BATCH_SIZE]\n",
" X = torch.tensor(X)\n",
" y = y_train[i:i + BATCH_SIZE]\n",
" y = torch.tensor(y.astype(np.float32).to_numpy()).reshape(-1, 1)\n",
" optimizer.zero_grad()\n",
" outputs = model(X.float())\n",
" loss = criterion(outputs, y)\n",
" loss.backward()\n",
" optimizer.step()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "92c69ddd-fe58-477f-b2c2-06324a983bcc",
"metadata": {},
"outputs": [],
"source": [
"y_dev = []\n",
"y_test = []\n",
"model.eval()\n",
"\n",
"with torch.no_grad():\n",
" for i in range(0, len(x_dev), BATCH_SIZE):\n",
" X = x_dev[i:i + BATCH_SIZE]\n",
" X = torch.tensor(X)\n",
" outputs = model(X.float())\n",
" prediction = (outputs > 0.5)\n",
" y_dev += prediction.tolist()\n",
"\n",
" for i in range(0, len(x_test), BATCH_SIZE):\n",
" X = x_test[i:i + BATCH_SIZE]\n",
" X = torch.tensor(X)\n",
" outputs = model(X.float())\n",
" y = (outputs >= 0.5)\n",
" y_test += prediction.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "caff921c-d0ab-4fce-a17f-6610266b404d",
"metadata": {},
"outputs": [],
"source": [
"y_dev = np.asarray(y_dev, dtype=np.int32)\n",
"y_test = np.asarray(y_test, dtype=np.int32)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "73076eb2-810f-4f85-aa3f-05ee884c413b",
"metadata": {},
"outputs": [],
"source": [
"with open('./dev-0/out.tsv', 'wt') as file:\n",
" for r in y_dev:\n",
" file.write(str(r) + '\\n') "
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "ddda251c-cafa-40f8-a020-48310a9f23b6",
"metadata": {},
"outputs": [],
"source": [
"with open('./test-A/out.tsv', 'wt') as file:\n",
" for r in y_test:\n",
" file.write(str(r) + '\\n') "
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "5730562a-0200-4c8f-8f73-992fa2b36133",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[NbConvertApp] Converting notebook run.ipynb to script\n",
"[NbConvertApp] Writing 3816 bytes to run.py\n"
]
}
],
"source": [
"!jupyter nbconvert --to script run.ipynb"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
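
The UserWarning recorded in run.ipynb (creating a tensor from a list of numpy.ndarrays is slow) can be avoided by stacking the per-document vectors into one contiguous array and slicing batches from it. A small self-contained sketch; the random stand-in data is an assumption, in the notebook the list comes from the word2vec averaging step:

import numpy as np
import torch

BATCH_SIZE = 5
x_train = [np.random.rand(300).astype(np.float32) for _ in range(12)]  # stand-in for the doc vectors

features = np.stack(x_train)  # one contiguous (N, 300) array instead of a list of arrays
for i in range(0, len(features), BATCH_SIZE):
    X = torch.from_numpy(features[i:i + BATCH_SIZE])  # no per-element copy, no warning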

191  run.py  Normal file
@@ -0,0 +1,191 @@
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import numpy as np
import pandas as pd
import torch
import csv
import lzma
import gensim.downloader
from nltk import word_tokenize
# In[ ]:
# In[2]:
def predict_year(x, path_out, model):
results = model.predict(x)
with open(path_out, 'wt') as file:
for r in results:
file.write(str(r) + '\n')
# In[3]:
def read_file(filename):
result = []
with open(filename, 'r', encoding="utf-8") as file:
for line in file:
text = line.split("\t")[0].strip()
result.append(text)
return result
# In[4]:
x_train = pd.read_table('train/in.tsv', sep='\t', header=None, quoting=3)
x_train = x_train[0:200000]
x_train
# In[5]:
with open('train/expected.tsv', 'r', encoding='utf8') as file:
y_train = pd.read_csv(file, sep='\t', header=None)
y_train = y_train[0:200000]
y_train
# In[6]:
with open('dev-0/in.tsv', 'r', encoding='utf8') as file:
x_dev = pd.read_csv(file, sep='\t', header=None)
x_dev
# In[7]:
with open('test-A/in.tsv', 'r', encoding='utf8') as file:
x_test = pd.read_csv(file, sep='\t', header=None)
x_test
# In[8]:
class NeuralNetworkModel(torch.nn.Module):
def __init__(self):
super(NeuralNetworkModel, self).__init__()
self.l01 = torch.nn.Linear(300, 300)
self.l02 = torch.nn.Linear(300, 1)
def forward(self, x):
x = self.l01(x)
x = torch.relu(x)
x = self.l02(x)
x = torch.sigmoid(x)
return x
# In[9]:
x_train = x_train[0].str.lower()
y_train = y_train[0]
x_dev = x_dev[0].str.lower()
x_test = x_test[0].str.lower()
x_train = [word_tokenize(x) for x in x_train]
x_dev = [word_tokenize(x) for x in x_dev]
x_test = [word_tokenize(x) for x in x_test]
# In[11]:
word2vec = gensim.downloader.load('word2vec-google-news-300')
x_train = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in x_train]
x_dev = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in x_dev]
x_test = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in x_test]
# In[ ]:
model = NeuralNetworkModel()
BATCH_SIZE = 5
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
EPOCHS = 5
for epoch in range(EPOCHS):
model.train()
for i in range(0, y_train.shape[0], BATCH_SIZE):
X = x_train[i:i + BATCH_SIZE]
X = torch.tensor(X)
y = y_train[i:i + BATCH_SIZE]
y = torch.tensor(y.astype(np.float32).to_numpy()).reshape(-1, 1)
optimizer.zero_grad()
outputs = model(X.float())
loss = criterion(outputs, y)
loss.backward()
optimizer.step()
# In[ ]:
y_dev = []
y_test = []
model.eval()
with torch.no_grad():
for i in range(0, len(x_dev), BATCH_SIZE):
X = x_dev[i:i + BATCH_SIZE]
X = torch.tensor(X)
outputs = model(X.float())
prediction = (outputs > 0.5)
y_dev += prediction.tolist()
for i in range(0, len(x_test), BATCH_SIZE):
X = x_test[i:i + BATCH_SIZE]
X = torch.tensor(X)
outputs = model(X.float())
y = (outputs >= 0.5)
y_test += prediction.tolist()
# In[ ]:
# predictions were collected as one-element lists, so flatten to 1-D before writing
y_dev = np.asarray(y_dev, dtype=np.int32).ravel()
y_test = np.asarray(y_test, dtype=np.int32).ravel()
# In[ ]:
with open('./dev-0/out.tsv', 'wt') as file:
for r in y_dev:
file.write(str(r) + '\n')
# In[ ]:
with open('./test-A/out.tsv', 'wt') as file:
for r in y_test:
file.write(str(r) + '\n')
# In[ ]:
get_ipython().system('jupyter nbconvert --to script run.ipynb')
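
Note that the last line of run.py works only under IPython; invoked as plain python run.py it raises NameError. A sketch of a guard, assuming the notebook export is wanted only in interactive runs:

try:
    get_ipython().system('jupyter nbconvert --to script run.ipynb')
except NameError:
    pass  # plain-python run: skip the notebook export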

180  sceptic.ipynb  Normal file
@@ -0,0 +1,180 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "equal-singles",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import torch\n",
"import csv\n",
"import lzma\n",
"import gensim.downloader\n",
"from nltk import word_tokenize"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "involved-understanding",
"metadata": {},
"outputs": [],
"source": [
"x_train = pd.read_table('in.tsv', sep='\\t', header=None, quoting=3)\n",
"y_train = pd.read_table('expected.tsv', sep='\\t', header=None, quoting=3)\n",
"#x_dev = pd.read_table('dev-0/in.tsv.xz', compression='xz', sep='\\t', header=None, quoting=3)\n",
"#x_test = pd.read_table('test-A/in.tsv.xz', compression='xz', sep='\\t', header=None, quoting=3)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "collaborative-cincinnati",
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "module 'torch' has no attribute 'nn'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-5-11c9482004ae>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m#print('inicjalizacja modelu')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mclass\u001b[0m \u001b[0mNeuralNetworkModel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mModule\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mNeuralNetworkModel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ml01\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m300\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m300\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: module 'torch' has no attribute 'nn'"
]
}
],
"source": [
"#print('inicjalizacja modelu')\n",
"class NeuralNetworkModel(torch.nn.Module):\n",
" def __init__(self):\n",
" super(NeuralNetworkModel, self).__init__()\n",
" self.l01 = torch.nn.Linear(300, 300)\n",
" self.l02 = torch.nn.Linear(300, 1)\n",
"\n",
" def forward(self, x):\n",
" x = self.l01(x)\n",
" x = torch.relu(x)\n",
" x = self.l02(x)\n",
" x = torch.sigmoid(x)\n",
" return x"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "hydraulic-business",
"metadata": {},
"outputs": [],
"source": [
"#print('przygotowanie danych')\n",
"\n",
"x_train = x_train.str.lower()\n",
"x_dev = x_dev[0].str.lower()\n",
"x_test = x_test[0].str.lower()\n",
"\n",
"x_train = [word_tokenize(x) for x in x_train]\n",
"x_dev = [word_tokenize(x) for x in x_dev]\n",
"x_test = [word_tokenize(x) for x in x_test]\n",
"\n",
"word2vec = gensim.downloader.load('word2vec-google-news-300')\n",
"x_train = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in x_train]\n",
"x_dev = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in x_dev]\n",
"x_test = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in x_test]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "heavy-sandwich",
"metadata": {},
"outputs": [],
"source": [
"#print('trenowanie modelu')\n",
"model = NeuralNetworkModel()\n",
"BATCH_SIZE = 5\n",
"criterion = torch.nn.BCELoss()\n",
"optimizer = torch.optim.SGD(model.parameters(), lr=0.01)\n",
"\n",
"for epoch in range(BATCH_SIZE):\n",
" model.train()\n",
" for i in range(0, y_train.shape[0], BATCH_SIZE):\n",
" X = x_train[i:i + BATCH_SIZE]\n",
" X = torch.tensor(X)\n",
" y = y_train[i:i + BATCH_SIZE]\n",
" y = torch.tensor(y.astype(np.float32).to_numpy()).reshape(-1, 1)\n",
" optimizer.zero_grad()\n",
" outputs = model(X.float())\n",
" loss = criterion(outputs, y)\n",
" loss.backward()\n",
" optimizer.step()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "small-pavilion",
"metadata": {},
"outputs": [],
"source": [
"#print('predykcja wynikow')\n",
"y_dev = []\n",
"y_test = []\n",
"model.eval()\n",
"\n",
"with torch.no_grad():\n",
" for i in range(0, len(x_dev), BATCH_SIZE):\n",
" X = x_dev[i:i + BATCH_SIZE]\n",
" X = torch.tensor(X)\n",
" outputs = model(X.float())\n",
" prediction = (outputs > 0.5)\n",
" y_dev += prediction.tolist()\n",
"\n",
" for i in range(0, len(x_test), BATCH_SIZE):\n",
" X = x_test[i:i + BATCH_SIZE]\n",
" X = torch.tensor(X)\n",
" outputs = model(X.float())\n",
" y = (outputs >= 0.5)\n",
" y_test += prediction.tolist()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "toxic-pendant",
"metadata": {},
"outputs": [],
"source": [
"# print('eksportowanie do plików')\n",
"y_dev = np.asarray(y_dev, dtype=np.int32)\n",
"y_test = np.asarray(y_test, dtype=np.int32)\n",
"y_dev.tofile('./dev-0/out.tsv', sep='\\n')\n",
"y_test.tofile('./test-A/out.tsv', sep='\\n')\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
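
For the out.tsv exports at the end of sceptic.ipynb, numpy.savetxt is a slightly safer alternative to ndarray.tofile: it writes one integer label per line and appends a final newline. A minimal sketch with stand-in predictions (the example array is an assumption):

import numpy as np

y_dev = np.asarray([[True], [False], [True]], dtype=np.int32)  # stand-in predictions, shape (N, 1)
np.savetxt('out.tsv', y_dev.ravel(), fmt='%d')  # one label per line, trailing newline included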

5152  test-A/in.tsv  Normal file

File diff suppressed because one or more lines are too long

2062  test-A/out.tsv  Normal file

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

289579  train/in.tsv  Normal file

File diff suppressed because one or more lines are too long

BIN  word2vec.model  Normal file

Binary file not shown.