Test 3 Outputs

2021-05-22 17:07:56 +02:00 · 2021-05-22 17:07:56 +02:00 · 265216824e
commit 265216824e
parent c68b2d0d1a
3 changed files with 26 additions and 28 deletions
--- a/LogReg_Test.ipynb
+++ b/LogReg_Test.ipynb
@ -82,8 +82,9 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# word2vec\n",
+    "# word2vec zgodnie z poradą Pana Jakuba\n",
    "# https://radimrehurek.com/gensim/auto_examples/howtos/run_downloader_api.html\n",
+    "# https://www.kaggle.com/kstathou/word-embeddings-logistic-regression\n",
    "w2v = api.load('word2vec-google-news-300')\n",
    "X_train = [np.mean([w2v[w] for w in content if w in w2v] or [np.zeros(300)], axis=0) for content in X_train]\n",
    "X_dev = [np.mean([w2v[w] for w in content if w in w2v] or [np.zeros(300)], axis=0) for content in X_dev]\n",
@ -129,32 +130,26 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 59,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Predykcje...\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
-    "print('Predykcje...')\n",
-    "dev_prediction = []\n",
-    "test_prediction = []\n",
+    "y_dev = []\n",
+    "y_test = []\n",
    "\n",
+    "# model.eval() puts every layer of the model into evaluation mode\n",
    "model.eval()\n",
+    "\n",
+    "# torch.no_grad() disables the autograd engine, which reduces memory usage and speeds up inference\n",
    "with torch.no_grad():\n",
    "    for i in range(0, len(X_dev), batch_size):\n",
    "        X = X_dev[i:i+batch_size]\n",
    "        X = torch.tensor(X)\n",
-    "\n",
+    "        \n",
    "        outputs = model(X.float())\n",
-    "\n",
-    "        prediction = (outputs > 0.5)\n",
-    "        dev_prediction = dev_prediction + prediction.tolist()\n",
+    "        \n",
+    "        y = (outputs > 0.5)\n",
+    "        y_dev.extend(y)\n",
    "\n",
    "    for i in range(0, len(X_test), batch_size):\n",
    "        X = X_test[i:i+batch_size]\n",
@ -162,21 +157,24 @@
    "\n",
    "        outputs = model(X.float())\n",
    "\n",
-    "        prediction = (outputs > 0.5)\n",
-    "        test_prediction = test_prediction + prediction.tolist()\n",
-    "\n",
-    "dev_prediction = np.asarray(dev_prediction, dtype=np.int32)\n",
-    "test_prediction = np.asarray(test_prediction, dtype=np.int32)"
+    "        y = (outputs > 0.5)\n",
+    "        y_test.extend(y)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
-    "dev_prediction.tofile('./dev-0/out.tsv', sep='\\n')\n",
-    "test_prediction.tofile('./test-A/out.tsv', sep='\\n')"
+    "y_dev = np.asarray(y_dev, dtype=np.int32)\n",
+    "y_test = np.asarray(y_test, dtype=np.int32)\n",
+    "\n",
+    "y_dev_df = pd.DataFrame({'label':y_dev})\n",
+    "y_test_df = pd.DataFrame({'label':y_test})\n",
+    "\n",
+    "y_dev_df.to_csv(r'dev-0/out.tsv', sep='\\t', index=False,  header=False)\n",
+    "y_test_df.to_csv(r'test-A/out.tsv', sep='\\t', index=False,  header=False)"
   ]
  },
  {