diff --git a/LogReg_Test.ipynb b/LogReg_Test.ipynb index 0541e4b..0e3f071 100644 --- a/LogReg_Test.ipynb +++ b/LogReg_Test.ipynb @@ -82,8 +82,9 @@ "metadata": {}, "outputs": [], "source": [ - "# word2vec\n", + "# word2vec zgodnie z poradą Pana Jakuba\n", "# https://radimrehurek.com/gensim/auto_examples/howtos/run_downloader_api.html\n", + "# https://www.kaggle.com/kstathou/word-embeddings-logistic-regression\n", "w2v = api.load('word2vec-google-news-300')\n", "X_train = [np.mean([w2v[w] for w in content if w in w2v] or [np.zeros(300)], axis=0) for content in X_train]\n", "X_dev = [np.mean([w2v[w] for w in content if w in w2v] or [np.zeros(300)], axis=0) for content in X_dev]\n", @@ -129,32 +130,26 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 59, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Predykcje...\n" - ] - } - ], + "outputs": [], "source": [ - "print('Predykcje...')\n", - "dev_prediction = []\n", - "test_prediction = []\n", + "y_dev = []\n", + "y_test = []\n", "\n", + "#model.eval() will notify all your layers that you are in eval mode\n", "model.eval()\n", + "\n", + "#torch.no_grad() impacts the autograd engine and deactivates it. 
It will reduce memory usage and speed up\n", "with torch.no_grad():\n", " for i in range(0, len(X_dev), batch_size):\n", " X = X_dev[i:i+batch_size]\n", " X = torch.tensor(X)\n", - "\n", + " \n", " outputs = model(X.float())\n", - "\n", - " prediction = (outputs > 0.5)\n", - " dev_prediction = dev_prediction + prediction.tolist()\n", + " \n", + " y = (outputs > 0.5)\n", + " y_dev.extend(y)\n", "\n", " for i in range(0, len(X_test), batch_size):\n", " X = X_test[i:i+batch_size]\n", @@ -162,21 +157,24 @@ "\n", " outputs = model(X.float())\n", "\n", - " prediction = (outputs > 0.5)\n", - " test_prediction = test_prediction + prediction.tolist()\n", - "\n", - "dev_prediction = np.asarray(dev_prediction, dtype=np.int32)\n", - "test_prediction = np.asarray(test_prediction, dtype=np.int32)" + " y = (outputs > 0.5)\n", + " y_test.extend(y)" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ - "dev_prediction.tofile('./dev-0/out.tsv', sep='\\n')\n", - "test_prediction.tofile('./test-A/out.tsv', sep='\\n')" + "y_dev = np.asarray(y_dev, dtype=np.int32)\n", + "y_test = np.asarray(y_test, dtype=np.int32)\n", + "\n", + "y_dev_df = pd.DataFrame({'label':y_dev})\n", + "y_test_df = pd.DataFrame({'label':y_test})\n", + "\n", + "y_dev_df.to_csv(r'dev-0/out.tsv', sep='\\t', index=False, header=False)\n", + "y_test_df.to_csv(r'test-A/out.tsv', sep='\\t', index=False, header=False)" ] }, { diff --git a/dev-0/out.tsv b/dev-0/out.tsv index 328b345..575b176 100644 --- a/dev-0/out.tsv +++ b/dev-0/out.tsv @@ -5269,4 +5269,4 @@ 0 0 0 -0 \ No newline at end of file +0 diff --git a/test-A/out.tsv b/test-A/out.tsv index 8a1999e..2662cf4 100644 --- a/test-A/out.tsv +++ b/test-A/out.tsv @@ -5149,4 +5149,4 @@ 0 0 0 -0 \ No newline at end of file +0