Test 3 Outputs

This commit is contained in:
Dominik Strzako 2021-05-22 17:07:56 +02:00
parent c68b2d0d1a
commit 265216824e
3 changed files with 26 additions and 28 deletions

View File

@ -82,8 +82,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# word2vec\n", "# word2vec zgodnie z poradą Pana Jakuba\n",
"# https://radimrehurek.com/gensim/auto_examples/howtos/run_downloader_api.html\n", "# https://radimrehurek.com/gensim/auto_examples/howtos/run_downloader_api.html\n",
"# https://www.kaggle.com/kstathou/word-embeddings-logistic-regression\n",
"w2v = api.load('word2vec-google-news-300')\n", "w2v = api.load('word2vec-google-news-300')\n",
"X_train = [np.mean([w2v[w] for w in content if w in w2v] or [np.zeros(300)], axis=0) for content in X_train]\n", "X_train = [np.mean([w2v[w] for w in content if w in w2v] or [np.zeros(300)], axis=0) for content in X_train]\n",
"X_dev = [np.mean([w2v[w] for w in content if w in w2v] or [np.zeros(300)], axis=0) for content in X_dev]\n", "X_dev = [np.mean([w2v[w] for w in content if w in w2v] or [np.zeros(300)], axis=0) for content in X_dev]\n",
@ -129,32 +130,26 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 47, "execution_count": 59,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predykcje...\n"
]
}
],
"source": [ "source": [
"print('Predykcje...')\n", "y_dev = []\n",
"dev_prediction = []\n", "y_test = []\n",
"test_prediction = []\n",
"\n", "\n",
"#model.eval() will notify all your layers that you are in eval mode\n",
"model.eval()\n", "model.eval()\n",
"\n",
"#torch.no_grad() impacts the autograd engine and deactivate it. It will reduce memory usage and speed up\n",
"with torch.no_grad():\n", "with torch.no_grad():\n",
" for i in range(0, len(X_dev), batch_size):\n", " for i in range(0, len(X_dev), batch_size):\n",
" X = X_dev[i:i+batch_size]\n", " X = X_dev[i:i+batch_size]\n",
" X = torch.tensor(X)\n", " X = torch.tensor(X)\n",
"\n", " \n",
" outputs = model(X.float())\n", " outputs = model(X.float())\n",
"\n", " \n",
" prediction = (outputs > 0.5)\n", " y = (outputs > 0.5)\n",
" dev_prediction = dev_prediction + prediction.tolist()\n", " y_dev.extend(y)\n",
"\n", "\n",
" for i in range(0, len(X_test), batch_size):\n", " for i in range(0, len(X_test), batch_size):\n",
" X = X_test[i:i+batch_size]\n", " X = X_test[i:i+batch_size]\n",
@ -162,21 +157,24 @@
"\n", "\n",
" outputs = model(X.float())\n", " outputs = model(X.float())\n",
"\n", "\n",
" prediction = (outputs > 0.5)\n", " y = (outputs > 0.5)\n",
" test_prediction = test_prediction + prediction.tolist()\n", " y_test.extend(y)"
"\n",
"dev_prediction = np.asarray(dev_prediction, dtype=np.int32)\n",
"test_prediction = np.asarray(test_prediction, dtype=np.int32)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 49, "execution_count": 60,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"dev_prediction.tofile('./dev-0/out.tsv', sep='\\n')\n", "y_dev = np.asarray(y_dev, dtype=np.int32)\n",
"test_prediction.tofile('./test-A/out.tsv', sep='\\n')" "y_test = np.asarray(y_test, dtype=np.int32)\n",
"\n",
"y_dev_df = pd.DataFrame({'label':y_dev})\n",
"y_test_df = pd.DataFrame({'label':y_test})\n",
"\n",
"y_dev_df.to_csv(r'dev-0/out.tsv', sep='\\t', index=False, header=False)\n",
"y_test_df.to_csv(r'test-A/out.tsv', sep='\\t', index=False, header=False)"
] ]
}, },
{ {