Test 3 Outputs

This commit is contained in:
Dominik Strzako 2021-05-22 17:07:56 +02:00
parent c68b2d0d1a
commit 265216824e
3 changed files with 26 additions and 28 deletions

View File

@ -82,8 +82,9 @@
"metadata": {},
"outputs": [],
"source": [
"# word2vec\n",
"# word2vec zgodnie z poradą Pana Jakuba\n",
"# https://radimrehurek.com/gensim/auto_examples/howtos/run_downloader_api.html\n",
"# https://www.kaggle.com/kstathou/word-embeddings-logistic-regression\n",
"w2v = api.load('word2vec-google-news-300')\n",
"X_train = [np.mean([w2v[w] for w in content if w in w2v] or [np.zeros(300)], axis=0) for content in X_train]\n",
"X_dev = [np.mean([w2v[w] for w in content if w in w2v] or [np.zeros(300)], axis=0) for content in X_dev]\n",
@ -129,32 +130,26 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 59,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predykcje...\n"
]
}
],
"outputs": [],
"source": [
"print('Predykcje...')\n",
"dev_prediction = []\n",
"test_prediction = []\n",
"y_dev = []\n",
"y_test = []\n",
"\n",
"# model.eval() switches all layers of the model into evaluation mode\n",
"model.eval()\n",
"\n",
"# torch.no_grad() disables the autograd engine, which reduces memory usage and speeds up computation\n",
"with torch.no_grad():\n",
" for i in range(0, len(X_dev), batch_size):\n",
" X = X_dev[i:i+batch_size]\n",
" X = torch.tensor(X)\n",
"\n",
" \n",
" outputs = model(X.float())\n",
"\n",
" prediction = (outputs > 0.5)\n",
" dev_prediction = dev_prediction + prediction.tolist()\n",
" \n",
" y = (outputs > 0.5)\n",
" y_dev.extend(y)\n",
"\n",
" for i in range(0, len(X_test), batch_size):\n",
" X = X_test[i:i+batch_size]\n",
@ -162,21 +157,24 @@
"\n",
" outputs = model(X.float())\n",
"\n",
" prediction = (outputs > 0.5)\n",
" test_prediction = test_prediction + prediction.tolist()\n",
"\n",
"dev_prediction = np.asarray(dev_prediction, dtype=np.int32)\n",
"test_prediction = np.asarray(test_prediction, dtype=np.int32)"
" y = (outputs > 0.5)\n",
" y_test.extend(y)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"dev_prediction.tofile('./dev-0/out.tsv', sep='\\n')\n",
"test_prediction.tofile('./test-A/out.tsv', sep='\\n')"
"y_dev = np.asarray(y_dev, dtype=np.int32)\n",
"y_test = np.asarray(y_test, dtype=np.int32)\n",
"\n",
"y_dev_df = pd.DataFrame({'label':y_dev})\n",
"y_test_df = pd.DataFrame({'label':y_test})\n",
"\n",
"y_dev_df.to_csv(r'dev-0/out.tsv', sep='\\t', index=False, header=False)\n",
"y_test_df.to_csv(r'test-A/out.tsv', sep='\\t', index=False, header=False)"
]
},
{