update

2022-06-07 17:32:52 +02:00 · 2022-06-07 17:32:52 +02:00 · 3683d13395
commit 3683d13395
parent 565e5dfb90
7 changed files with 455750 additions and 376587 deletions
--- a/.ipynb_checkpoints/run-checkpoint.ipynb
+++ b/.ipynb_checkpoints/run-checkpoint.ipynb
@@ -2,8 +2,8 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 6,
-   "id": "4206eb3f",
+   "execution_count": 11,
+   "id": "5182690b",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -14,8 +14,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
-   "id": "fde46276",
+   "execution_count": 12,
+   "id": "6ebd5310",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -29,14 +29,14 @@
    "    with open(path_out, 'w', encoding='utf-8') as file:\n",
    "        for example in data['train_input']:\n",
    "            predicted = model.predict(example)\n",
-    "            text_predicted = dict((value, key) for key, value in map_dict.items()).get(predicted)\n",
+    "            text_predicted = dict((value, key) for key, value in categories.items()).get(predicted)\n",
    "            file.write(str(text_predicted) + '\\n')\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
-   "id": "27e69709",
+   "execution_count": 13,
+   "id": "e5d2de9f",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -57,8 +57,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
-   "id": "c406b425",
+   "execution_count": null,
+   "id": "f6ba46b9",
   "metadata": {},
   "outputs": [
    {
@@ -88,7 +88,7 @@
    "    \n",
    "data['train_input'] = data.apply(lambda row: to_vowpalwabbit(row, categories), axis=1)\n",
    "\n",
-    "model = vowpalwabbit.Workspace('--oaa 3 --quiet')\n",
+    "model = vowpalwabbit.Workspace('--oaa 7 --quite)\n",
    "\n",
    "for example in data['train_input']:\n",
    "    model.learn(example)\n",
@@ -97,6 +97,25 @@
    "prediction('test-A/in.tsv', 'test-A/out.tsv', model, categories)\n",
    "prediction('test-B/in.tsv', 'test-B/out.tsv', model, categories)"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "0cdf4eaa",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[NbConvertApp] Converting notebook run.ipynb to script\n",
+      "[NbConvertApp] Writing 1933 bytes to run.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "!jupyter nbconvert --to script run.ipynb"
+   ]
  }
 ],
 "metadata": {
--- a/dev-0/out.tsv
+++ b/dev-0/out.tsv
--- a/run.ipynb
+++ b/run.ipynb
@@ -2,8 +2,8 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 6,
-   "id": "4206eb3f",
+   "execution_count": 23,
+   "id": "4618be1f",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -14,8 +14,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
-   "id": "fde46276",
+   "execution_count": 24,
+   "id": "fc616309",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -29,14 +29,14 @@
    "    with open(path_out, 'w', encoding='utf-8') as file:\n",
    "        for example in data['train_input']:\n",
    "            predicted = model.predict(example)\n",
-    "            text_predicted = dict((value, key) for key, value in map_dict.items()).get(predicted)\n",
+    "            text_predicted = dict((value, key) for key, value in categories.items()).get(predicted)\n",
    "            file.write(str(text_predicted) + '\\n')\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
-   "id": "27e69709",
+   "execution_count": 25,
+   "id": "9553e9b0",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -57,8 +57,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
-   "id": "c406b425",
+   "execution_count": 28,
+   "id": "96e97cdf",
   "metadata": {},
   "outputs": [
    {
@@ -88,7 +88,7 @@
    "    \n",
    "data['train_input'] = data.apply(lambda row: to_vowpalwabbit(row, categories), axis=1)\n",
    "\n",
-    "model = vowpalwabbit.Workspace('--oaa 3 --quiet')\n",
+    "model = vowpalwabbit.Workspace('--oaa 7')\n",
    "\n",
    "for example in data['train_input']:\n",
    "    model.learn(example)\n",
@@ -97,6 +97,25 @@
    "prediction('test-A/in.tsv', 'test-A/out.tsv', model, categories)\n",
    "prediction('test-B/in.tsv', 'test-B/out.tsv', model, categories)"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "e1fe9b4c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[NbConvertApp] Converting notebook run.ipynb to script\n",
+      "[NbConvertApp] Writing 1933 bytes to run.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "!jupyter nbconvert --to script run.ipynb"
+   ]
  }
 ],
 "metadata": {
--- a/run.py
+++ b/run.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # coding: utf-8

-# In[6]:
+# In[23]:


 import vowpalwabbit
@@ -9,7 +9,7 @@ import pandas as pd
 import re


-# In[7]:
+# In[24]:


 def prediction(path_in, path_out, model, categories):
@@ -22,11 +22,11 @@ def prediction(path_in, path_out, model, categories):
    with open(path_out, 'w', encoding='utf-8') as file:
        for example in data['train_input']:
            predicted = model.predict(example)
-            text_predicted = dict((value, key) for key, value in map_dict.items()).get(predicted)
+            text_predicted = dict((value, key) for key, value in categories.items()).get(predicted)
            file.write(str(text_predicted) + '\n')


-# In[8]:
+# In[25]:


 def to_vowpalwabbit(row, categories):
@@ -44,7 +44,7 @@ def to_vowpalwabbit(row, categories):
    return vw


-# In[9]:
+# In[28]:


 x_train = pd.read_csv('train/in.tsv', header=None, sep='\t')
@@ -65,7 +65,7 @@ print(categories)
    
 data['train_input'] = data.apply(lambda row: to_vowpalwabbit(row, categories), axis=1)

-model = vowpalwabbit.Workspace('--oaa 3 --quiet')
+model = vowpalwabbit.Workspace('--oaa 7')

 for example in data['train_input']:
    model.learn(example)
@@ -74,3 +74,9 @@ prediction('dev-0/in.tsv', 'dev-0/out.tsv', model, categories)
 prediction('test-A/in.tsv', 'test-A/out.tsv', model, categories)
 prediction('test-B/in.tsv', 'test-B/out.tsv', model, categories)

+
+# In[15]:
+
+
+get_ipython().system('jupyter nbconvert --to script run.ipynb')
+
--- a/test-A/out.tsv
+++ b/test-A/out.tsv
--- a/test-B/.ipynb_checkpoints/out-checkpoint.tsv
+++ b/test-B/.ipynb_checkpoints/out-checkpoint.tsv
--- a/test-B/out.tsv
+++ b/test-B/out.tsv