This commit is contained in:
Sebastian 2022-06-07 17:32:52 +02:00
parent 565e5dfb90
commit 3683d13395
7 changed files with 455750 additions and 376587 deletions

View File

@ -2,8 +2,8 @@
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"id": "4206eb3f",
"execution_count": 11,
"id": "5182690b",
"metadata": {},
"outputs": [],
"source": [
@ -14,8 +14,8 @@
},
{
"cell_type": "code",
"execution_count": 7,
"id": "fde46276",
"execution_count": 12,
"id": "6ebd5310",
"metadata": {},
"outputs": [],
"source": [
@ -29,14 +29,14 @@
" with open(path_out, 'w', encoding='utf-8') as file:\n",
" for example in data['train_input']:\n",
" predicted = model.predict(example)\n",
" text_predicted = dict((value, key) for key, value in map_dict.items()).get(predicted)\n",
" text_predicted = dict((value, key) for key, value in categories.items()).get(predicted)\n",
" file.write(str(text_predicted) + '\\n')\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "27e69709",
"execution_count": 13,
"id": "e5d2de9f",
"metadata": {},
"outputs": [],
"source": [
@ -57,8 +57,8 @@
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c406b425",
"execution_count": null,
"id": "f6ba46b9",
"metadata": {},
"outputs": [
{
@ -88,7 +88,7 @@
" \n",
"data['train_input'] = data.apply(lambda row: to_vowpalwabbit(row, categories), axis=1)\n",
"\n",
"model = vowpalwabbit.Workspace('--oaa 3 --quiet')\n",
"model = vowpalwabbit.Workspace('--oaa 7 --quiet')\n",
"\n",
"for example in data['train_input']:\n",
" model.learn(example)\n",
@ -97,6 +97,25 @@
"prediction('test-A/in.tsv', 'test-A/out.tsv', model, categories)\n",
"prediction('test-B/in.tsv', 'test-B/out.tsv', model, categories)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "0cdf4eaa",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[NbConvertApp] Converting notebook run.ipynb to script\n",
"[NbConvertApp] Writing 1933 bytes to run.py\n"
]
}
],
"source": [
"!jupyter nbconvert --to script run.ipynb"
]
}
],
"metadata": {

298268
dev-0/out.tsv

File diff suppressed because it is too large Load Diff

View File

@ -2,8 +2,8 @@
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"id": "4206eb3f",
"execution_count": 23,
"id": "4618be1f",
"metadata": {},
"outputs": [],
"source": [
@ -14,8 +14,8 @@
},
{
"cell_type": "code",
"execution_count": 7,
"id": "fde46276",
"execution_count": 24,
"id": "fc616309",
"metadata": {},
"outputs": [],
"source": [
@ -29,14 +29,14 @@
" with open(path_out, 'w', encoding='utf-8') as file:\n",
" for example in data['train_input']:\n",
" predicted = model.predict(example)\n",
" text_predicted = dict((value, key) for key, value in map_dict.items()).get(predicted)\n",
" text_predicted = dict((value, key) for key, value in categories.items()).get(predicted)\n",
" file.write(str(text_predicted) + '\\n')\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "27e69709",
"execution_count": 25,
"id": "9553e9b0",
"metadata": {},
"outputs": [],
"source": [
@ -57,8 +57,8 @@
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c406b425",
"execution_count": 28,
"id": "96e97cdf",
"metadata": {},
"outputs": [
{
@ -88,7 +88,7 @@
" \n",
"data['train_input'] = data.apply(lambda row: to_vowpalwabbit(row, categories), axis=1)\n",
"\n",
"model = vowpalwabbit.Workspace('--oaa 3 --quiet')\n",
"model = vowpalwabbit.Workspace('--oaa 7')\n",
"\n",
"for example in data['train_input']:\n",
" model.learn(example)\n",
@ -97,6 +97,25 @@
"prediction('test-A/in.tsv', 'test-A/out.tsv', model, categories)\n",
"prediction('test-B/in.tsv', 'test-B/out.tsv', model, categories)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "e1fe9b4c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[NbConvertApp] Converting notebook run.ipynb to script\n",
"[NbConvertApp] Writing 1933 bytes to run.py\n"
]
}
],
"source": [
"!jupyter nbconvert --to script run.ipynb"
]
}
],
"metadata": {

18
run.py
View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
# coding: utf-8
# In[6]:
# In[23]:
import vowpalwabbit
@ -9,7 +9,7 @@ import pandas as pd
import re
# In[7]:
# In[24]:
def prediction(path_in, path_out, model, categories):
@ -22,11 +22,11 @@ def prediction(path_in, path_out, model, categories):
with open(path_out, 'w', encoding='utf-8') as file:
for example in data['train_input']:
predicted = model.predict(example)
text_predicted = dict((value, key) for key, value in map_dict.items()).get(predicted)
text_predicted = dict((value, key) for key, value in categories.items()).get(predicted)
file.write(str(text_predicted) + '\n')
# In[8]:
# In[25]:
def to_vowpalwabbit(row, categories):
@ -44,7 +44,7 @@ def to_vowpalwabbit(row, categories):
return vw
# In[9]:
# In[28]:
x_train = pd.read_csv('train/in.tsv', header=None, sep='\t')
@ -65,7 +65,7 @@ print(categories)
data['train_input'] = data.apply(lambda row: to_vowpalwabbit(row, categories), axis=1)
model = vowpalwabbit.Workspace('--oaa 3 --quiet')
model = vowpalwabbit.Workspace('--oaa 7')
for example in data['train_input']:
model.learn(example)
@ -74,3 +74,9 @@ prediction('dev-0/in.tsv', 'dev-0/out.tsv', model, categories)
prediction('test-A/in.tsv', 'test-A/out.tsv', model, categories)
prediction('test-B/in.tsv', 'test-B/out.tsv', model, categories)
# In[15]:
get_ipython().system('jupyter nbconvert --to script run.ipynb')

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff