update
This commit is contained in:
parent
565e5dfb90
commit
3683d13395
@ -2,8 +2,8 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "4206eb3f",
|
||||
"execution_count": 11,
|
||||
"id": "5182690b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -14,8 +14,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "fde46276",
|
||||
"execution_count": 12,
|
||||
"id": "6ebd5310",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -29,14 +29,14 @@
|
||||
" with open(path_out, 'w', encoding='utf-8') as file:\n",
|
||||
" for example in data['train_input']:\n",
|
||||
" predicted = model.predict(example)\n",
|
||||
" text_predicted = dict((value, key) for key, value in map_dict.items()).get(predicted)\n",
|
||||
" text_predicted = dict((value, key) for key, value in categories.items()).get(predicted)\n",
|
||||
" file.write(str(text_predicted) + '\\n')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "27e69709",
|
||||
"execution_count": 13,
|
||||
"id": "e5d2de9f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -57,8 +57,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "c406b425",
|
||||
"execution_count": null,
|
||||
"id": "f6ba46b9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -88,7 +88,7 @@
|
||||
" \n",
|
||||
"data['train_input'] = data.apply(lambda row: to_vowpalwabbit(row, categories), axis=1)\n",
|
||||
"\n",
|
||||
"model = vowpalwabbit.Workspace('--oaa 3 --quiet')\n",
|
||||
"model = vowpalwabbit.Workspace('--oaa 7 --quiet')\n",
|
||||
"\n",
|
||||
"for example in data['train_input']:\n",
|
||||
" model.learn(example)\n",
|
||||
@ -97,6 +97,25 @@
|
||||
"prediction('test-A/in.tsv', 'test-A/out.tsv', model, categories)\n",
|
||||
"prediction('test-B/in.tsv', 'test-B/out.tsv', model, categories)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "0cdf4eaa",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[NbConvertApp] Converting notebook run.ipynb to script\n",
|
||||
"[NbConvertApp] Writing 1933 bytes to run.py\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!jupyter nbconvert --to script run.ipynb"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
298268
dev-0/out.tsv
298268
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
39
run.ipynb
39
run.ipynb
@ -2,8 +2,8 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "4206eb3f",
|
||||
"execution_count": 23,
|
||||
"id": "4618be1f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -14,8 +14,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "fde46276",
|
||||
"execution_count": 24,
|
||||
"id": "fc616309",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -29,14 +29,14 @@
|
||||
" with open(path_out, 'w', encoding='utf-8') as file:\n",
|
||||
" for example in data['train_input']:\n",
|
||||
" predicted = model.predict(example)\n",
|
||||
" text_predicted = dict((value, key) for key, value in map_dict.items()).get(predicted)\n",
|
||||
" text_predicted = dict((value, key) for key, value in categories.items()).get(predicted)\n",
|
||||
" file.write(str(text_predicted) + '\\n')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "27e69709",
|
||||
"execution_count": 25,
|
||||
"id": "9553e9b0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -57,8 +57,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "c406b425",
|
||||
"execution_count": 28,
|
||||
"id": "96e97cdf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -88,7 +88,7 @@
|
||||
" \n",
|
||||
"data['train_input'] = data.apply(lambda row: to_vowpalwabbit(row, categories), axis=1)\n",
|
||||
"\n",
|
||||
"model = vowpalwabbit.Workspace('--oaa 3 --quiet')\n",
|
||||
"model = vowpalwabbit.Workspace('--oaa 7')\n",
|
||||
"\n",
|
||||
"for example in data['train_input']:\n",
|
||||
" model.learn(example)\n",
|
||||
@ -97,6 +97,25 @@
|
||||
"prediction('test-A/in.tsv', 'test-A/out.tsv', model, categories)\n",
|
||||
"prediction('test-B/in.tsv', 'test-B/out.tsv', model, categories)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "e1fe9b4c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[NbConvertApp] Converting notebook run.ipynb to script\n",
|
||||
"[NbConvertApp] Writing 1933 bytes to run.py\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!jupyter nbconvert --to script run.ipynb"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
18
run.py
18
run.py
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[6]:
|
||||
# In[23]:
|
||||
|
||||
|
||||
import vowpalwabbit
|
||||
@ -9,7 +9,7 @@ import pandas as pd
|
||||
import re
|
||||
|
||||
|
||||
# In[7]:
|
||||
# In[24]:
|
||||
|
||||
|
||||
def prediction(path_in, path_out, model, categories):
|
||||
@ -22,11 +22,11 @@ def prediction(path_in, path_out, model, categories):
|
||||
with open(path_out, 'w', encoding='utf-8') as file:
|
||||
for example in data['train_input']:
|
||||
predicted = model.predict(example)
|
||||
text_predicted = dict((value, key) for key, value in map_dict.items()).get(predicted)
|
||||
text_predicted = dict((value, key) for key, value in categories.items()).get(predicted)
|
||||
file.write(str(text_predicted) + '\n')
|
||||
|
||||
|
||||
# In[8]:
|
||||
# In[25]:
|
||||
|
||||
|
||||
def to_vowpalwabbit(row, categories):
|
||||
@ -44,7 +44,7 @@ def to_vowpalwabbit(row, categories):
|
||||
return vw
|
||||
|
||||
|
||||
# In[9]:
|
||||
# In[28]:
|
||||
|
||||
|
||||
x_train = pd.read_csv('train/in.tsv', header=None, sep='\t')
|
||||
@ -65,7 +65,7 @@ print(categories)
|
||||
|
||||
data['train_input'] = data.apply(lambda row: to_vowpalwabbit(row, categories), axis=1)
|
||||
|
||||
model = vowpalwabbit.Workspace('--oaa 3 --quiet')
|
||||
model = vowpalwabbit.Workspace('--oaa 7')
|
||||
|
||||
for example in data['train_input']:
|
||||
model.learn(example)
|
||||
@ -74,3 +74,9 @@ prediction('dev-0/in.tsv', 'dev-0/out.tsv', model, categories)
|
||||
prediction('test-A/in.tsv', 'test-A/out.tsv', model, categories)
|
||||
prediction('test-B/in.tsv', 'test-B/out.tsv', model, categories)
|
||||
|
||||
|
||||
# In[15]:
|
||||
|
||||
|
||||
get_ipython().system('jupyter nbconvert --to script run.ipynb')
|
||||
|
||||
|
296616
test-A/out.tsv
296616
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
79119
test-B/.ipynb_checkpoints/out-checkpoint.tsv
Normal file
79119
test-B/.ipynb_checkpoints/out-checkpoint.tsv
Normal file
File diff suppressed because it is too large
Load Diff
158238
test-B/out.tsv
158238
test-B/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user