solution
This commit is contained in:
parent
97421a97ee
commit
06ae71b777
149134
dev-0/out.tsv
Normal file
149134
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
54
run.py
Normal file
54
run.py
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
import vowpalwabbit
|
||||||
|
import pandas as pd
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Path of the TSV file to classify, taken from the first command-line argument.
# Raises IndexError at import time when the script is run without arguments.
file_to_predict = sys.argv[1]
|
||||||
|
|
||||||
|
|
||||||
|
# Category name -> 1-based class label written at the start of each example.
_CATEGORY_TO_LABEL = {
    'business': 1,
    'culture': 2,
    'lifestyle': 3,
    'news': 4,
    'opinion': 5,
    'removed': 6,
    'sport': 7,
}

# Patterns used to normalise the 'text' column; compiled once, not per row.
_DISALLOWED_CHARS = re.compile("[^a-zA-Z '-]")
_EDGE_APOSTROPHE = re.compile("( ')|(' )|('$)|(^')")
_SPACE_RUN = re.compile(' +')


def to_vw_format(row):
    """Render one row as a Vowpal Wabbit style example string.

    The result has the form ``"<label> | <name>:<value> <name>:<value> ..."``.

    Args:
        row: A pandas Series (one DataFrame row). If it has a ``category``
            entry whose value is a known category name, that entry is turned
            into the numeric label and left out of the features.

    Returns:
        The example string. When ``category`` is missing or not a known name,
        the label is ``0`` and every entry of ``row`` (including any
        ``category`` entry) is emitted as a feature.

    Raises:
        AttributeError: If the ``text`` value is not a string (e.g. NaN).
    """
    try:
        label = _CATEGORY_TO_LABEL[row.category]
    except (AttributeError, KeyError, TypeError):
        # No usable category (missing column, unknown name or unhashable
        # value): fall back to the placeholder label and keep all columns.
        label = 0
        feature_items = row.items()
    else:
        feature_items = row.drop(['category']).items()

    parts = [f'{label} |']
    for idx, value in feature_items:
        if idx == 'text':
            value = value.lower()
            # Keep only ASCII letters, spaces, apostrophes and hyphens.
            value = _DISALLOWED_CHARS.sub('', value)
            # An apostrophe next to a space or at either end of the string is
            # replaced by a space; apostrophes inside a word are kept.
            value = _EDGE_APOSTROPHE.sub(' ', value)
            value = value.strip()
            value = _SPACE_RUN.sub(' ', value)
        # NOTE(review): VW's text format appears to expect a numeric weight
        # after ':'; emitting "text:<words>" may not produce word features.
        # Output is kept as-is here - confirm against the VW input format.
        parts.append(f' {idx}:{value}')
    return ''.join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
# All three inputs are read as tab-separated files without a header row, so
# pandas assigns integer column labels (0, 1, 2, ...).
_tsv_options = {'sep': '\t', 'header': None}

# Training inputs and their expected labels (hard-coded local paths).
in_df = pd.read_csv(
    '/home/ked/Desktop/aitech/eks/ireland-news-headlines/train/in.tsv',
    **_tsv_options,
)
exp_df = pd.read_csv(
    '/home/ked/Desktop/aitech/eks/ireland-news-headlines/train/expected.tsv',
    **_tsv_options,
)

# File to classify, as given on the command line.
pred_df = pd.read_csv(file_to_predict, **_tsv_options)
|
||||||
|
|
||||||
|
# Attach the expected labels as column 3, discard column 1 and name the three
# remaining columns the way to_vw_format looks them up.
# `drop(columns=1)` replaces the positional `drop(1, 1)`, whose positional
# axis argument is rejected by pandas >= 2.0.
in_df[3] = exp_df[0]
in_df = in_df.drop(columns=1)
in_df.columns = ['year', 'text', 'category']

# Lazy workaround so that to_vw_format does not break on the prediction frame:
# it gets a 'category' column too, filled from the *training* labels. The
# assignment aligns on the row index, so rows without a matching training row
# receive NaN.
pred_df[3] = exp_df[0]
pred_df = pred_df.drop(columns=1)
pred_df.columns = ['year', 'text', 'category']
|
||||||
|
|
||||||
|
# Vowpal Wabbit workspace configured with the `--oaa 7` option (seven classes,
# matching the seven category labels produced by to_vw_format).
vw = vowpalwabbit.Workspace('--oaa 7')

# Feed every formatted training row to the learner, one at a time.
for train_line in in_df.apply(to_vw_format, axis=1):
    vw.learn(train_line)

# Collect the predicted class for every formatted row of the file to classify,
# in row order.
predictions = [
    vw.predict(predict_line)
    for predict_line in pred_df.apply(to_vw_format, axis=1)
]
|
||||||
|
|
||||||
|
# Class label -> category name; labels are numbered from 1 in this order.
mapping = dict(enumerate(
    ('business', 'culture', 'lifestyle', 'news', 'opinion', 'removed', 'sport'),
    start=1,
))

# Write one category name per line, in prediction order, to out.tsv in the
# current working directory.
with open('out.tsv', 'w', encoding='utf-8') as out_file:
    out_file.writelines(mapping[label] + '\n' for label in predictions)
|
148308
test-A/out.tsv
Normal file
148308
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
79119
test-B/out.tsv
Normal file
79119
test-B/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user