sport-text-classification/tsv2vw.py

33 lines
783 B
Python
Executable File

#!/usr/bin/python3
import sys
# Maps each category name expected in the label column to the integer
# class id that is written at the start of the output line.
out_dict = {
    'pilka-nozna': 1,
    'siatkowka': 2,
    'sporty-walki': 3,
    'pilka-reczna': 4,
    'koszykowka': 5,
    'tenis': 6,
    'moto': 7,
    'zimowe': 8,
}
# Not read anywhere in the visible code; kept so that any outside reference
# to this module-level name still resolves.
counter = 1

# Class id used for a label that is present but missing from out_dict.
_FALLBACK_CLASS = 8

# Both ':' and '|' are structural characters in a Vowpal Wabbit example line
# (':' separates a feature from its value, '|' starts a namespace), so each
# is replaced by a space before the text is emitted as features.
_VW_RESERVED = str.maketrans({':': ' ', '|': ' '})


def process_item(out, inp):
    """Build one output line of the form ``<label> | <text>``.

    Args:
        out: Category name taken from the label column, or None when the
            input line carries no label.
        inp: Raw text of the example.

    Returns:
        The class id (an empty string when ``out`` is None, the fallback id
        8 when the name is not in ``out_dict``), followed by ``' | '`` and
        the text with every ':' and '|' replaced by a space.
    """
    if out is None:
        # Unlabelled example: leave the label slot empty.
        label = ''
    else:
        label = out_dict.get(out, _FALLBACK_CLASS)
    return str(label) + ' | ' + inp.translate(_VW_RESERVED)
# Convert every line read from standard input and print the result.
for raw_line in sys.stdin:
    # rstrip() drops the newline together with any other trailing
    # whitespace before the line is split on tabs.
    columns = raw_line.rstrip().split('\t')
    if len(columns) != 2:
        # Any line that does not split into exactly two columns is handled
        # as unlabelled, with its first column used as the text.
        print(process_item(None, columns[0]))
    else:
        label, text = columns
        print(process_item(label, text))