One more time

This commit is contained in:
Iwona Christop 2022-05-28 21:16:33 +02:00
parent 57bb940cb8
commit 8ea54c4e58
15 changed files with 1563507 additions and 2536569 deletions

View File

@ -1,19 +1,12 @@
import inout as io
# NOTE(review): this span appears to be a rendered diff hunk (header "-1,19 +1,12")
# whose +/- markers and indentation were lost, so lines of the old and the new
# version seem to be interleaved. It is not runnable as shown - confirm against
# the actual file before relying on any single line.

# Dataset directories whose predictions get converted to category names.
files = ['dev-0', 'test-A', 'test-B']
# Static mapping from integer class id to category name.
categories = {0: 'news', 1: 'sport', 2: 'opinion',
3: 'business', 4: 'culture', 5: 'lifestyle', 6: 'removed'}
if __name__ == '__main__':
# Label strings from the first column of train/expected.tsv.xz, newlines removed.
target = [x[0].replace('\n', '') for x in io.read('train/expected.tsv.xz')]
# Build an id -> label mapping, numbering labels in order of first appearance.
categories = {}
i = 0
for x in target:
if x not in categories.values():
categories[i] = x
i += 1
files = ['dev-0', 'test-A', 'test-B']
for file in files:
# Variant reading 'predicted-<file>': values with float(x) <= 0.5 are replaced
# by 1, then the rounded value minus one is used as the key into categories.
predicted = io.read('predicted-' + file)
predicted = [1 if float(x)-1 <= -0.5 else x for x in predicted]
predicted = [categories[round(float(x))-1] for x in predicted]
# Variant reading '<file>/out': the rounded value is used directly as the key.
# NOTE(review): presumably this pair replaces the three lines above - confirm.
predicted = io.read(file + '/out')
predicted = [categories[round(float(x))] for x in predicted]
# Write the category names to '<file>/out.tsv' via the project's io helper.
io.write(predicted, file + '/out.tsv')

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

149134
dev-0/vw-in Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

149134
vw-dev0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

79119
vw-test-B

File diff suppressed because it is too large Load Diff

BIN
vw.model

Binary file not shown.

42
wockyWoad.py Normal file
View File

@ -0,0 +1,42 @@
import inout as io
# Category name -> integer class id; ids follow the listed order, starting at 0.
categories = {
    name: index
    for index, name in enumerate(
        ('news', 'sport', 'opinion', 'business', 'culture', 'lifestyle', 'removed')
    )
}
def trainingData(data, target):
data = io.read(data)
years = [x[0] for x in data]
text = [x[2].replace('\n', '').replace(':', '') for x in data]
target = [categories[x[0].replace('\n', '')] for x in io.read(target)]
data = []
for i in range(len(text)):
data.append(' |Text ' + text[i] + ' |Year ' + years[i])
return {'data': data, 'target': target}
def predictFuture(test):
data = io.read(test + '/in.tsv')
years = [x[0] for x in data]
text = [x[2].replace('\n', '').replace(':', '') for x in data]
data = []
for i in range(len(text)):
data.append(' |Text ' + text[i] + ' |Year ' + years[i])
with open(test + '/vw-in', 'w', encoding='utf-8') as f:
for text in data:
f.write('1' + text + '\n')
if __name__ == '__main__':
    # Training file: each line is the class id shifted by one (so ids start
    # at 1) immediately followed by the example's feature string.
    ireland_news_train = trainingData('train/in.tsv.xz', 'train/expected.tsv.xz')
    labelled = zip(ireland_news_train['target'], ireland_news_train['data'])
    with open('train/vw-in', 'w', encoding='utf-8') as out:
        out.writelines(
            str(label + 1) + features + '\n' for label, features in labelled
        )

    # Produce the input files for the evaluation splits, in the same order.
    for split in ('dev-0', 'test-A', 'test-B'):
        predictFuture(split)