One more time
This commit is contained in:
parent
57bb940cb8
commit
8ea54c4e58
19
decode.py
19
decode.py
@ -1,19 +1,12 @@
|
|||||||
import inout as io
|
import inout as io
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
target = [x[0].replace('\n', '') for x in io.read('train/expected.tsv.xz')]
|
|
||||||
categories = {}
|
|
||||||
i = 0
|
|
||||||
for x in target:
|
|
||||||
if x not in categories.values():
|
|
||||||
categories[i] = x
|
|
||||||
i += 1
|
|
||||||
|
|
||||||
files = ['dev-0', 'test-A', 'test-B']
|
files = ['dev-0', 'test-A', 'test-B']
|
||||||
|
|
||||||
|
categories = {0: 'news', 1: 'sport', 2: 'opinion',
|
||||||
|
3: 'business', 4: 'culture', 5: 'lifestyle', 6: 'removed'}
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
for file in files:
|
for file in files:
|
||||||
predicted = io.read('predicted-' + file)
|
predicted = io.read(file + '/out')
|
||||||
predicted = [1 if float(x)-1 <= -0.5 else x for x in predicted]
|
predicted = [categories[round(float(x))] for x in predicted]
|
||||||
predicted = [categories[round(float(x))-1] for x in predicted]
|
|
||||||
io.write(predicted, file + '/out.tsv')
|
io.write(predicted, file + '/out.tsv')
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
149134
dev-0/vw-in
Normal file
149134
dev-0/vw-in
Normal file
File diff suppressed because it is too large
Load Diff
149134
predicted-dev-0
149134
predicted-dev-0
File diff suppressed because it is too large
Load Diff
148308
predicted-test-A
148308
predicted-test-A
File diff suppressed because it is too large
Load Diff
79119
predicted-test-B
79119
predicted-test-B
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
227427
vw-test-A → test-B/vw-in
227427
vw-test-A → test-B/vw-in
File diff suppressed because it is too large
Load Diff
2373796
vw-train → train/vw-in
2373796
vw-train → train/vw-in
File diff suppressed because it is too large
Load Diff
149134
vw-dev0-targets
149134
vw-dev0-targets
File diff suppressed because it is too large
Load Diff
42
wockyWoad.py
Normal file
42
wockyWoad.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
import inout as io
|
||||||
|
|
||||||
|
categories = {'news': 0, 'sport': 1, 'opinion': 2,
|
||||||
|
'business': 3, 'culture': 4, 'lifestyle': 5, 'removed': 6}
|
||||||
|
|
||||||
|
|
||||||
|
def trainingData(data, target):
    """Build Vowpal Wabbit feature strings and numeric labels for training.

    Args:
        data: Path passed to ``io.read``. Column 0 of each row is used as
            the year and column 2 as the text.
        target: Path passed to ``io.read``. Column 0 of each row is a
            category name and must be a key of ``categories``.

    Returns:
        A dict with two lists: ``'data'`` holds one
        ``' |Text <text> |Year <year>'`` string per input row, and
        ``'target'`` holds the category id of each expected row.

    Raises:
        KeyError: If an expected category name is not in ``categories``.
    """
    # In the VW input format '|' opens a namespace and ':' separates a
    # feature from its value, so both must be removed from free text
    # (along with newlines) or they corrupt the example line.
    strip_vw_syntax = str.maketrans('', '', '\n:|')

    rows = io.read(data)
    examples = [
        ' |Text ' + row[2].translate(strip_vw_syntax) + ' |Year ' + row[0]
        for row in rows
    ]
    labels = [categories[row[0].replace('\n', '')] for row in io.read(target)]

    return {'data': examples, 'target': labels}
|
||||||
|
|
||||||
|
def predictFuture(test):
|
||||||
|
data = io.read(test + '/in.tsv')
|
||||||
|
years = [x[0] for x in data]
|
||||||
|
text = [x[2].replace('\n', '').replace(':', '') for x in data]
|
||||||
|
|
||||||
|
data = []
|
||||||
|
for i in range(len(text)):
|
||||||
|
data.append(' |Text ' + text[i] + ' |Year ' + years[i])
|
||||||
|
|
||||||
|
with open(test + '/vw-in', 'w', encoding='utf-8') as f:
|
||||||
|
for text in data:
|
||||||
|
f.write('1' + text + '\n')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Training split: labels are written shifted by one (category id + 1)
    # in front of each feature string.
    train_set = trainingData('train/in.tsv.xz', 'train/expected.tsv.xz')
    labelled_examples = zip(train_set['target'], train_set['data'])

    with open('train/vw-in', 'w', encoding='utf-8') as out:
        for label, example in labelled_examples:
            out.write(str(label + 1) + example + '\n')

    # Evaluation splits: generate the VW input file for each in turn.
    for split in ('dev-0', 'test-A', 'test-B'):
        predictFuture(split)
|
Loading…
Reference in New Issue
Block a user