One more time

2022-05-28 21:16:33 +02:00 · 2022-05-28 21:16:33 +02:00 · 8ea54c4e58
commit 8ea54c4e58
parent 57bb940cb8
15 changed files with 1563507 additions and 2536569 deletions
--- a/decode.py
+++ b/decode.py
@ -1,19 +1,12 @@
 import inout as io

+files = ['dev-0', 'test-A', 'test-B']
+
+categories = {0: 'news', 1: 'sport', 2: 'opinion', 
+    3: 'business', 4: 'culture', 5: 'lifestyle', 6: 'removed'}

 if __name__ == '__main__':
-    target = [x[0].replace('\n', '') for x in io.read('train/expected.tsv.xz')]
-    categories = {}
-    i = 0
-    for x in target:
-        if x not in categories.values(): 
-            categories[i] = x
-            i += 1
-
-    files = ['dev-0', 'test-A', 'test-B']
-
    for file in files:
-        predicted = io.read('predicted-' + file)
-        predicted = [1 if float(x)-1 <= -0.5 else x for x in predicted]
-        predicted = [categories[round(float(x))-1] for x in predicted]
+        predicted = io.read(file + '/out')
+        predicted = [categories[round(float(x))] for x in predicted]
        io.write(predicted, file + '/out.tsv')
--- a/dev-0/.ipynb_checkpoints/expected-checkpoint.tsv
+++ b/dev-0/.ipynb_checkpoints/expected-checkpoint.tsv
--- a/dev-0/.ipynb_checkpoints/out-checkpoint.tsv
+++ b/dev-0/.ipynb_checkpoints/out-checkpoint.tsv
--- a/dev-0/vw-in
+++ b/dev-0/vw-in
--- a/149134
+++ b/149134
--- a/148308
+++ b/148308
--- a/79119
+++ b/79119
--- a/dev-0/.ipynb_checkpoints/in-checkpoint.tsv
+++ b/dev-0/.ipynb_checkpoints/in-checkpoint.tsv
--- a/test-B/vw-in
+++ b/test-B/vw-in
--- a/train/vw-in
+++ b/train/vw-in
--- a/149134
+++ b/149134
--- a/149134
+++ b/149134
--- a/79119
+++ b/79119
--- a/vw.model
+++ b/vw.model
--- a/wockyWoad.py
+++ b/wockyWoad.py
@ -0,0 +1,42 @@
+import inout as io
+
+categories = {'news': 0, 'sport': 1, 'opinion': 2, 
+    'business': 3, 'culture': 4, 'lifestyle': 5, 'removed': 6}
+
+
+def trainingData(data, target):
+    data = io.read(data)
+    years = [x[0] for x in data]
+    text = [x[2].replace('\n', '').replace(':', '') for x in data]
+    target = [categories[x[0].replace('\n', '')] for x in io.read(target)]
+
+    data = []
+    for i in range(len(text)):
+        data.append(' |Text ' + text[i] + ' |Year ' + years[i])
+
+    return {'data': data, 'target': target}
+
+def predictFuture(test):
+    data = io.read(test + '/in.tsv')
+    years = [x[0] for x in data]
+    text = [x[2].replace('\n', '').replace(':', '') for x in data]
+
+    data = []
+    for i in range(len(text)):
+        data.append(' |Text ' + text[i] + ' |Year ' + years[i])
+
+    with open(test + '/vw-in', 'w', encoding='utf-8') as f:
+        for text in data:
+            f.write('1' + text + '\n')
+
+
+if __name__ == '__main__':
+    ireland_news_train = trainingData('train/in.tsv.xz', 'train/expected.tsv.xz')
+
+    with open('train/vw-in', 'w', encoding='utf-8') as f:
+        for target, text in zip(ireland_news_train['target'], ireland_news_train['data']):
+            f.write(str(target + 1) + text + '\n')
+    
+    predictFuture('dev-0')
+    predictFuture('test-A')
+    predictFuture('test-B')