Update for the new version

2022-04-28 22:06:42 +02:00
4 changed files with 19033 additions and 19027 deletions
--- a/dev-0/out.tsv
+++ b/dev-0/out.tsv
--- a/gonito.yaml
+++ b/gonito.yaml
@@ -0,0 +1,4 @@
 description: tfidf with linear regression
 tags:
  - linear-regression
  - tf-idf
--- a/test-A/out.tsv
+++ b/test-A/out.tsv
--- a/tfidflr/train_predict.py
+++ b/tfidflr/train_predict.py
@@ -10,18 +10,20 @@ import random
 import pickle
 import sys
 import lzma
 def tokenizer_space(text):
    return text.split(' ')
-type = sys.argv[1] # 1 or 2 
+type = sys.argv[1] # 1 or 2
 def run():
    # LOADING DATA
-    train_text = [a.rstrip('\n') for a in open('../train/in.tsv','r')]
+    train_text = [a.rstrip('\n') for a in lzma.open('../train/in.tsv.xz', 'rt')]
-    dev_text = [a.rstrip('\n') for a in open('../dev-0/in.tsv','r')]
+    dev_text = [a.rstrip('\n') for a in lzma.open('../dev-0/in.tsv.xz', 'rt')]
-    test_text = [a.rstrip('\n') for a in open('../test-A/in.tsv','r')]
+    test_text = [a.rstrip('\n') for a in lzma.open('../test-A/in.tsv.xz', 'rt')]
    global lowest
    train_year = [float(a.rstrip('\n')) for a in open(f'../train/expected{type}.tsv','r')]