Update for the new version

2022-04-28 22:06:42 +02:00 · 2022-04-28 22:06:42 +02:00 · 90adf994db
commit 90adf994db
parent 8acd615fdb
4 changed files with 19033 additions and 19027 deletions
--- a/dev-0/out.tsv
+++ b/dev-0/out.tsv
--- a/gonito.yaml
+++ b/gonito.yaml
@@ -0,0 +1,4 @@
+description: tfidf with linear regression
+tags:
+  - linear-regression
+  - tf-idf
--- a/test-A/out.tsv
+++ b/test-A/out.tsv
--- a/tfidflr/train_predict.py
+++ b/tfidflr/train_predict.py
@@ -10,18 +10,20 @@ import random
 import pickle
 import sys

+import lzma
+
 def tokenizer_space(text):
    return text.split(' ')

-type = sys.argv[1] # 1 or 2 
+type = sys.argv[1] # 1 or 2


 def run():

    # LOADING DATA
-    train_text = [a.rstrip('\n') for a in open('../train/in.tsv','r')]
-    dev_text = [a.rstrip('\n') for a in open('../dev-0/in.tsv','r')]
-    test_text = [a.rstrip('\n') for a in open('../test-A/in.tsv','r')]
+    train_text = [a.rstrip('\n') for a in lzma.open('../train/in.tsv.xz', 'rt')]
+    dev_text = [a.rstrip('\n') for a in lzma.open('../dev-0/in.tsv.xz', 'rt')]
+    test_text = [a.rstrip('\n') for a in lzma.open('../test-A/in.tsv.xz', 'rt')]
    global lowest

    train_year = [float(a.rstrip('\n')) for a in open(f'../train/expected{type}.tsv','r')]