Update for the new version

This commit is contained in:
Filip Gralinski 2022-04-28 22:06:42 +02:00
parent 8acd615fdb
commit 90adf994db
4 changed files with 19033 additions and 19027 deletions

File diff suppressed because it is too large Load Diff

4
gonito.yaml Normal file
View File

@ -0,0 +1,4 @@
description: tfidf with linear regression
tags:
- linear-regression
- tf-idf

File diff suppressed because it is too large Load Diff

View File

@ -10,6 +10,8 @@ import random
import pickle
import sys
import lzma
def tokenizer_space(text):
    """Split *text* into tokens on the single space character.

    Only ``' '`` is treated as a separator: tabs, newlines and other
    whitespace stay inside the tokens. Consecutive, leading or trailing
    spaces are not collapsed, so they produce empty-string tokens, and an
    empty input yields ``['']``.

    Args:
        text: The string to tokenize.

    Returns:
        A list of the substrings of *text* between space characters.
    """
    # Deliberately split(' ') and not split(): the latter would collapse
    # whitespace runs and change the resulting token sequence.
    return text.split(' ')
@ -19,9 +21,9 @@ type = sys.argv[1] # 1 or 2
def run():
# LOADING DATA
train_text = [a.rstrip('\n') for a in open('../train/in.tsv','r')]
dev_text = [a.rstrip('\n') for a in open('../dev-0/in.tsv','r')]
test_text = [a.rstrip('\n') for a in open('../test-A/in.tsv','r')]
train_text = [a.rstrip('\n') for a in lzma.open('../train/in.tsv.xz', 'rt')]
dev_text = [a.rstrip('\n') for a in lzma.open('../dev-0/in.tsv.xz', 'rt')]
test_text = [a.rstrip('\n') for a in lzma.open('../test-A/in.tsv.xz', 'rt')]
global lowest
train_year = [float(a.rstrip('\n')) for a in open(f'../train/expected{type}.tsv','r')]