Compare commits

...

1 Commits

Author SHA1 Message Date
90adf994db Update for the new version 2022-04-28 22:06:42 +02:00
4 changed files with 19033 additions and 19027 deletions

File diff suppressed because it is too large Load Diff

4
gonito.yaml Normal file
View File

@@ -0,0 +1,4 @@
description: tfidf with linear regression
tags:
- linear-regression
- tf-idf

File diff suppressed because it is too large Load Diff

View File

@@ -10,18 +10,20 @@ import random
import pickle import pickle
import sys import sys
import lzma
def tokenizer_space(text): def tokenizer_space(text):
return text.split(' ') return text.split(' ')
type = sys.argv[1] # 1 or 2 type = sys.argv[1] # 1 or 2
def run(): def run():
# LOADING DATA # LOADING DATA
train_text = [a.rstrip('\n') for a in open('../train/in.tsv','r')] train_text = [a.rstrip('\n') for a in lzma.open('../train/in.tsv.xz', 'rt')]
dev_text = [a.rstrip('\n') for a in open('../dev-0/in.tsv','r')] dev_text = [a.rstrip('\n') for a in lzma.open('../dev-0/in.tsv.xz', 'rt')]
test_text = [a.rstrip('\n') for a in open('../test-A/in.tsv','r')] test_text = [a.rstrip('\n') for a in lzma.open('../test-A/in.tsv.xz', 'rt')]
global lowest global lowest
train_year = [float(a.rstrip('\n')) for a in open(f'../train/expected{type}.tsv','r')] train_year = [float(a.rstrip('\n')) for a in open(f'../train/expected{type}.tsv','r')]