This commit is contained in:
Piotrek96 2021-05-16 22:15:55 +02:00
parent 647c099815
commit d2386fec3f
6 changed files with 141767 additions and 0 deletions

20000
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

14
dev-0/set5.py Normal file
View File

@@ -0,0 +1,14 @@
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
import string
import csv
def snap_to_half(score):
    """Return *score* with its fractional part replaced by ``.5``.

    The text is parsed with ``float()``, reduced to a whole number with
    ``int()`` (which truncates toward zero) and ``".5"`` is appended, so
    ``"1950.73"`` becomes ``"1950.5"`` and ``"-3.2"`` becomes ``"-3.5"``.

    Args:
        score: Textual number, e.g. one stripped line of ``out.tsv``.

    Returns:
        The truncated whole part followed by ``".5"``, as a string.

    Raises:
        ValueError: If *score* is not a valid float literal (including an
            empty string) or parses to NaN.
        OverflowError: If *score* parses to an infinity.
    """
    return str(int(float(score))) + ".5"


def main():
    """Read ``out.tsv`` and write the snapped value of each line to ``fix.tsv``."""
    # Stream line by line: every value is converted independently, so the
    # input never has to be held in memory as a list.
    with open("out.tsv", 'r', encoding="utf-8") as out, \
            open("fix.tsv", 'w', encoding="utf-8") as fix:
        for line in out:
            fix.write(snap_to_half(line.strip()) + "\n")


# Guarded so the conversion helper can be imported without touching files;
# running the file as a script behaves as before.
if __name__ == "__main__":
    main()

49
skrypt.py Normal file
View File

@@ -0,0 +1,49 @@
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
import string
import csv
def parse_training_row(row):
    """Split one training row into its regression target and its text.

    The target is the mean of the first two columns, each parsed with
    ``float()``; the text is the fifth column, returned unchanged.

    Args:
        row: Sequence of column strings for one line of the training file.

    Returns:
        A ``(target, text)`` tuple.

    Raises:
        IndexError: If *row* has fewer than five columns.
        ValueError: If either of the first two columns is not a float literal.
    """
    return (float(row[0]) + float(row[1])) / 2, row[4]


def read_first_column(path):
    """Return the first tab-separated column of every row in *path*.

    Raises:
        IndexError: If a row has no columns (e.g. a blank line).
    """
    # NOTE(review): csv.reader runs with its default quoting here, so a
    # double quote at the start of a field is interpreted as CSV quoting.
    # If the input holds raw, unescaped quotes, quoting=csv.QUOTE_NONE may be
    # what is wanted -- confirm against the data before changing.
    with open(path, 'r', encoding="utf-8") as handle:
        return [row[0] for row in csv.reader(handle, delimiter="\t")]


def format_predictions(predictions):
    """Return each prediction as its ``str()`` form followed by a newline."""
    return [str(value) + "\n" for value in predictions]


def predict_to_file(vectorizer, model, in_path, out_path, label=""):
    """Predict a value for every row of *in_path* and write them to *out_path*.

    Args:
        vectorizer: Already-fitted vectorizer; its ``transform`` is applied to
            the texts read from *in_path*.
        model: Already-fitted model; its ``predict`` is called on the
            transformed texts.
        in_path: Tab-separated input file; only the first column is used.
        out_path: Output file, one prediction per line.
        label: Text inserted into the progress messages before the file name.
    """
    print("reading " + label + "in.tsv")
    features = vectorizer.transform(read_first_column(in_path))
    predictions = model.predict(features)
    print("writing " + label + "out.tsv")
    with open(out_path, 'w', encoding="utf-8") as handle:
        handle.writelines(format_predictions(predictions))


def main():
    """Fit a TF-IDF + linear-regression model and write both prediction files."""
    targets = []
    texts = []
    with open("train/train.tsv", 'r', encoding="utf-8") as train:
        for row in csv.reader(train, delimiter="\t"):
            target, text = parse_training_row(row)
            targets.append(target)
            texts.append(text)

    model = LinearRegression()
    vectorizer = TfidfVectorizer()
    features = vectorizer.fit_transform(texts)
    print("Fitting lr")
    model.fit(features, targets)

    predict_to_file(vectorizer, model, "dev-0/in.tsv", "dev-0/out.tsv")
    ## Test A
    predict_to_file(vectorizer, model, "test-A/in.tsv", "test-A/out.tsv",
                    label="test ")


# Guarded so the helpers can be imported without training a model; running
# the file as a script behaves as before.
if __name__ == "__main__":
    main()

14219
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff

14
test-A/set5.py Normal file
View File

@@ -0,0 +1,14 @@
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
import string
import csv
def snap_to_half(score):
    """Return *score* with its fractional part replaced by ``.5``.

    The text is parsed with ``float()``, reduced to a whole number with
    ``int()`` (which truncates toward zero) and ``".5"`` is appended, so
    ``"1950.73"`` becomes ``"1950.5"`` and ``"-3.2"`` becomes ``"-3.5"``.

    Args:
        score: Textual number, e.g. one stripped line of ``out.tsv``.

    Returns:
        The truncated whole part followed by ``".5"``, as a string.

    Raises:
        ValueError: If *score* is not a valid float literal (including an
            empty string) or parses to NaN.
        OverflowError: If *score* parses to an infinity.
    """
    return str(int(float(score))) + ".5"


def main():
    """Read ``out.tsv`` and write the snapped value of each line to ``fix.tsv``."""
    # Stream line by line: every value is converted independently, so the
    # input never has to be held in memory as a list.
    with open("out.tsv", 'r', encoding="utf-8") as out, \
            open("fix.tsv", 'w', encoding="utf-8") as fix:
        for line in out:
            fix.write(snap_to_half(line.strip()) + "\n")


# Guarded so the conversion helper can be imported without touching files;
# running the file as a script behaves as before.
if __name__ == "__main__":
    main()

107471
train/train.tsv Normal file

File diff suppressed because one or more lines are too long