done
This commit is contained in:
parent
647c099815
commit
d2386fec3f
20000
dev-0/out.tsv
Normal file
20000
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
14
dev-0/set5.py
Normal file
14
dev-0/set5.py
Normal file
@ -0,0 +1,14 @@
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LinearRegression
|
||||
import string
|
||||
import csv
|
||||
|
||||
def snap_scores(src_path="out.tsv", dst_path="fix.tsv"):
    """Rewrite every score so that its fractional part becomes ``.5``.

    Each line of *src_path* is stripped, parsed as a float and truncated
    toward zero with ``int``; the integer part followed by ``.5`` is then
    written as one line of *dst_path*.

    Args:
        src_path: Text file with one numeric score per line.
        dst_path: File that receives the adjusted scores, one per line.

    Raises:
        ValueError: If a line cannot be parsed as a float (this includes
            blank lines).
    """
    # Read everything before opening the destination, as the original
    # script did, so a parse error never leaves a half-written source.
    with open(src_path, "r", encoding="utf-8") as src:
        scores = [line.strip() for line in src]

    with open(dst_path, "w", encoding="utf-8") as dst:
        dst.writelines(f"{int(float(score))}.5\n" for score in scores)


if __name__ == "__main__":
    snap_scores()
|
49
skrypt.py
Normal file
49
skrypt.py
Normal file
@ -0,0 +1,49 @@
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LinearRegression
|
||||
import string
|
||||
import csv
|
||||
|
||||
TRAIN_PATH = "train/train.tsv"

# (input path, output path, message before reading, message before writing)
SPLITS = (
    ("dev-0/in.tsv", "dev-0/out.tsv", "reading in.tsv", "writing out.tsv"),
    (
        "test-A/in.tsv",
        "test-A/out.tsv",
        "reading test in.tsv",
        "writing test out.tsv",
    ),
)


def iter_tsv_rows(path):
    """Yield each line of the TSV file at *path* as a list of fields.

    Quoting is disabled: the files are treated as raw tab-separated text, so
    a ``"`` inside a field is ordinary data.  With the csv module's default
    quoting a field that starts with ``"`` would absorb the following tabs
    and newlines, merging several lines into one row and misaligning the
    predictions with their inputs.
    """
    with open(path, "r", encoding="utf-8") as handle:
        yield from csv.reader(handle, delimiter="\t", quoting=csv.QUOTE_NONE)


def load_training_data(path):
    """Return ``(targets, texts)`` read from the training file.

    The target of a row is the mean of its first two columns and the text is
    its fifth column.
    NOTE(review): columns 0 and 1 look like the start and end of a date
    range -- confirm against the data set description.
    """
    targets = []
    texts = []
    for row in iter_tsv_rows(path):
        targets.append((float(row[0]) + float(row[1])) / 2)
        texts.append(row[4])
    return targets, texts


def load_texts(path):
    """Return the first column of every line of the input file at *path*.

    A blank line yields an empty text instead of raising ``IndexError`` so
    that the output keeps exactly one prediction per input line.
    """
    return [row[0] if row else "" for row in iter_tsv_rows(path)]


def write_predictions(path, predictions):
    """Write one prediction per line to *path*, formatted with ``str``."""
    with open(path, "w", encoding="utf-8") as handle:
        handle.writelines(str(value) + "\n" for value in predictions)


def main():
    """Fit TF-IDF + linear regression on the training set and predict splits.

    The vectorizer is fitted on the training texts only; every split listed
    in ``SPLITS`` is transformed with that fitted vectorizer, predicted with
    the fitted model and written to the split's output file.
    """
    date, text = load_training_data(TRAIN_PATH)

    lr = LinearRegression()
    vectorizer = TfidfVectorizer()
    text = vectorizer.fit_transform(text)
    print("Fitting lr")
    lr.fit(text, date)

    for in_path, out_path, read_message, write_message in SPLITS:
        print(read_message)
        features = vectorizer.transform(load_texts(in_path))
        predictions = lr.predict(features)

        print(write_message)
        write_predictions(out_path, predictions)


if __name__ == "__main__":
    main()
|
||||
|
14219
test-A/out.tsv
Normal file
14219
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
14
test-A/set5.py
Normal file
14
test-A/set5.py
Normal file
@ -0,0 +1,14 @@
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LinearRegression
|
||||
import string
|
||||
import csv
|
||||
|
||||
def snap_scores(src_path="out.tsv", dst_path="fix.tsv"):
    """Rewrite every score so that its fractional part becomes ``.5``.

    Each line of *src_path* is stripped, parsed as a float and truncated
    toward zero with ``int``; the integer part followed by ``.5`` is then
    written as one line of *dst_path*.

    Args:
        src_path: Text file with one numeric score per line.
        dst_path: File that receives the adjusted scores, one per line.

    Raises:
        ValueError: If a line cannot be parsed as a float (this includes
            blank lines).
    """
    # Read everything before opening the destination, as the original
    # script did, so a parse error never leaves a half-written source.
    with open(src_path, "r", encoding="utf-8") as src:
        scores = [line.strip() for line in src]

    with open(dst_path, "w", encoding="utf-8") as dst:
        dst.writelines(f"{int(float(score))}.5\n" for score in scores)


if __name__ == "__main__":
    snap_scores()
|
107471
train/train.tsv
Normal file
107471
train/train.tsv
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user