linear regression solution
This commit is contained in:
parent
b775a221e6
commit
64b2bf963d
137314
dev-0/out.tsv
Normal file
137314
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
156606
dev-1/out.tsv
Normal file
156606
dev-1/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
69
run.py
Normal file
69
run.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
import lzma
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
from sklearn.metrics import recall_score
|
||||||
|
from sklearn.metrics import precision_score
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
from sklearn.metrics import f1_score
|
||||||
|
|
||||||
|
# Training labels are appended to this list by the label-loading step below.
Y_train = []

# Read every training document: one document per line of the xz-compressed
# TSV, with surrounding whitespace (including the newline) removed.
# The former `stop > 5000` guard was dropped: its counter was never
# incremented, so the early `break` could not trigger and all lines were
# always read.
with lzma.open('train/in.tsv.xz', 'rt', encoding="utf-8") as f:
    X_train = [line.strip() for line in f]
|
||||||
|
|
||||||
|
# Read the expected training labels: one integer per line.
# The former `stop > 5000` guard was dropped: its counter was never
# incremented, so the early `break` could not trigger and all lines were
# always read. The encoding is now explicit, matching the other readers
# in this script.
with open('train/expected.tsv', 'rt', encoding="utf-8") as f2:
    Y_train.extend(int(line.strip()) for line in f2)
|
||||||
|
|
||||||
|
# Learn the TF-IDF vocabulary from the training texts and turn them into
# a document-term matrix in one pass.
vectorizer = TfidfVectorizer()
document_vectors = vectorizer.fit_transform(X_train)

# Train a logistic-regression classifier (library defaults) on that matrix;
# `fit` returns the estimator itself, so it can be chained on construction.
model = LogisticRegression().fit(document_vectors, Y_train)
|
||||||
|
|
||||||
|
|
||||||
|
def readFile(filename):
    """Return the first tab-separated field of every line in *filename*.

    The file is read as UTF-8 text. For each line, everything before the
    first tab (or the whole line when it has no tab) is kept and stripped
    of leading/trailing whitespace. Blank lines yield empty strings.
    """
    with open(filename, 'r', encoding="utf-8") as dev_in:
        return [row.partition("\t")[0].strip() for row in dev_in]
|
||||||
|
|
||||||
|
def writePred(filename, predictions):
    """Write each prediction to *filename*, one per line.

    The file is created or truncated. Every item of *predictions* is
    converted with ``str`` and followed by a newline.
    """
    with open(filename, "w") as out_file:
        out_file.writelines(str(label) + "\n" for label in predictions)
|
||||||
|
|
||||||
|
# For each evaluation split: read its input texts, vectorize them with the
# already-fitted TF-IDF vocabulary, predict labels with the trained model,
# and write the predictions next to the input file.
for in_path, out_path in (
    ('dev-0/in.tsv', 'dev-0/out.tsv'),
    ('dev-1/in.tsv', 'dev-1/out.tsv'),
    ('test-A/in.tsv', 'test-A/out.tsv'),
):
    X_dev = readFile(in_path)
    X_dev = vectorizer.transform(X_dev)
    predictions = model.predict(X_dev)
    writePred(out_path, predictions)
|
||||||
|
|
||||||
|
|
134618
test-A/out.tsv
Normal file
134618
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user