tfidf + lr
This commit is contained in:
parent
29bb18a1f2
commit
c5d9b752dc
63098
dev-0/out.tsv
63098
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
41
solutions/1.py
Normal file
41
solutions/1.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Train a TF-IDF + logistic-regression text classifier on ../train, write
# predictions for each evaluation split, then pickle the fitted objects.
# All paths are relative to the current working directory.

# Explicit encoding so reading/writing does not depend on the platform locale.
# NOTE(review): assumes the TSV files are UTF-8 -- confirm against the corpus.
ENCODING = 'utf-8'


def _read_last_column(path):
    """Return the right-stripped last tab-separated field of each line in *path*."""
    with open(path, encoding=ENCODING) as f:
        return [line.split('\t')[-1].rstrip() for line in f]


train_in = _read_last_column('../train/in.tsv')

# Labels are taken as the whole (right-stripped) line, not just the last column.
with open('../train/expected.tsv', encoding=ENCODING) as f:
    train_expected = [line.rstrip() for line in f]

# Sorting makes the class-name <-> index mapping deterministic across runs
# (plain set iteration order is not).
classes = sorted(set(train_expected))
# Dict lookup instead of list.index(): O(1) per label rather than a linear
# scan of `classes` for every training example. Indices are identical.
class_to_index = {name: index for index, name in enumerate(classes)}

vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train_in)
y_train = [class_to_index[label] for label in train_expected]

model = LogisticRegression().fit(X_train, y_train)

# Accuracy measured on the training data itself, not on a held-out split.
print(accuracy_score(y_train, model.predict(X_train)))

for dataset in ('dev-0', 'test-A', 'test-B'):
    d_in = _read_last_column(f'../{dataset}/in.tsv')
    out = model.predict(vectorizer.transform(d_in))

    # One predicted class name per input line, mapped back from its index.
    with open(f'../{dataset}/out.tsv', 'w', encoding=ENCODING) as f:
        f.writelines(classes[sample] + '\n' for sample in out)

# NOTE(review): only the vectorizer and the model are persisted; the
# index -> name list `classes` is not, so anyone reloading these pickles
# has to rebuild it from ../train/expected.tsv.
with open('vectorizer.pickle', 'wb') as f:
    pickle.dump(vectorizer, f)

with open('model.pickle', 'wb') as f:
    pickle.dump(model, f)
|
BIN
solutions/model.pickle
Normal file
BIN
solutions/model.pickle
Normal file
Binary file not shown.
BIN
solutions/vectorizer.pickle
Normal file
BIN
solutions/vectorizer.pickle
Normal file
Binary file not shown.
63196
test-A/out.tsv
63196
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
120975
test-B/out.tsv
120975
test-B/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user