Run autopep8
This commit is contained in:
parent
d2d308a78a
commit
0238b238d2
@ -5,12 +5,12 @@ from sklearn.naive_bayes import GaussianNB, MultinomialNB
|
||||
from sklearn.pipeline import make_pipeline, Pipeline
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
|
||||
from nltk.corpus import stopwords
|
||||
from nltk.corpus import stopwords
|
||||
|
||||
|
||||
# makes the result worse: from 0.73 down to 0.7
|
||||
# makes the result worse: from 0.73 down to 0.7
|
||||
def preprocess(line, stop_words):
    """Return *line* with every token found in *stop_words* removed.

    Args:
        line: Text to filter; it is split on runs of whitespace.
        stop_words: Container of tokens to drop; membership is tested with
            ``in``, so the comparison is exact and case-sensitive.

    Returns:
        The surviving tokens joined by single spaces, or an empty string
        when no token survives.
    """
    return " ".join(word for word in line.split() if word not in stop_words)
|
||||
|
||||
|
||||
def train_model(train_in, train_expected):
|
||||
@ -27,11 +27,11 @@ def train_model(train_in, train_expected):
|
||||
# model = MultinomialNB()
|
||||
# return model.fit(vectors, exp_encoded)
|
||||
# MemoryError
|
||||
|
||||
|
||||
pipeline = Pipeline(steps=[
|
||||
('tfidf', TfidfVectorizer()),
|
||||
('naive-bayes', MultinomialNB())
|
||||
])
|
||||
('tfidf', TfidfVectorizer()),
|
||||
('naive-bayes', MultinomialNB())
|
||||
])
|
||||
|
||||
return pipeline.fit(train_data, exp_encoded)
|
||||
|
||||
@ -42,11 +42,13 @@ def predict(model, in_file, out_file):
|
||||
prediction = model.predict(lines)
|
||||
np.savetxt(out_file, prediction, fmt='%d')
|
||||
|
||||
|
||||
def main():
    """Train the classifier and write predictions for the dev and test sets.

    The model is fitted by ``train_model`` on ``train/in.tsv`` and
    ``train/expected.tsv``; ``predict`` is then called once per evaluation
    set with its input and output paths.
    """
    # NOTE(review): building the stop-word set is disabled here; presumably
    # tied to the score drop noted above preprocess(). Confirm before enabling.
    # stop_words = set(stopwords.words('english'))
    model = train_model("train/in.tsv", "train/expected.tsv")

    evaluation_sets = (
        ("dev-0/in.tsv", "dev-0/out.tsv"),
        ("test-A/in.tsv", "test-A/out.tsv"),
    )
    for in_file, out_file in evaluation_sets:
        predict(model, in_file, out_file)
|
||||
|
||||
|
||||
# Run the full train/predict pipeline only when executed as a script,
# and only once.
if __name__ == '__main__':
    main()
|
||||
|
Loading…
Reference in New Issue
Block a user