From 0238b238d274fc888d29a4505ebe4ba80fe40a48 Mon Sep 17 00:00:00 2001 From: Aleksy Wroblewski Date: Thu, 22 Apr 2021 22:03:47 +0200 Subject: [PATCH] Run autopep8 --- naivebayes.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/naivebayes.py b/naivebayes.py index 6947691..4cb12b8 100644 --- a/naivebayes.py +++ b/naivebayes.py @@ -5,12 +5,12 @@ from sklearn.naive_bayes import GaussianNB, MultinomialNB from sklearn.pipeline import make_pipeline, Pipeline from sklearn.feature_extraction.text import TfidfVectorizer -from nltk.corpus import stopwords +from nltk.corpus import stopwords -# pogarsza wynik z 0.73 na 0.7 +# pogarsza wynik z 0.73 na 0.7 def preprocess(line, stop_words): - return " ".join([word for word in line.split() if word not in stop_words]) + return " ".join([word for word in line.split() if word not in stop_words]) def train_model(train_in, train_expected): @@ -27,11 +27,11 @@ def train_model(train_in, train_expected): # model = MultinomialNB() # return model.fit(vectors, exp_encoded) # MemoryError - + pipeline = Pipeline(steps=[ - ('tfidf', TfidfVectorizer()), - ('naive-bayes', MultinomialNB()) - ]) + ('tfidf', TfidfVectorizer()), + ('naive-bayes', MultinomialNB()) + ]) return pipeline.fit(train_data, exp_encoded) @@ -42,11 +42,13 @@ def predict(model, in_file, out_file): prediction = model.predict(lines) np.savetxt(out_file, prediction, fmt='%d') + def main(): #stop_words = set(stopwords.words('english')) model = train_model("train/in.tsv", "train/expected.tsv") predict(model, "dev-0/in.tsv", "dev-0/out.tsv") predict(model, "test-A/in.tsv", "test-A/out.tsv") + if __name__ == '__main__': - main() \ No newline at end of file + main()