Run autopep8

2021-04-22 22:03:47 +02:00 · 2021-04-22 22:03:47 +02:00 · 0238b238d2
commit 0238b238d2
parent d2d308a78a
1 changed files with 10 additions and 8 deletions
--- a/naivebayes.py
+++ b/naivebayes.py
@ -5,12 +5,12 @@ from sklearn.naive_bayes import GaussianNB, MultinomialNB
 from sklearn.pipeline import make_pipeline, Pipeline
 from sklearn.feature_extraction.text import TfidfVectorizer

-from nltk.corpus import stopwords 
+from nltk.corpus import stopwords


-# worsens the score from 0.73 to 0.7 
+# worsens the score from 0.73 to 0.7
 def preprocess(line, stop_words):
-  return " ".join([word for word in line.split() if word not in stop_words])
+    return " ".join([word for word in line.split() if word not in stop_words])


 def train_model(train_in, train_expected):
@ -27,11 +27,11 @@ def train_model(train_in, train_expected):
    # model = MultinomialNB()
    # return model.fit(vectors, exp_encoded)
    # MemoryError
-    
+
    pipeline = Pipeline(steps=[
-    ('tfidf', TfidfVectorizer()),
-    ('naive-bayes', MultinomialNB())
-    ]) 
+        ('tfidf', TfidfVectorizer()),
+        ('naive-bayes', MultinomialNB())
+    ])

    return pipeline.fit(train_data, exp_encoded)

@ -42,11 +42,13 @@ def predict(model, in_file, out_file):
    prediction = model.predict(lines)
    np.savetxt(out_file, prediction, fmt='%d')

+
 def main():
    #stop_words = set(stopwords.words('english'))
    model = train_model("train/in.tsv", "train/expected.tsv")
    predict(model, "dev-0/in.tsv", "dev-0/out.tsv")
    predict(model, "test-A/in.tsv", "test-A/out.tsv")

+
 if __name__ == '__main__':
-    main()
+    main()