"""Train a TF-IDF + multinomial naive Bayes text classifier and predict.

The script fits the model on ``./train/in.tsv`` (one document per line) with
labels from ``./train/expected.tsv`` (one label per line), then writes
predictions for ``./dev-0/`` and ``./test-A/`` to an ``out.tsv`` file in each
of those directories.
"""
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np


def makePrediction(filePath):
    """Predict a label for every line of ``<filePath>in.tsv``.

    One predicted label per input line is written to ``<filePath>out.tsv``,
    expressed in the same form as the labels in the training file (the
    encoder indices produced by the model are mapped back to labels).

    Args:
        filePath: Path prefix that is concatenated directly with ``in.tsv``
            and ``out.tsv``; it must therefore already end with a path
            separator (e.g. ``'./dev-0/'``).

    Relies on the module-level ``model`` and ``labelEncoder`` having been
    fitted before it is called.
    """
    with open(filePath + 'in.tsv') as source:
        predictData = source.readlines()

    with open(filePath + "out.tsv", "w") as result:
        # The estimator rejects an input with zero samples, so an empty
        # input file simply yields an empty output file.
        if not predictData:
            return
        encoded = model.predict(predictData)
        # Translate encoder indices back to the original label strings.
        for label in labelEncoder.inverse_transform(encoded):
            result.write(str(label) + '\n')


# Labels are stripped of their line terminator so that a final line without a
# trailing newline is not treated as a separate class, and so that decoded
# predictions can be written back out with exactly one newline each.
with open('./train/expected.tsv') as expectedFile:
    expectedTraining = [line.rstrip('\n') for line in expectedFile]

with open('./train/in.tsv') as trainingFile:
    inTraining = trainingFile.readlines()

# Keep the fitted encoder: it is needed to decode predictions later.
labelEncoder = LabelEncoder()
afterTransform = labelEncoder.fit_transform(expectedTraining)

pipeline = Pipeline(
    steps=[('tfidf', TfidfVectorizer()), ('naive-bayes', MultinomialNB())])
model = pipeline.fit(inTraining, afterTransform)

makePrediction('./dev-0/')
makePrediction('./test-A/')