"""Train a naive Bayes text classifier and write predictions to disk.

Reads ``train/in.tsv`` (one document per line) and ``train/expected.tsv``
(one integer label per line), trains the classifier in fixed-size chunks,
then classifies ``dev-0/in.tsv`` and ``test-A/in.tsv`` and writes one
prediction per line to the matching ``out.tsv`` file.
"""
from naivebayes import NaiveBayesTextClassifier
from spacy.lang.en.stop_words import STOP_WORDS as en_stop


def _read_lines(path):
    """Return all lines of the UTF-8 text file at *path* (newlines kept)."""
    with open(path, 'r', encoding='utf8') as f:
        return f.readlines()


def _write_predictions(path, predictions):
    """Write each prediction to *path* as ``str(prediction)``, one per line."""
    # Explicit encoding so the output does not depend on the platform default;
    # the ``with`` block closes the file, so no manual close() is needed.
    with open(path, 'wt', encoding='utf8') as f:
        f.writelines(str(p) + '\n' for p in predictions)


naive_bayes = NaiveBayesTextClassifier(
    categories=[0, 1],
    stop_words=en_stop,
)

train = _read_lines('train/in.tsv')
# int() tolerates the trailing newline on each label line.
expected = [int(label) for label in _read_lines('train/expected.tsv')]

# Feed the training data to the classifier in chunks of `step` documents.
# NOTE(review): this relies on train() accumulating state across calls --
# confirm against the naivebayes package documentation.
step = 20000
for chunk_start in range(0, len(expected), step):
    chunk_end = chunk_start + step  # slicing clamps this to the list length
    naive_bayes.train(
        train[chunk_start:chunk_end],
        expected[chunk_start:chunk_end],
    )

dev_0 = _read_lines('dev-0/in.tsv')
predicted_dev_0 = naive_bayes.classify(dev_0)
_write_predictions('dev-0/out.tsv', predicted_dev_0)

test_A = _read_lines('test-A/in.tsv')
predicted_test_A = naive_bayes.classify(test_A)
_write_predictions('test-A/out.tsv', predicted_test_A)