Compare commits
No commits in common. "master" and "master" have entirely different histories.
61
bayes.py
61
bayes.py
@ -1,61 +0,0 @@
|
|||||||
from sklearn.naive_bayes import GaussianNB
|
|
||||||
import pandas as pd
|
|
||||||
from sklearn.naive_bayes import MultinomialNB
|
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
||||||
|
|
||||||
# Input files: the training set first, then the dev-0 and test-A inputs.
PATHS = [
    './train/train.tsv',
    './dev-0/in.tsv',
    './test-A/in.tsv',
]
# Output files for the dev-0 and test-A predictions, in that order.
PATHS_OUTPUT = [
    './dev-0/out.tsv',
    './test-A/out.tsv',
]
|
|
||||||
|
|
||||||
def get_data(path):
    """Load a headerless, tab-separated file into a DataFrame.

    Rows that pandas considers malformed (for example, rows with too many
    fields) are skipped instead of raising.

    Args:
        path: Location of the TSV file to read.

    Returns:
        A pandas DataFrame. Because the file is read with ``header=None``,
        its columns are labelled with integers starting at 0.
    """
    options = {'sep': '\t', 'header': None}
    try:
        # ``error_bad_lines`` was deprecated in pandas 1.3 and removed in
        # 2.0; ``on_bad_lines='skip'`` is its replacement.
        return pd.read_table(path, on_bad_lines='skip', **options)
    except TypeError:
        # pandas older than 1.3 rejects the unknown ``on_bad_lines``
        # keyword, so fall back to the legacy spelling.
        return pd.read_table(path, error_bad_lines=False, **options)
|
|
||||||
|
|
||||||
def get_X_y_train(data):
    """Split the training table into features and labels.

    Args:
        data: Table indexable by the column labels 0 and 1.

    Returns:
        ``(X_train, y_train)``: the ``.values`` of column 1 (features)
        followed by the ``.values`` of column 0 (labels).
    """
    features, labels = (data[column].values for column in (1, 0))
    return features, labels
|
|
||||||
|
|
||||||
def training(x, y):
    """Fit a TF-IDF vectorizer and a multinomial naive Bayes classifier.

    Args:
        x: Training inputs handed to ``TfidfVectorizer.fit_transform``.
        y: Training labels handed to ``MultinomialNB.fit``.

    Returns:
        ``(classifier, vectorizer)``: the fitted classifier first, then the
        fitted vectorizer.
    """
    tfidf = TfidfVectorizer()
    features = tfidf.fit_transform(x)

    model = MultinomialNB()
    model.fit(features, y)
    return model, tfidf
|
|
||||||
|
|
||||||
|
|
||||||
def predict(vectorizer, classifier, x):
    """Return the classifier's predictions for the raw inputs ``x``.

    ``x`` is passed through ``vectorizer.transform`` and the transformed
    result is handed to ``classifier.predict``, whose return value is
    returned unchanged.
    """
    return classifier.predict(vectorizer.transform(x))
|
|
||||||
|
|
||||||
def generate_output(pred, path):
    """Write ``pred`` to ``path`` as text via its ``tofile`` method.

    Items are separated by a newline character.
    """
    line_separator = '\n'
    pred.tofile(path, sep=line_separator)
|
|
||||||
|
|
||||||
def main():
    """Train on the training file, then write dev and test predictions.

    The training file is ``PATHS[0]``. Predictions are made for
    ``PATHS[1]`` and ``PATHS[2]`` and written to the matching entries of
    ``PATHS_OUTPUT``.
    """
    # Prepare the training data and fit the model.
    X_train, y_train = get_X_y_train(get_data(PATHS[0]))
    classifier, vectorizer = training(X_train, y_train)

    # Predict for the dev and test inputs (column 0 holds the inputs),
    # finishing every prediction before any output file is written.
    predictions = [
        predict(vectorizer, classifier, get_data(input_path)[0].values)
        for input_path in PATHS[1:3]
    ]

    # Write each prediction array to its corresponding output file.
    for pred, output_path in zip(predictions, PATHS_OUTPUT):
        generate_output(pred, output_path)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
5452
dev-0/out.tsv
5452
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
5445
test-A/out.tsv
5445
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
98132
train/train.tsv
98132
train/train.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user