Compare commits

...

4 Commits

Author SHA1 Message Date
f0167001f1 add geval 2021-06-20 20:20:16 +02:00
c7006c5d1e remove jupiter 2021-06-20 20:05:38 +02:00
2c5b3c6c96 add bayes 2021-06-20 20:05:17 +02:00
4ea8113b15 add bayes 2021-06-20 20:05:08 +02:00
5 changed files with 109090 additions and 0 deletions

61
bayes.py Normal file
View File

@ -0,0 +1,61 @@
from sklearn.naive_bayes import GaussianNB
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
# Input files, in the order main() reads them: training set, dev input, test input.
PATHS = ['./train/train.tsv', './dev-0/in.tsv', './test-A/in.tsv']
# Prediction files written by main(): dev predictions first, then test predictions.
PATHS_OUTPUT = ['./dev-0/out.tsv', './test-A/out.tsv']
def get_data(path):
    """Load a header-less, tab-separated file into a DataFrame.

    Rows with too many fields are dropped (with a warning) instead of
    aborting the whole read.

    Args:
        path: Location of the TSV file to read.

    Returns:
        A ``pandas.DataFrame`` whose columns are numbered from 0.
    """
    import inspect

    # ``error_bad_lines`` was deprecated in pandas 1.3 and removed in 2.0.
    # ``on_bad_lines="warn"`` is its exact replacement for the old
    # ``error_bad_lines=False`` (with the default ``warn_bad_lines=True``).
    if "on_bad_lines" in inspect.signature(pd.read_table).parameters:
        bad_line_options = {"on_bad_lines": "warn"}
    else:
        # Older pandas only understands the legacy keyword.
        bad_line_options = {"error_bad_lines": False}
    return pd.read_table(path, sep='\t', header=None, **bad_line_options)
def get_X_y_train(data):
    """Split the training frame into features and labels.

    Column 1 is returned as the feature array and column 0 as the label
    array, in that order.
    """
    labels = data[0].values
    features = data[1].values
    return features, labels
def training(x, y):
    """Fit a TF-IDF vectorizer and a multinomial naive Bayes model.

    Args:
        x: Training documents passed to ``TfidfVectorizer.fit_transform``.
        y: Labels passed to ``MultinomialNB.fit``.

    Returns:
        A ``(classifier, vectorizer)`` tuple, both fitted.
    """
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(x)
    # fit() returns the estimator itself, so it can be chained here.
    classifier = MultinomialNB().fit(tfidf_matrix, y)
    return classifier, vectorizer
def predict(vectorizer, classifier, x):
    """Return the classifier's predictions for the documents in ``x``.

    ``x`` is first run through ``vectorizer.transform`` and the result is
    handed to ``classifier.predict``.
    """
    return classifier.predict(vectorizer.transform(x))
def generate_output(pred, path):
    """Write the predictions to ``path`` as text, one value per line.

    The newline is used as a separator between items, so no trailing
    newline follows the last value.
    """
    line_separator = '\n'
    pred.tofile(path, sep=line_separator)
def main():
    """Train on the training set, then write dev and test predictions.

    The first entry of ``PATHS`` is the training file; each remaining entry
    is an input file whose column 0 is classified. Predictions are written
    to the corresponding entries of ``PATHS_OUTPUT``.
    """
    train_path, *input_paths = PATHS

    # Fit the model on the training data.
    features, labels = get_X_y_train(get_data(train_path))
    classifier, vectorizer = training(features, labels)

    # Classify every input file before writing anything, so a failure while
    # reading or predicting leaves no partial output behind.
    predictions = [
        predict(vectorizer, classifier, get_data(input_path)[0].values)
        for input_path in input_paths
    ]

    for pred, output_path in zip(predictions, PATHS_OUTPUT):
        generate_output(pred, output_path)


# Run the pipeline only when executed as a script, not on import.
if __name__ == '__main__':
    main()

5452
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

BIN
geval Executable file

Binary file not shown.

5445
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff

98132
train/train.tsv Normal file

File diff suppressed because it is too large Load Diff