Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

5 changed files with 0 additions and 109090 deletions

View File

@@ -1,61 +0,0 @@
from sklearn.naive_bayes import GaussianNB
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
# Input files as consumed by main(): [0] training data, [1] dev input, [2] test input.
PATHS = ['./train/train.tsv', './dev-0/in.tsv', './test-A/in.tsv']
# Prediction output files written by main(): [0] for the dev input, [1] for the test input.
PATHS_OUTPUT = ['./dev-0/out.tsv', './test-A/out.tsv']
def get_data(path):
    """Load a headerless, tab-separated file into a DataFrame.

    Malformed rows (for example rows with too many fields) are dropped with
    a warning instead of aborting the whole read.

    Args:
        path: Location of the TSV file to read.

    Returns:
        A pandas DataFrame. Because the file is read with ``header=None``,
        its columns are labelled with integers starting at 0.
    """
    try:
        # ``error_bad_lines`` was deprecated in pandas 1.3 and removed in 2.0.
        # ``on_bad_lines='warn'`` is its replacement and matches the old
        # default pairing (error_bad_lines=False, warn_bad_lines=True).
        return pd.read_table(path, on_bad_lines='warn', sep='\t', header=None)
    except TypeError:
        # pandas < 1.3 does not accept ``on_bad_lines``; use the legacy
        # keyword there so behaviour is the same on old installations.
        return pd.read_table(path, error_bad_lines=False, sep='\t', header=None)
def get_X_y_train(data):
    """Split a training table into features and labels.

    Args:
        data: Table indexable by column label; column 1 is taken as the
            features and column 0 as the labels.

    Returns:
        A ``(X_train, y_train)`` tuple holding the ``.values`` of column 1
        and column 0 respectively.
    """
    return data[1].values, data[0].values
def training(x, y):
    """Fit a TF-IDF vectorizer and a multinomial Naive Bayes classifier.

    Args:
        x: Training inputs passed to ``TfidfVectorizer.fit_transform``.
        y: Training labels passed to ``MultinomialNB.fit``.

    Returns:
        A ``(classifier, vectorizer)`` tuple, both fitted on the given data.
    """
    vectorizer = TfidfVectorizer()
    classifier = MultinomialNB()
    # The classifier is trained on the vectorized form of ``x``.
    classifier.fit(vectorizer.fit_transform(x), y)
    return classifier, vectorizer
def predict(vectorizer, classifier, x):
    """Vectorize ``x`` and return the classifier's predictions for it.

    Args:
        vectorizer: Object exposing ``transform``; applied to ``x`` first.
        classifier: Object exposing ``predict``; applied to the transformed
            input.
        x: Inputs to classify.

    Returns:
        Whatever ``classifier.predict`` returns for the transformed input.
    """
    return classifier.predict(vectorizer.transform(x))
def generate_output(pred, path):
    """Write predictions to ``path`` with a newline between items.

    Args:
        pred: Object providing ``tofile(path, sep=...)``; presumably the
            numpy array returned by the classifier -- verify against caller.
        path: Destination file.
    """
    item_separator = '\n'
    pred.tofile(path, sep=item_separator)
def main():
    """Train on the training file, then write dev and test predictions.

    Reads the training data from ``PATHS[0]``, fits the model, predicts
    labels for column 0 of the files in ``PATHS[1]`` (dev) and ``PATHS[2]``
    (test), and writes the predictions to the matching ``PATHS_OUTPUT``
    entries.
    """
    # Fit the model on the training set.
    train_frame = get_data(PATHS[0])
    features, labels = get_X_y_train(train_frame)
    classifier, vectorizer = training(features, labels)

    # Predict for dev first, then test; nothing is written until both
    # prediction runs have finished.
    dev_path, test_path = PATHS[1], PATHS[2]
    predictions = [
        predict(vectorizer, classifier, get_data(input_path)[0].values)
        for input_path in (dev_path, test_path)
    ]

    # Write each prediction array to its output file (dev, then test).
    for pred, output_path in zip(predictions, (PATHS_OUTPUT[0], PATHS_OUTPUT[1])):
        generate_output(pred, output_path)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

File diff suppressed because it is too large Load Diff

BIN
geval

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff