2020-04-27 16:04:14 +02:00
|
|
|
#!/usr/bin/python3
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
import csv
|
|
|
|
import pickle
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
def _load_pickle(path):
    """Unpickle and return the object stored in the file at *path*."""
    # NOTE(review): unpickling can execute arbitrary code, so the model
    # files must come from a trusted source.
    # The context manager closes the handle even if unpickling fails.
    with open(path, "rb") as model_file:
        return pickle.load(model_file)


def predict():
    """Score the dev-0 and test-A documents with the pickled model.

    Reads the "document" column of ``dev-0/in.tsv`` and ``test-A/in.tsv``
    (tab-separated, no header row, quoting disabled), loads the classifier
    from ``clf.model`` and the vectorizer from ``vectorizer.model``,
    transforms both document sets, and writes the second column of
    ``clf.predict_proba`` for each set to ``test-A/out.tsv`` and
    ``dev-0/out.tsv``, one ``%f``-formatted value per line.

    All paths are relative to the current working directory. Returns None.
    """
    dev0 = pd.read_csv(
        "dev-0/in.tsv",
        delimiter="\t",
        header=None,
        names=["document", "date"],
        quoting=csv.QUOTE_NONE,
    )["document"]
    testA = pd.read_csv(
        "test-A/in.tsv",
        delimiter="\t",
        header=None,
        names=["document", "date"],
        quoting=csv.QUOTE_NONE,
    )["document"]

    clf = _load_pickle("clf.model")
    vectorizer = _load_pickle("vectorizer.model")

    dev0_vectorizer = vectorizer.transform(dev0)
    testA_vectorizer = vectorizer.transform(testA)

    # Column index 1 of predict_proba is kept; presumably this is the
    # positive-class probability -- confirm against the trained model.
    y_dev = clf.predict_proba(dev0_vectorizer)[:, 1]
    y_test = clf.predict_proba(testA_vectorizer)[:, 1]

    np.savetxt('test-A/out.tsv', y_test, '%f')
    np.savetxt('dev-0/out.tsv', y_dev, '%f')
|
2020-04-27 16:04:14 +02:00
|
|
|
|
|
|
|
# Script entry point: run the prediction pipeline. There is no
# `if __name__ == "__main__":` guard, so this also runs on import.
predict()
|