diff --git a/predict_rm.py b/predict_rm.py
index d080a11..95762ba 100644
--- a/predict_rm.py
+++ b/predict_rm.py
@@ -1,30 +1,34 @@
-#!/usr/bin/python3
-
-import pandas as pd
-import csv
-import pickle
-
-def predict():
-    dev0 = pd.read_csv("dev-0/in.tsv", delimiter="\t", header=None, names=["document","date"], quoting=csv.QUOTE_NONE)
-    testA = pd.read_csv("test-A/in.tsv", delimiter="\t", header=None, names=["document","date"], quoting=csv.QUOTE_NONE)
-    devdoc = dev["document"]
-    testdoc = testA["document"]
-
-    clf = pickle.load(open("clf.model", "rb"))
-    vectorizer = pickle.load(open("vectorizer.model", "rb"))
-
-    dev0_vectorizer = vectorizer.transform(devdoc)
-    testA_vectorizer = vectorizer.transform(testdoc)
-
-    y_dev = clf.predict_proba(dev0_vectorizer)
-    y_test = clf.predict_proba(testA_vectorizer)
-
-    with open("dev-0/out.tsv", "w") as devout:
-        for line in y_dev:
-            devout.write(str(line[1])+"\n")
-
-    with open("test-A/out.tsv", "w") as testaout:
-        for line in y_test:
-            testaout.write(str(line[1])+"\n")
-
-predict()
+#!/usr/bin/python3
+
+import pandas as pd
+import csv
+import pickle
+import numpy as np
+
+def predict():
+    """Write classifier probabilities for the dev-0 and test-A inputs.
+
+    Reads the "document" column of dev-0/in.tsv and test-A/in.tsv, transforms
+    it with the pickled vectorizer, and saves column 1 of the classifier's
+    predict_proba output to the out.tsv file in the same directory as the input.
+    """
+    dev0 = pd.read_csv("dev-0/in.tsv", delimiter="\t", header=None, names=["document","date"], quoting=csv.QUOTE_NONE)["document"]
+    testA = pd.read_csv("test-A/in.tsv", delimiter="\t", header=None, names=["document","date"], quoting=csv.QUOTE_NONE)["document"]
+
+    # NOTE: pickle can run arbitrary code on load; only use trusted model files.
+    with open("clf.model", "rb") as clf_file:
+        clf = pickle.load(clf_file)
+    with open("vectorizer.model", "rb") as vectorizer_file:
+        vectorizer = pickle.load(vectorizer_file)
+
+    dev0_vectorizer = vectorizer.transform(dev0)
+    testA_vectorizer = vectorizer.transform(testA)
+
+    y_dev = clf.predict_proba(dev0_vectorizer)[:, 1]
+    y_test = clf.predict_proba(testA_vectorizer)[:, 1]
+
+    # Each split's predictions go to the out.tsv beside that split's in.tsv.
+    np.savetxt('dev-0/out.tsv', y_dev, '%f')
+    np.savetxt('test-A/out.tsv', y_test, '%f')
+
+predict()