"""Score the ``dev-0`` and ``test-A`` splits with previously pickled models.

Running this file loads ``l_regression.pkl`` and ``tfidf_model.pkl`` from the
current working directory, vectorizes ``<split>/in.tsv`` and writes one
prediction per line to ``<split>/out.tsv``.
"""
import csv
import pickle
# Was ``from typing import re``: that pseudo-module was removed in
# Python 3.13 and made the script fail at import time.  The real ``re``
# module still binds the same name and provides ``Pattern`` / ``Match``.
import re

import numpy
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer


def _load_pickle(path, **load_kwargs):
    """Unpickle and return the object stored at *path*, closing the file.

    Extra keyword arguments are forwarded to ``pickle.load``.
    """
    # SECURITY: unpickling executes arbitrary code embedded in the file;
    # only load model files that come from a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle, **load_kwargs)


def _read_texts(path):
    """Read the single-column TSV at *path* and return its rows as strings.

    Blank lines are kept (``skip_blank_lines=False``); every value,
    including the missing values they produce, is converted with ``np.str_``.
    """
    options = dict(
        delimiter="\t",
        header=None,
        names=["txt"],
        quoting=csv.QUOTE_NONE,
        skip_blank_lines=False,
    )
    try:
        # pandas >= 1.3 spelling; "warn" reports and drops malformed lines.
        frame = pd.read_csv(path, on_bad_lines="warn", **options)
    except TypeError:
        # Older pandas does not know ``on_bad_lines``; use the legacy flag
        # (which pandas 2.0 removed, hence the attempt above comes first).
        frame = pd.read_csv(path, error_bad_lines=False, **options)
    return frame["txt"].apply(np.str_)


def predict():
    """Write model predictions for the ``dev-0`` and ``test-A`` splits.

    Reads ``l_regression.pkl``, ``tfidf_model.pkl``, ``dev-0/in.tsv`` and
    ``test-A/in.tsv``; writes ``dev-0/out.tsv`` and ``test-A/out.tsv`` with
    ``numpy.savetxt``.  Returns ``None``.
    """
    l_regression = _load_pickle("l_regression.pkl")
    tfidf = _load_pickle("tfidf_model.pkl", encoding="UTF-8")

    # Read every input before writing anything, so a missing input file
    # does not leave a partial set of outputs behind.
    splits = ("dev-0", "test-A")
    inputs = [(split, _read_texts(split + "/in.tsv")) for split in splits]

    for split, texts in inputs:
        # Use the vectorizer as it was fitted at training time.  Calling
        # ``fit_transform`` here would rebuild the vocabulary and IDF weights
        # from the evaluation data, so the columns would no longer mean what
        # the regressor was trained on.
        vectors = tfidf.transform(texts)

        # NOTE(review): the SVD is still fitted separately on each split,
        # as in the original code, because no fitted SVD is stored next to
        # the other pickles.  Its components therefore presumably differ
        # from the ones used in training -- persist the training-time SVD
        # and call ``transform`` here instead; confirm against the training
        # script.
        reducer = TruncatedSVD(n_components=120)
        reduced = reducer.fit_transform(vectors)

        numpy.savetxt(split + "/out.tsv", l_regression.predict(reduced))


# Kept unguarded on purpose: the original script ran the prediction at
# module level, so importing this file also triggers it.
predict()