from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
import pandas as pd
from spacy.lang.en.stop_words import STOP_WORDS as en_stop


class VectorizerTf:
    """Term-frequency (raw count) vectorizer fitted on a corpus.

    Wraps a scikit-learn ``CountVectorizer`` configured with spaCy's English
    stop-word list.

    Attributes:
        tf_matrix: Sparse document-term count matrix produced by fitting the
            vectorizer on the corpus passed to the constructor.
        vectorizer: The fitted ``CountVectorizer`` instance.
        feature_names: List of vocabulary terms, in the column order of
            ``tf_matrix``.
    """

    def __init__(self, corpus):
        """Fit the vectorizer on ``corpus`` and store the resulting counts.

        Args:
            corpus: Iterable of raw text documents (strings).
        """
        # spaCy exposes STOP_WORDS as a set, but recent scikit-learn releases
        # validate ``stop_words`` and accept only 'english', a list or None.
        # Sorting yields a list with a deterministic order.
        vectorizer = CountVectorizer(stop_words=sorted(en_stop))
        self.tf_matrix = vectorizer.fit_transform(corpus)
        self.vectorizer = vectorizer
        self.feature_names = self._extract_feature_names(vectorizer)

    @staticmethod
    def _extract_feature_names(vectorizer):
        """Return the fitted vocabulary as a plain list of strings."""
        # get_feature_names() was removed from newer scikit-learn releases in
        # favour of get_feature_names_out(); prefer the new API and fall back
        # to the old one so both old and new installations keep working.
        names_out = getattr(vectorizer, "get_feature_names_out", None)
        if names_out is not None:
            # The new API returns an ndarray; keep the attribute a list as
            # the old API did.
            return names_out().tolist()
        return vectorizer.get_feature_names()

    def get_tf_for_document(self, term):
        """Return the term-count vector of a single text.

        Args:
            term: Text to vectorize; it is treated as one document.

        Returns:
            Dense 2-D array with a single row holding the count of each
            vocabulary term in ``term``.
        """
        return self.vectorizer.transform([term]).toarray()