16 lines
454 B
Python
16 lines
454 B
Python
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
|
|
import pandas as pd
|
|
|
|
|
|
class VectorizerTf:
    """Term-frequency vectorizer built on scikit-learn's ``CountVectorizer``.

    Attributes set by the constructor:
        vectorizer: the fitted ``CountVectorizer`` instance.
        tf_matrix: result of ``fit_transform(corpus)`` (document-term counts).
        feature_names: list of vocabulary terms reported by the vectorizer.
    """

    def __init__(self, corpus):
        """Fit a ``CountVectorizer`` on *corpus* and cache the results.

        Args:
            corpus: iterable of raw text documents, passed unchanged to
                ``CountVectorizer.fit_transform``.
        """
        vectorizer = CountVectorizer()
        self.tf_matrix = vectorizer.fit_transform(corpus)
        self.vectorizer = vectorizer

        # ``get_feature_names`` was deprecated in scikit-learn 1.0 and removed
        # in 1.2; prefer the replacement and fall back only on old releases.
        names_getter = getattr(vectorizer, "get_feature_names_out", None)
        if names_getter is None:
            names_getter = vectorizer.get_feature_names
        # The legacy method returned a list; keep that type for callers.
        self.feature_names = list(names_getter())

    def get_tf_for_document(self, term):
        """Return the count vector for a single piece of text.

        Args:
            term: raw text; it is wrapped in a one-element list and
                transformed with the already-fitted vectorizer.

        Returns:
            The dense array produced by ``transform([term]).toarray()``.
        """
        return self.vectorizer.transform([term]).toarray()
|
|
|