13 lines
355 B
Python
13 lines
355 B
Python
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
|
|
|
|
class VectorizerIdf:
    """Look up inverse-document-frequency (IDF) weights learned from a corpus.

    A ``TfidfVectorizer`` is fitted once at construction time and kept on
    ``self.vectorizer``; afterwards individual terms can be queried for
    their learned IDF weight.
    """

    def __init__(self, corpus):
        """Fit a ``TfidfVectorizer`` on *corpus* and store it.

        Args:
            corpus: The documents handed to ``TfidfVectorizer.fit``
                (presumably an iterable of raw text strings -- confirm
                against callers).
        """
        vectorizer = TfidfVectorizer(use_idf=True)
        # Only the fitted state (vocabulary_ and idf_) is needed later, so
        # fit() is enough; fit_transform() would additionally build a
        # TF-IDF matrix that is thrown away immediately.
        vectorizer.fit(corpus)
        self.vectorizer = vectorizer

    def get_idf_for_word(self, term):
        """Return the IDF weight the fitted vectorizer learned for *term*.

        Args:
            term: A vocabulary entry exactly as stored in the vectorizer's
                ``vocabulary_`` mapping. NOTE(review): with the default
                ``TfidfVectorizer`` settings tokens are lower-cased, so
                mixed-case input will likely not match -- confirm.

        Returns:
            The entry of ``idf_`` at the index ``vocabulary_`` maps *term* to.

        Raises:
            KeyError: If *term* is not in the fitted vocabulary.
        """
        column = self.vectorizer.vocabulary_[term]
        return self.vectorizer.idf_[column]
|