okapi/vectorizer_idf.py
Mikołaj Pokrywka d70c623066 to present
2022-04-12 22:46:50 +02:00

14 lines
448 B
Python

from sklearn.feature_extraction.text import TfidfVectorizer
from spacy.lang.en.stop_words import STOP_WORDS as en_stop
class VectorizerIdf:
    """Fit a TF-IDF vectorizer on a corpus and expose per-term IDF weights.

    Attributes are set once in ``__init__``:

    * ``vectorizer`` -- the fitted ``TfidfVectorizer``.
    * ``matrix`` -- the value returned by ``fit_transform(corpus)``.
    """

    def __init__(self, corpus):
        """Fit the vectorizer on *corpus*, ignoring spaCy's English stop words.

        Args:
            corpus: the documents handed to ``TfidfVectorizer.fit_transform``
                (scikit-learn expects an iterable of strings).
        """
        # spaCy ships STOP_WORDS as a set, but scikit-learn documents
        # ``stop_words`` as 'english', a list, or None, and recent releases
        # reject any other type during parameter validation. Convert to a
        # list so the same words are filtered on every supported version.
        # ``use_idf`` is left at its default (True).
        vectorizer = TfidfVectorizer(stop_words=list(en_stop))
        self.matrix = vectorizer.fit_transform(corpus)
        self.vectorizer = vectorizer

    def get_idf_for_word(self, term: str) -> float:
        """Return the learned IDF weight of *term*.

        The lookup is an exact match against the fitted vocabulary, so
        *term* must be in the form the vectorizer stored it (with the
        settings used here scikit-learn lowercases tokens by default).

        Raises:
            KeyError: if *term* is not in the fitted vocabulary, e.g. it
                never occurred in the corpus or was removed as a stop word.
        """
        column = self.vectorizer.vocabulary_[term]
        return self.vectorizer.idf_[column]