okapi/vectorizer_tf.py

17 lines
531 B
Python
Raw Normal View History

2022-04-10 18:18:05 +02:00
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
import pandas as pd
2022-04-12 22:46:50 +02:00
from spacy.lang.en.stop_words import STOP_WORDS as en_stop
2022-04-10 18:18:05 +02:00
class VectorizerTf:
    """Raw term-count (TF) vectorizer fitted on a corpus.

    Wraps a scikit-learn ``CountVectorizer`` configured with spaCy's English
    stop-word list.

    Attributes:
        vectorizer: The fitted ``CountVectorizer``.
        tf_matrix: Result of ``fit_transform(corpus)``; per the scikit-learn
            documentation this is a sparse document-term count matrix with
            one row per corpus entry.
        feature_names: ``list`` of vocabulary terms, in the column order of
            ``tf_matrix``.
    """

    def __init__(self, corpus):
        """Fit the vectorizer on ``corpus`` and cache the resulting counts.

        Args:
            corpus: Iterable of raw text documents, passed unchanged to
                ``CountVectorizer.fit_transform``.
        """
        # spaCy exposes STOP_WORDS as a set, but scikit-learn >= 1.2 validates
        # ``stop_words`` and only accepts 'english', a list, or None.
        # Sorting yields a list with a deterministic order.
        vectorizer = CountVectorizer(stop_words=sorted(en_stop))
        self.tf_matrix = vectorizer.fit_transform(corpus)
        self.vectorizer = vectorizer

        # ``get_feature_names`` was deprecated in scikit-learn 1.0 and removed
        # in 1.2; prefer its replacement and fall back on older releases.
        get_names = getattr(vectorizer, "get_feature_names_out", None)
        if get_names is None:
            get_names = vectorizer.get_feature_names
        # Keep the attribute a plain list, as the legacy method returned.
        self.feature_names = list(get_names())

    def get_tf_for_document(self, term):
        """Return the dense term-count vector for a single text.

        Despite its name, ``term`` is vectorized as one whole document using
        the vocabulary learned in ``__init__``.

        Args:
            term: Text to vectorize.

        Returns:
            The dense form (``toarray()``) of the one-row matrix produced by
            transforming ``[term]``.
        """
        return self.vectorizer.transform([term]).toarray()