This commit is contained in:
s444501 2022-04-12 23:26:35 +02:00
parent 7947b532cb
commit 6df0ea0790

View File

@@ -35,7 +35,7 @@ dates = processed_documents['date']
# Vectorization # Vectorization
print(f"{len(processed_documents)} documents ready!") print(f"{len(processed_documents)} documents ready!")
print("Vectorizing...") print("Vectorizing...")
cv = CountVectorizer() cv = CountVectorizer(dtype='uint8')
transformer = TfidfTransformer() transformer = TfidfTransformer()
word_count_vector = cv.fit_transform(tweets) word_count_vector = cv.fit_transform(tweets)
@@ -44,7 +44,7 @@ try:
except: except:
words = cv.get_feature_names() words = cv.get_feature_names()
tf = pd.DataFrame(word_count_vector.toarray(), columns=words, dtype='int8') tf = pd.DataFrame(word_count_vector.toarray(), columns=words)
transformer.fit_transform(word_count_vector) transformer.fit_transform(word_count_vector)
tfidf_dict = {} tfidf_dict = {}