This commit is contained in:
s444501 2022-04-12 23:15:34 +02:00
parent 2ff0538dc3
commit 7947b532cb
1 changed file with 7 additions and 3 deletions

View File

@ -1,5 +1,5 @@
import pandas as pd
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from statistics import mean
@ -33,14 +33,18 @@ retweets = processed_documents['retweets']
# Pull the 'date' column out of the processed documents.
dates = processed_documents['date']
# Vectorization: build raw term counts for every tweet; the TfidfTransformer
# created here is applied to those counts further down.
print(f"{len(processed_documents)} documents ready!")
print("Vectorizing...")
cv = CountVectorizer()
transformer = TfidfTransformer()
# Learn the vocabulary from `tweets` and return the document-term count matrix.
word_count_vector = cv.fit_transform(tweets)
# NOTE(review): this unguarded call duplicates the guarded lookup that follows
# it; it looks like the pre-change line of the diff - confirm before treating
# this text as runnable code.
words = cv.get_feature_names_out()
# Fetch the vocabulary terms in column order, supporting both the current and
# the legacy scikit-learn accessor.
try:
    words = cv.get_feature_names_out()
except AttributeError:
    # Older scikit-learn releases do not define get_feature_names_out(); only
    # that case should trigger the fallback. Any other error (e.g. an unfitted
    # vectorizer, Ctrl-C) must propagate instead of being silently swallowed.
    words = cv.get_feature_names()
# Dense term-count table: one column per vocabulary word.
# NOTE(review): this assignment is overwritten by the next line; it looks like
# the pre-change line of the diff - confirm before treating this as runnable code.
tf = pd.DataFrame(word_count_vector.toarray(), columns=words)
# Same table stored as int8 to shrink the dense matrix.
# NOTE(review): int8 tops out at 127 - assumes no word occurs more than 127
# times in a single document; confirm against the data.
tf = pd.DataFrame(word_count_vector.toarray(), columns=words, dtype='int8')
# The return value is discarded here; only the fitted state of `transformer`
# is kept by this statement.
transformer.fit_transform(word_count_vector)
# Starts out empty at this point.
tfidf_dict = {}