This commit is contained in:
s444501 2022-04-12 23:26:35 +02:00
parent 7947b532cb
commit 6df0ea0790

View File

@ -35,7 +35,7 @@ dates = processed_documents['date']
# Vectorization
# Announce how many entries `processed_documents` holds before the counting
# step starts.
print(f"{len(processed_documents)} documents ready!")
print("Vectorizing...")
# NOTE(review): this text looks like a rendered diff whose +/- markers were
# lost, so the two `cv = ...` lines below are most likely the old and new
# version of ONE assignment rather than two consecutive statements -- confirm
# against the real file. If both do run, the second simply rebinds `cv` and the
# first vectorizer is discarded unused.
cv = CountVectorizer()
# Same vectorizer, but requesting an 8-bit unsigned dtype for the counts.
# NOTE(review): presumably scikit-learn's CountVectorizer; with uint8 a single
# term occurring more than 255 times in one document would not fit -- confirm
# that cannot happen for this corpus.
cv = CountVectorizer(dtype='uint8')
# Only constructed here; it is applied to `word_count_vector` further down.
transformer = TfidfTransformer()
# Fit `cv` on `tweets` and keep the resulting per-document term counts
# (converted with `.toarray()` later, so presumably a sparse matrix).
word_count_vector = cv.fit_transform(tweets)
@ -44,7 +44,7 @@ try:
except:
words = cv.get_feature_names()
tf = pd.DataFrame(word_count_vector.toarray(), columns=words, dtype='int8')
tf = pd.DataFrame(word_count_vector.toarray(), columns=words)
transformer.fit_transform(word_count_vector)
tfidf_dict = {}