test

2022-04-12 23:15:34 +02:00 · 2022-04-12 23:15:34 +02:00 · 7947b532cb
commit 7947b532cb
parent 2ff0538dc3
1 changed file with 7 additions and 3 deletions
--- a/okapi.py
+++ b/okapi.py
@@ -1,5 +1,5 @@
-import pandas as pd
 import numpy as np
+import pandas as pd
 from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
 from statistics import mean

@@ -33,14 +33,18 @@ retweets = processed_documents['retweets']
 dates = processed_documents['date']

 # Vectorization
+print(f"{len(processed_documents)} documents ready!")
 print("Vectorizing...")
 cv = CountVectorizer()
 transformer = TfidfTransformer()

 word_count_vector = cv.fit_transform(tweets)
-words = cv.get_feature_names_out()
+try:
+    words = cv.get_feature_names_out()
+except:
+    words = cv.get_feature_names()

-tf = pd.DataFrame(word_count_vector.toarray(), columns=words)
+tf = pd.DataFrame(word_count_vector.toarray(), columns=words, dtype='int8')
 transformer.fit_transform(word_count_vector)

 tfidf_dict = {}