test

2022-04-12 23:15:34 +02:00 · 2022-04-12 23:15:34 +02:00 · 7947b532cb
commit 7947b532cb
parent 2ff0538dc3
1 changed file with 7 additions and 3 deletions
--- a/okapi.py
+++ b/okapi.py
@@ -1,5 +1,5 @@
 import pandas as pd
 import numpy as np
 import pandas as pd
 from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
 from statistics import mean
@@ -33,14 +33,18 @@ retweets = processed_documents['retweets']
 dates = processed_documents['date']
 # Vectorization
 print(f"{len(processed_documents)} documents ready!")
 print("Vectorizing...")
 cv = CountVectorizer()
 transformer = TfidfTransformer()
 word_count_vector = cv.fit_transform(tweets)
-words = cv.get_feature_names_out()
+try:
+    words = cv.get_feature_names_out()
+except:
+    words = cv.get_feature_names()
-tf = pd.DataFrame(word_count_vector.toarray(), columns=words)
+tf = pd.DataFrame(word_count_vector.toarray(), columns=words, dtype='int8')
 transformer.fit_transform(word_count_vector)
 tfidf_dict = {}