test

2022-04-12 23:26:35 +02:00 · 2022-04-12 23:26:35 +02:00 · 6df0ea0790
commit 6df0ea0790
parent 7947b532cb
1 changed file with 2 additions and 2 deletions
--- a/okapi.py
+++ b/okapi.py
@@ -35,7 +35,7 @@ dates = processed_documents['date']
 # Vectorization
 print(f"{len(processed_documents)} documents ready!")
 print("Vectorizing...")
-cv = CountVectorizer()
+cv = CountVectorizer(dtype='uint8')
 transformer = TfidfTransformer()

 word_count_vector = cv.fit_transform(tweets)
@@ -44,7 +44,7 @@ try:
 except:
    words = cv.get_feature_names()

-tf = pd.DataFrame(word_count_vector.toarray(), columns=words, dtype='int8')
+tf = pd.DataFrame(word_count_vector.toarray(), columns=words)
 transformer.fit_transform(word_count_vector)

 tfidf_dict = {}