diff --git a/okapi.py b/okapi.py index 3bf491a..5c862ea 100644 --- a/okapi.py +++ b/okapi.py @@ -35,7 +35,7 @@ dates = processed_documents['date'] # Vectorization print(f"{len(processed_documents)} documents ready!") print("Vectorizing...") -cv = CountVectorizer() +cv = CountVectorizer(dtype='uint8') transformer = TfidfTransformer() word_count_vector = cv.fit_transform(tweets) @@ -44,7 +44,7 @@ try: except: words = cv.get_feature_names() -tf = pd.DataFrame(word_count_vector.toarray(), columns=words, dtype='int8') +tf = pd.DataFrame(word_count_vector.toarray(), columns=words) transformer.fit_transform(word_count_vector) tfidf_dict = {}