From 6df0ea07906d85bab04c5aab37c5755008d93d18 Mon Sep 17 00:00:00 2001 From: s444501 Date: Tue, 12 Apr 2022 23:26:35 +0200 Subject: [PATCH] test --- okapi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/okapi.py b/okapi.py index 3bf491a..5c862ea 100644 --- a/okapi.py +++ b/okapi.py @@ -35,7 +35,7 @@ dates = processed_documents['date'] # Vectorization print(f"{len(processed_documents)} documents ready!") print("Vectorizing...") -cv = CountVectorizer() +cv = CountVectorizer(dtype='uint8') transformer = TfidfTransformer() word_count_vector = cv.fit_transform(tweets) @@ -44,7 +44,7 @@ try: except: words = cv.get_feature_names() -tf = pd.DataFrame(word_count_vector.toarray(), columns=words, dtype='int8') +tf = pd.DataFrame(word_count_vector.toarray(), columns=words) transformer.fit_transform(word_count_vector) tfidf_dict = {}