test
This commit is contained in:
parent
7947b532cb
commit
6df0ea0790
4
okapi.py
4
okapi.py
@ -35,7 +35,7 @@ dates = processed_documents['date']
|
||||
# Vectorization
|
||||
print(f"{len(processed_documents)} documents ready!")
|
||||
print("Vectorizing...")
|
||||
cv = CountVectorizer()
|
||||
cv = CountVectorizer(dtype='uint8')
|
||||
transformer = TfidfTransformer()
|
||||
|
||||
word_count_vector = cv.fit_transform(tweets)
|
||||
@ -44,7 +44,7 @@ try:
|
||||
except:
|
||||
words = cv.get_feature_names()
|
||||
|
||||
tf = pd.DataFrame(word_count_vector.toarray(), columns=words, dtype='int8')
|
||||
tf = pd.DataFrame(word_count_vector.toarray(), columns=words)
|
||||
transformer.fit_transform(word_count_vector)
|
||||
|
||||
tfidf_dict = {}
|
||||
|
Loading…
Reference in New Issue
Block a user