test
This commit is contained in:
parent
2ff0538dc3
commit
7947b532cb
10
okapi.py
10
okapi.py
@ -1,5 +1,5 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
|
||||
from statistics import mean
|
||||
|
||||
@ -33,14 +33,18 @@ retweets = processed_documents['retweets']
|
||||
dates = processed_documents['date']
|
||||
|
||||
# Vectorization
|
||||
print(f"{len(processed_documents)} documents ready!")
|
||||
print("Vectorizing...")
|
||||
cv = CountVectorizer()
|
||||
transformer = TfidfTransformer()
|
||||
|
||||
word_count_vector = cv.fit_transform(tweets)
|
||||
words = cv.get_feature_names_out()
|
||||
try:
|
||||
words = cv.get_feature_names_out()
|
||||
except:
|
||||
words = cv.get_feature_names()
|
||||
|
||||
tf = pd.DataFrame(word_count_vector.toarray(), columns=words)
|
||||
tf = pd.DataFrame(word_count_vector.toarray(), columns=words, dtype='int8')
|
||||
transformer.fit_transform(word_count_vector)
|
||||
|
||||
tfidf_dict = {}
|
||||
|
Loading…
Reference in New Issue
Block a user