This commit is contained in:
Jan Przybylski 2021-04-13 00:35:55 +02:00
parent e4adfb04dc
commit 6df1e0282b

13
prog.py Normal file
View File

@ -0,0 +1,13 @@
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import sklearn.metrics
from sklearn.cluster import KMeans
# Cluster the lines of dev-0/in.tsv into four groups using TF-IDF features
# and k-means, then print the cluster label assigned to each line.

# Each line of the input file is treated as one document. The explicit
# encoding keeps decoding independent of the platform's default locale.
with open("dev-0/in.tsv", encoding="utf-8") as f:
    content = f.readlines()

# Default TF-IDF configuration. An earlier bigram / no-IDF vectorizer was
# constructed here and immediately overwritten, so it never had any effect;
# it has been dropped to make the effective configuration obvious.
vectorizer = TfidfVectorizer()
document_vectors = vectorizer.fit_transform(content)

# fit_predict() both fits the model and returns the labels, so a separate
# fit() beforehand would only train the same model twice. random_state is
# fixed so repeated runs give the same clustering.
kmeans = KMeans(n_clusters=4, random_state=0)
pred_y = kmeans.fit_predict(document_vectors)
print(pred_y)