polish-urban-legends-426228/prog-dev0.py
Jan Przybylski dd5ffc0149 update
2021-04-13 11:34:07 +02:00

17 lines
627 B
Python

from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import sklearn.metrics
from sklearn.cluster import KMeans
INPUT_PATH = "dev-0/in.tsv"
OUTPUT_PATH = "dev-0/out.tsv"
N_CLUSTERS = 77


def format_labels(labels):
    """Return the cluster labels as text, one integer per line.

    Each label is written on its own newline-terminated line, in input
    order, so line i of the result corresponds to element i of *labels*.
    An empty sequence yields an empty string.

    This replaces ``np.array2string``, which surrounds the output with
    brackets, pads the numbers, and abbreviates arrays longer than 1000
    elements with ``...`` -- all of which corrupt a one-label-per-line file.
    """
    return "".join(f"{int(label)}\n" for label in labels)


def main():
    """Cluster the lines of the input file and write one label per line.

    Reads every line of ``INPUT_PATH`` as a separate document, vectorizes
    the documents with a default ``TfidfVectorizer``, groups them into
    ``N_CLUSTERS`` clusters with KMeans (``random_state=0``), and writes
    the resulting cluster index of each document to ``OUTPUT_PATH``.
    """
    with open(INPUT_PATH, encoding="utf-8") as in_file:
        content = in_file.readlines()

    # The original script also built TfidfVectorizer(ngram_range=(1,2),
    # use_idf=False) but overwrote it straight away; only the default
    # vectorizer was ever used, so only that one is kept.
    vectorizer = TfidfVectorizer()
    document_vectors = vectorizer.fit_transform(content)

    # fit_predict fits the model and returns the labels in a single pass;
    # the original fitted the same model twice.
    kmeans = KMeans(n_clusters=N_CLUSTERS, random_state=0)
    labels = kmeans.fit_predict(document_vectors)

    # "w" (not "a") so that re-running the script replaces the previous
    # predictions instead of appending to them; the context manager
    # guarantees the file is flushed and closed.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as out_file:
        out_file.write(format_labels(labels))


if __name__ == "__main__":
    main()