polish-urban-legends-public/skrypt-test-A.py

20 lines
627 B
Python

from sklearn.cluster import KMeans
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import matplotlib.pyplot as plt
# Cluster the documents in test-A/in.tsv (one document per line) with k-means
# over TF-IDF features, then write one cluster id per line to test-A/out.tsv.

# Explicit encoding so decoding does not depend on the platform's locale
# (assumes the corpus is UTF-8 encoded -- TODO confirm).
with open("test-A/in.tsv", encoding="utf-8") as in_file:
    corpus = in_file.readlines()

# Unigram + bigram TF-IDF features.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), use_idf=True)
vectors = vectorizer.fit_transform(corpus)

# fit_predict() fits the model and returns the cluster index of every row in
# one pass. Calling fit() first and fit_predict() afterwards trained the model
# twice; with a fixed random_state the second run only repeated the first.
# NOTE: KMeans rejects inputs with fewer samples than n_clusters, so the
# corpus must contain at least 255 lines.
kmeans = KMeans(n_clusters=255, random_state=0)
labels = kmeans.fit_predict(vectors)

# Write the labels one per line. np.array2string is meant for display: it
# abbreviates arrays above its print threshold (1000 elements by default) with
# "...", pads elements to a common width and wraps long lines, all of which
# corrupt a line-per-document output file.
# The context manager guarantees the file is flushed and closed.
with open("test-A/out.tsv", "w", encoding="utf-8") as out_file:
    out_file.writelines(f"{label}\n" for label in labels)