polish-urban-legends-public/mainTest.py

18 lines
653 B
Python
Raw Normal View History

2021-05-10 16:39:12 +02:00
import sklearn.metrics
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
def read_documents(path="test-A/in.tsv"):
    """Return the lines of *path* as a list, one document per line.

    Lines keep their trailing newline, exactly as ``readlines`` yields them.
    """
    # NOTE(review): UTF-8 is assumed for the input text instead of relying on
    # the platform default encoding -- confirm against the data set.
    with open(path, encoding="utf-8") as in_file:
        return in_file.readlines()


def cluster_documents(documents, n_clusters=77, random_state=0):
    """Cluster *documents* with TF-IDF features and KMeans.

    Args:
        documents: iterable of raw text documents.
        n_clusters: number of KMeans clusters to form.
        random_state: seed passed to KMeans.

    Returns:
        The cluster label assigned to each document, in input order.
    """
    # The original script first built a TfidfVectorizer(ngram_range=(1, 2),
    # use_idf=False) and then immediately replaced it with the default one;
    # only the default configuration was ever used, so only it is kept.
    vectorizer = TfidfVectorizer()
    document_vectors = vectorizer.fit_transform(documents)
    # A single fit_predict both trains the model and returns its labels; the
    # original called fit() and then fit_predict(), training twice.
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    return kmeans.fit_predict(document_vectors)


def format_labels(labels):
    """Serialize *labels* as text with one integer label per line.

    Unlike ``np.array2string``, this emits no surrounding brackets, no
    padding spaces, and never abbreviates long inputs with ``...``.
    """
    return "".join(f"{int(label)}\n" for label in labels)


def main():
    """Cluster test-A/in.tsv and write one label per line to test-A/out.tsv."""
    labels = cluster_documents(read_documents("test-A/in.tsv"))
    # Mode "w" (not "a") so a rerun replaces the previous predictions instead
    # of appending a second copy; the with-block guarantees flush and close.
    with open("test-A/out.tsv", "w", encoding="utf-8") as out_file:
        out_file.write(format_labels(labels))


if __name__ == "__main__":
    main()