"""Cluster the lines of a text file with TF-IDF features and k-means.

Reads one document per line from ``INPUT_PATH``, vectorizes the lines with
unigram + bigram TF-IDF, groups them into ``N_CLUSTERS`` clusters and writes
one integer cluster label per input line to ``OUTPUT_PATH``.
"""
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

INPUT_PATH = "test-A/in.tsv"
OUTPUT_PATH = "test-A/out.tsv"
N_CLUSTERS = 255
RANDOM_STATE = 0


def main():
    """Run the clustering pipeline from ``INPUT_PATH`` to ``OUTPUT_PATH``."""
    with open(INPUT_PATH) as source:
        corpus = source.readlines()

    vectorizer = TfidfVectorizer(ngram_range=(1, 2), use_idf=True)
    vectors = vectorizer.fit_transform(corpus)

    # Fit once: fit_predict both fits the model and returns the labels, so a
    # separate .fit() beforehand only doubles the runtime.
    kmeans = KMeans(n_clusters=N_CLUSTERS, random_state=RANDOM_STATE)
    labels = kmeans.fit_predict(vectors)

    # np.array2string abbreviates long arrays with "..." and pads elements
    # with spaces, which corrupts the output for large inputs. savetxt writes
    # every label, unpadded, one per line, and closes the file when done.
    np.savetxt(OUTPUT_PATH, labels, fmt="%d")


if __name__ == "__main__":
    main()