"""Cluster the documents in ``dev-0/in.tsv`` with TF-IDF features and k-means.

Each line of the input file is treated as one document.  The cluster id
assigned to every document is written to ``dev-0/out.tsv``, one integer per
line, in the same order as the input lines.
"""
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import sklearn.metrics
from sklearn.cluster import KMeans

INPUT_PATH = "dev-0/in.tsv"
OUTPUT_PATH = "dev-0/out.tsv"
N_CLUSTERS = 77
RANDOM_STATE = 0

# Explicit encoding so decoding does not depend on the platform locale.
with open(INPUT_PATH, encoding="utf-8") as f:
    content = f.readlines()

# Default TF-IDF settings (unigrams, IDF weighting enabled).  The earlier
# bigram / use_idf=False vectorizer was overwritten before ever being used,
# so it has been dropped.
vectorizer = TfidfVectorizer()
document_vectors = vectorizer.fit_transform(content)

# Fit once: labels_ already holds the assignment for every training document,
# so a second fit_predict() pass on the same data only repeated the work.
kmeans = KMeans(n_clusters=N_CLUSTERS, random_state=RANDOM_STATE).fit(document_vectors)

# np.array2string is a display formatter: it wraps the data in brackets, pads
# values with spaces and abbreviates arrays longer than 1000 elements with
# "...".  np.savetxt writes every label, one per line, and opens the file in
# write mode (closing it afterwards), so reruns replace the previous output
# instead of appending to it.
np.savetxt(OUTPUT_PATH, kmeans.labels_, fmt="%d")