#!/usr/bin/env python
# coding: utf-8
"""Cluster the documents of the dev-0 and test-A sets with TF-IDF + KMeans.

For each data set the first column of ``<set>/in.tsv`` is vectorised with
``TfidfVectorizer``, grouped into clusters by ``KMeans``, and the cluster
label of every document is written to ``<set>/out.tsv``, one label per line.
"""

import csv

import pandas as pd
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

# Number of clusters requested from KMeans for every data set.
N_CLUSTERS = 50


def cluster_documents(in_path, out_path, n_clusters=N_CLUSTERS,
                      random_state=None):
    """Cluster the documents stored in *in_path* and save the labels.

    Args:
        in_path: Path of a headerless, tab-separated file; only its first
            column is used, one document per row.
        out_path: Path of the file that receives the cluster labels,
            newline-separated, in the same order as the input rows.
        n_clusters: Number of clusters passed to ``KMeans``.
        random_state: Optional seed passed to ``KMeans``. ``None`` (the
            default) leaves the initialisation unseeded, so labels may
            differ between runs.

    Returns:
        The array of cluster labels produced by ``KMeans.fit_predict``.
    """
    data = pd.read_csv(
        in_path,
        header=None,
        quoting=csv.QUOTE_NONE,
        sep='\t',
        # Read every cell as text: without this, cells such as "NA" or
        # "null" become float NaN, which TfidfVectorizer rejects.
        dtype=str,
        keep_default_na=False,
        # Keep blank lines as empty documents so the output has exactly one
        # label per input line.
        skip_blank_lines=False,
    )
    features = TfidfVectorizer().fit_transform(data[0].values)
    labels = KMeans(
        n_clusters=n_clusters, random_state=random_state
    ).fit_predict(features)

    # Save the results: labels separated by newlines (text mode).
    labels.tofile(out_path, sep='\n')
    return labels


# dev-0
dev0_y = cluster_documents('dev-0/in.tsv', 'dev-0/out.tsv')

# test-A
testA_y = cluster_documents('test-A/in.tsv', 'test-A/out.tsv')