init
This commit is contained in:
parent
e4adfb04dc
commit
6df1e0282b
13
prog.py
Normal file
13
prog.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
import numpy as np
|
||||||
|
import sklearn.metrics
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
|
||||||
|
# Cluster the lines of dev-0/in.tsv into four groups with TF-IDF + k-means
# and print the resulting cluster label of every line.

# Each line of the input file is treated as one document.
# NOTE(review): assumes the file is UTF-8 encoded — confirm against the data.
with open("dev-0/in.tsv", encoding="utf-8") as f:
    content = f.readlines()

# Default TF-IDF settings. An earlier configuration
# (ngram_range=(1, 2), use_idf=False) was assigned and then immediately
# overwritten, so it never took effect and has been dropped.
vectorizer = TfidfVectorizer()
document_vectors = vectorizer.fit_transform(content)

# fit_predict() fits the model itself, so a separate fit() beforehand only
# repeated the same work; the fixed integer random_state keeps the labels
# reproducible between runs.
kmeans = KMeans(n_clusters=4, random_state=0)
pred_y = kmeans.fit_predict(document_vectors)
print(pred_y)
|
Loading…
Reference in New Issue
Block a user