From 89a269ccda04e4f427f73a77fdaf65bb837892db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Kolasin=CC=81ski?= Date: Tue, 13 Apr 2021 20:59:56 +0200 Subject: [PATCH] Script fix --- classifier.py | 4 +- dev-0/out.tsv | 156 +++++++++++++++++++++++++------------------------- 2 files changed, 80 insertions(+), 80 deletions(-) diff --git a/classifier.py b/classifier.py index e766a08..0afbe15 100644 --- a/classifier.py +++ b/classifier.py @@ -24,7 +24,7 @@ for sentence in sentences: spl = [x.lower() for x in spl] splited.append(spl) documents = [TaggedDocument(doc, [i]) for i, doc in enumerate(splited)] -model = Doc2Vec(documents, min_count=0, alpha=0.025, epochs=20) +model = Doc2Vec(documents, min_count=0, alpha=0.025, epochs=400) # for epoch in range(10): # print('Training epoch %s', epoch) # model.train() @@ -34,7 +34,7 @@ model = Doc2Vec(documents, min_count=0, alpha=0.025, epochs=20) X = model.dv.vectors -kmeans = KMeans(n_clusters=clusters_no, max_iter=1000).fit(X) +kmeans = KMeans(n_clusters=clusters_no, max_iter=4000).fit(X) labels = kmeans.labels_ centroids = kmeans.cluster_centers_ diff --git a/dev-0/out.tsv b/dev-0/out.tsv index 1a4bb89..b145ba4 100644 --- a/dev-0/out.tsv +++ b/dev-0/out.tsv @@ -1,87 +1,87 @@ -9 -2 -8 -5 -0 -2 -2 -6 -1 5 2 -8 -0 -8 -8 -8 -8 -8 -2 -0 -8 -8 -2 -2 -2 -4 -9 -2 -7 -8 -9 -7 -2 -5 -8 -7 -2 -8 -9 -8 -7 -5 -8 -2 -7 -2 -8 -0 -1 -2 -0 -5 -7 -8 -7 -7 -7 -6 -2 -8 -7 -2 -2 -2 -7 -8 -7 -8 -8 -2 -8 -2 -0 -0 -9 3 -6 +3 +4 +4 +5 7 +4 +3 +3 +4 +1 +3 +3 +3 +3 +3 +3 2 +3 +3 +3 +3 +5 +4 +4 +3 +4 +3 +9 +5 +5 +3 +3 +5 +3 +3 +4 +3 +4 +3 +5 +5 +3 +3 +5 +1 6 5 -4 +1 +3 5 -7 -4 +3 +3 +3 +1 +0 5 -9 +3 +5 +3 +5 +3 +4 +3 +5 +3 +3 +5 +3 +3 +5 +2 +5 +2 +1 +3 +5 +1 +3 +4 +3 +2 +4 +3 +8