Improve the results to 0.8ish

2021-04-17 14:23:23 +02:00 · 2021-04-17 14:23:23 +02:00 · 9728e579d4
commit 9728e579d4
parent 83e6d37f53
4 changed files with 717 additions and 719 deletions
--- a/dev-0/out.tsv
+++ b/dev-0/out.tsv
@ -1,87 +1,87 @@
-5
-29
-35
-32
-18
-20
-3
-42
-4
-1
-17
-20
-16
-44
-13
-13
-41
-36
-10
+14
 0
-27
-28
-8
-14
-25
-19
-2
-8
-38
-40
-34
-34
-15
-24
-13
-9
-39
-5
-2
-32
 4
-33
-15
-21
-30
-37
-9
-11
-26
-15
-25
-35
-34
-19
-6
-14
-16
-16
-34
 7
-15
+2
+5
+22
 12
-34
-31
-20
-7
-34
-30
-35
+5
+4
+15
+5
+0
+2
+8
+8
+8
+2
 9
-18
-10
-30
-3
-43
-42
-25
+23
+24
 6
+13
+10
+15
+6
+11
+13
+23
+0
+22
+22
+3
+20
+8
+3
+20
+14
+11
+7
+5
+18
+1
+7
+16
 21
 22
-1
+0
+22
+3
+15
+4
+22
+6
 17
+10
+0
+0
+22
+20
+3
+0
+1
+21
+5
+6
+22
+16
+19
+22
+2
+9
+16
 1
 0
+12
 15
+17
+7
+2
+4
+15
+4
+2
+3
+6
 7
-23
--- a/solution-dev0.py
+++ b/solution-dev0.py
@ -17,12 +17,11 @@ def main():
        documents = [preprocess(document, stopwords)
                     for document in in_file.readlines()]

-        vectorizer = TfidfVectorizer(ngram_range=(1, 3), use_idf=False)
        vectorizer = TfidfVectorizer()

        document_vectors = vectorizer.fit_transform(documents)
        predictions = KMeans(
-            n_clusters=45, max_iter=1000).fit_predict(document_vectors)
+            n_clusters=25, max_iter=1000).fit_predict(document_vectors)

        with open("dev-0/out.tsv", "w") as out_file:
            for prediction in predictions:
--- a/solution-testA.py
+++ b/solution-testA.py
@ -17,12 +17,11 @@ def main():
        documents = [preprocess(document, stopwords)
                     for document in in_file.readlines()]

-        vectorizer = TfidfVectorizer(ngram_range=(1, 3), use_idf=False)
        vectorizer = TfidfVectorizer()

        document_vectors = vectorizer.fit_transform(documents)
        predictions = KMeans(
-            n_clusters=45, max_iter=1000).fit_predict(document_vectors)
+            n_clusters=25, max_iter=1000).fit_predict(document_vectors)

        with open("test-A/out.tsv", "w") as out_file:
            for prediction in predictions:
--- a/test-A/out.tsv
+++ b/test-A/out.tsv