From 31a45be3f8221ccee01646070f982cf06998eca1 Mon Sep 17 00:00:00 2001 From: Aleksy Wroblewski Date: Thu, 15 Apr 2021 18:16:41 +0200 Subject: [PATCH] Add first solution --- dev-0/out.tsv | 87 ++++++ solution-dev0.py | 23 ++ solution-testA.py | 23 ++ test-A/out.tsv | 691 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 824 insertions(+) create mode 100644 dev-0/out.tsv create mode 100644 solution-dev0.py create mode 100644 solution-testA.py create mode 100644 test-A/out.tsv diff --git a/dev-0/out.tsv b/dev-0/out.tsv new file mode 100644 index 0000000..765b967 --- /dev/null +++ b/dev-0/out.tsv @@ -0,0 +1,87 @@ +31 +33 +30 +12 +32 +19 +41 +15 +1 +11 +15 +19 +7 +9 +5 +42 +20 +36 +4 +24 +21 +40 +2 +0 +25 +23 +10 +2 +24 +44 +6 +43 +26 +9 +5 +3 +35 +22 +10 +12 +1 +27 +41 +29 +8 +28 +3 +7 +15 +6 +13 +30 +6 +23 +18 +0 +7 +7 +6 +17 +3 +34 +15 +37 +19 +16 +14 +8 +38 +3 +32 +4 +8 +14 +7 +15 +25 +18 +29 +39 +11 +15 +11 +24 +14 +22 +24 diff --git a/solution-dev0.py b/solution-dev0.py new file mode 100644 index 0000000..2dedafe --- /dev/null +++ b/solution-dev0.py @@ -0,0 +1,23 @@ +from sklearn.feature_extraction.text import TfidfVectorizer +import numpy as np +import sklearn.metrics +from sklearn.cluster import KMeans + + +def main(): + with open("dev-0/in.tsv") as in_file: + documents = in_file.readlines() + + vectorizer = TfidfVectorizer(ngram_range=(1, 3), use_idf=False) + vectorizer = TfidfVectorizer() + + document_vectors = vectorizer.fit_transform(documents) + predictions = KMeans(n_clusters=45).fit_predict(document_vectors) + + with open("dev-0/out.tsv", "w") as out_file: + for prediction in predictions: + out_file.write(str(prediction) + '\n') + + +if __name__ == '__main__': + main() diff --git a/solution-testA.py b/solution-testA.py new file mode 100644 index 0000000..e1e321a --- /dev/null +++ b/solution-testA.py @@ -0,0 +1,23 @@ +from sklearn.feature_extraction.text import TfidfVectorizer +import numpy as np +import sklearn.metrics +from sklearn.cluster import KMeans + + +def main(): + with open("test-A/in.tsv") as in_file: + documents = in_file.readlines() + + vectorizer = TfidfVectorizer(ngram_range=(1, 3), use_idf=False) + vectorizer = TfidfVectorizer() + + document_vectors = vectorizer.fit_transform(documents) + predictions = KMeans(n_clusters=45).fit_predict(document_vectors) + + with open("test-A/out.tsv", "w") as out_file: + for prediction in predictions: + out_file.write(str(prediction) + '\n') + + +if __name__ == '__main__': + main() diff --git a/test-A/out.tsv b/test-A/out.tsv new file mode 100644 index 0000000..8834240 --- /dev/null +++ b/test-A/out.tsv @@ -0,0 +1,691 @@ +8 +9 +13 +39 +7 +24 +39 +32 +37 +20 +38 +17 +1 +15 +32 +35 +7 +23 +7 +2 +5 +21 +24 +16 +28 +30 +2 +36 +32 +30 +23 +1 +24 +23 +10 +9 +17 +23 +13 +34 +23 +39 +7 +1 +44 +15 +5 +43 +13 +31 +13 +23 +30 +16 +31 +12 +23 +13 +23 +6 +23 +7 +2 +23 +30 +18 +16 +24 +2 +30 +5 +10 +7 +7 +23 +23 +16 +30 +16 +30 +29 +25 +42 +2 +7 +17 +23 +16 +20 +23 +30 +5 +10 +7 +7 +7 +8 +15 +23 +39 +6 +1 +41 +26 +30 +12 +30 +23 +23 +2 +1 +11 +30 +13 +3 +35 +2 +9 +30 +23 +6 +19 +39 +39 +31 +13 +7 +12 +1 +2 +5 +10 +31 +12 +2 +10 +27 +30 +42 +27 +26 +29 +6 +27 +24 +23 +2 +27 +5 +12 +7 +41 +5 +23 +16 +17 +7 +6 +35 +21 +4 +23 +2 +43 +5 +41 +42 +4 +10 +10 +9 +16 +34 +39 +7 +42 +5 +38 +12 +2 +25 +6 +6 +2 +10 +32 +19 +42 +32 +31 +8 +24 +17 +12 +22 +10 +2 +13 +5 +8 +30 +3 +8 +38 +7 +7 +13 +43 +20 +8 +37 +43 +23 +7 +43 +17 +1 +24 +22 +37 +31 +12 +5 +29 +14 +10 +4 +23 +42 +23 +23 +33 +12 +43 +24 +30 +10 +16 +33 +5 +39 +30 +18 +36 +30 +22 +5 +5 +36 +43 +30 +2 +25 +1 +3 +14 +1 +39 +23 +6 +12 +1 +7 +2 +27 +33 +16 +2 +3 +9 +29 +31 +5 +6 +24 +7 +15 +1 +6 +35 +2 +5 +31 +30 +1 +28 +6 +13 +41 +24 +23 +34 +30 +30 +9 +31 +7 +28 +41 +36 +38 +23 +2 +31 +31 +38 +2 +23 +20 +32 +39 +31 +35 +23 +42 +9 +5 +6 +43 +23 +2 +42 +21 +32 +34 +3 +20 +44 +30 +15 +23 +23 +2 +12 +23 +13 +30 +35 +32 +26 +23 +2 +1 +23 +16 +6 +23 +17 +23 +23 +30 +23 +5 +32 +7 +23 +31 +30 +26 +7 +2 +2 +30 +17 +31 +30 +13 +1 +43 +4 +20 +33 +17 +36 +32 +23 +12 +7 +16 +23 +10 +30 +29 +30 +8 +1 +21 +17 +26 +24 +2 +18 +23 +13 +23 +9 +8 +32 +7 +5 +15 +2 +12 +40 +12 +23 +23 +1 +5 +1 +39 +28 +32 +30 +12 +5 +43 +31 +30 +43 +37 +2 +2 +24 +4 +23 +13 +7 +29 +2 +23 +16 +43 +39 +25 +2 +39 +1 +16 +24 +22 +13 +16 +40 +7 +9 +35 +25 +32 +15 +30 +39 +31 +33 +43 +23 +30 +43 +30 +31 +32 +31 +31 +2 +34 +7 +13 +30 +39 +34 +34 +34 +37 +23 +2 +15 +3 +23 +10 +22 +4 +14 +37 +37 +0 +15 +23 +31 +3 +20 +31 +13 +43 +30 +1 +23 +1 +27 +7 +6 +32 +13 +23 +23 +23 +35 +21 +35 +1 +38 +24 +4 +24 +9 +3 +17 +2 +7 +9 +43 +1 +32 +4 +23 +30 +39 +23 +42 +2 +34 +25 +23 +23 +13 +23 +21 +29 +23 +31 +23 +11 +27 +7 +11 +40 +24 +22 +1 +37 +1 +35 +16 +3 +36 +10 +25 +30 +8 +10 +18 +24 +13 +44 +32 +30 +30 +12 +23 +32 +10 +13 +13 +30 +9 +35 +17 +1 +26 +17 +23 +12 +6 +23 +24 +23 +23 +25 +12 +6 +31 +9 +6 +30 +24 +21 +9 +8 +5 +2 +26 +13 +23 +31 +23 +12 +44 +23 +23 +43 +15 +17 +1 +8 +26 +17 +31 +6 +43 +9 +20 +6 +21 +5 +13 +5 +14 +2 +15 +41 +22 +17 +43 +23 +7 +34 +30 +42 +16 +22 +30 +23 +8 +6 +12 +32 +4 +23 +16 +36 +9 +31 +5 +25 +17 +30 +12 +7 +3 +17 +12 +44 +24 +23 +2 +2 +1 +40 +39 +24 +23 +1 +7 +32 +4 +34 +42 +23 +39 +30 +24 +22 +10 +30 +4 +1 +13 +23 +26 +8 +32 +4