diff --git a/dev-0/out.tsv b/dev-0/out.tsv new file mode 100644 index 0000000..17903bb --- /dev/null +++ b/dev-0/out.tsv @@ -0,0 +1,87 @@ +39 +32 +0 +20 +2 +22 +41 +11 +4 +5 +46 +22 +30 +14 +6 +25 +34 +40 +18 +13 +12 +23 +17 +9 +11 +35 +3 +17 +13 +30 +24 +36 +29 +47 +25 +8 +44 +49 +3 +20 +4 +26 +41 +10 +43 +15 +8 +30 +11 +42 +37 +0 +24 +35 +7 +9 +30 +30 +24 +27 +8 +33 +11 +31 +22 +28 +1 +45 +21 +8 +2 +18 +16 +1 +30 +11 +11 +7 +10 +38 +5 +48 +5 +13 +1 +19 +13 \ No newline at end of file diff --git a/geval b/geval new file mode 100755 index 0000000..b68b316 Binary files /dev/null and b/geval differ diff --git a/kMeans.ipynb b/kMeans.ipynb new file mode 100644 index 0000000..e23fc37 --- /dev/null +++ b/kMeans.ipynb @@ -0,0 +1,70 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f7e1ae0d", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import csv\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.cluster import KMeans" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7582a8dd", + "metadata": {}, + "outputs": [], + "source": [ + "#dev0\n", + "dev0_data = pd.read_csv('dev-0/in.tsv', header=None, quoting=csv.QUOTE_NONE, sep='\\t')\n", + "\n", + "dev0_y = KMeans(n_clusters=50).fit_predict(TfidfVectorizer().fit_transform(dev0_data[0].values))\n", + "\n", + "#zapis wyników\n", + "dev0_y.tofile('dev-0/out.tsv', sep='\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d3c75abc", + "metadata": {}, + "outputs": [], + "source": [ + "#TestA\n", + "testA_data = pd.read_csv('test-A/in.tsv', header=None, quoting=csv.QUOTE_NONE, sep='\\t')\n", + "\n", + "testA_y = KMeans(n_clusters=50).fit_predict(TfidfVectorizer().fit_transform(testA_data[0].values))\n", + "\n", + "#zapis wyników\n", + "testA_y.tofile('test-A/out.tsv', sep='\\n')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/kMeans.py b/kMeans.py new file mode 100644 index 0000000..ad11f84 --- /dev/null +++ b/kMeans.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[1]: + + +import pandas as pd +import csv +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.cluster import KMeans + + +# In[2]: + + +#dev0 +dev0_data = pd.read_csv('dev-0/in.tsv', header=None, quoting=csv.QUOTE_NONE, sep='\t') + +dev0_y = KMeans(n_clusters=50).fit_predict(TfidfVectorizer().fit_transform(dev0_data[0].values)) + +#zapis wyników +dev0_y.tofile('dev-0/out.tsv', sep='\n') + + +# In[3]: + + +#TestA +testA_data = pd.read_csv('test-A/in.tsv', header=None, quoting=csv.QUOTE_NONE, sep='\t') + +testA_y = KMeans(n_clusters=50).fit_predict(TfidfVectorizer().fit_transform(testA_data[0].values)) + +#zapis wyników +testA_y.tofile('test-A/out.tsv', sep='\n') + diff --git a/test-A/out.tsv b/test-A/out.tsv new file mode 100644 index 0000000..bc77e7b --- /dev/null +++ b/test-A/out.tsv @@ -0,0 +1,691 @@ +22 +48 +25 +10 +33 +8 +13 +39 +24 +47 +26 +4 +15 +24 +3 +18 +33 +12 +33 +3 +12 +34 +8 +9 +0 +23 +0 +28 +39 +0 +21 +39 +8 +2 +38 +48 +4 +19 +2 +21 +43 +33 +17 +21 +26 +26 +26 +11 +25 +3 +25 +19 +10 +9 +28 +7 +19 +25 +17 +45 +19 +17 +0 +47 +24 +46 +9 +8 +17 +24 +28 +38 +33 +18 +19 +33 +1 +45 +43 +23 +23 +6 +5 +48 +33 +16 +33 +41 +24 +43 +24 +28 +38 +33 +33 +17 +22 +0 +19 +19 +14 +34 +33 +41 +4 +42 +14 +23 +12 +16 +15 +2 +24 +25 +12 +18 +17 +16 +14 +10 +45 +39 +20 +17 +19 +2 +33 +7 +15 +19 +28 +38 +12 +7 +26 +38 +36 +23 +5 +10 +20 +26 +45 +36 +14 +10 +17 +36 +29 +7 +33 +17 +17 +3 +9 +4 +33 +45 +18 +34 +37 +12 +42 +11 +28 +17 +5 +37 +38 +38 +48 +28 +21 +19 +33 +5 +9 +26 +7 +30 +6 +11 +45 +16 +14 +0 +39 +5 +39 +28 +22 +14 +4 +42 +39 +45 +10 +25 +10 +22 +23 +31 +22 +26 +33 +17 +25 +11 +16 +22 +24 +43 +19 +33 +11 +4 +39 +8 +44 +24 +3 +7 +23 +30 +5 +38 +37 +12 +40 +19 +17 +41 +7 +11 +8 +24 +38 +9 +19 +10 +22 +24 +46 +26 +23 +44 +28 +28 +26 +11 +24 +16 +6 +21 +31 +5 +12 +3 +19 +45 +7 +39 +13 +23 +36 +12 +10 +16 +31 +39 +26 +12 +28 +45 +8 +13 +26 +15 +45 +18 +12 +28 +13 +24 +15 +27 +49 +25 +17 +8 +19 +24 +14 +23 +48 +28 +17 +27 +17 +26 +26 +12 +16 +28 +28 +26 +33 +13 +16 +19 +17 +1 +18 +21 +40 +16 +28 +45 +11 +47 +43 +5 +34 +1 +19 +43 +16 +33 +14 +26 +33 +19 +13 +7 +19 +25 +23 +18 +28 +10 +19 +29 +39 +3 +9 +45 +41 +4 +42 +43 +14 +32 +28 +39 +33 +19 +28 +23 +41 +47 +37 +26 +14 +24 +28 +23 +2 +21 +32 +37 +17 +19 +4 +20 +39 +28 +7 +17 +7 +12 +38 +23 +26 +13 +22 +15 +47 +4 +41 +8 +12 +46 +0 +25 +33 +48 +22 +12 +33 +26 +26 +10 +7 +39 +7 +3 +19 +39 +29 +39 +43 +27 +39 +23 +7 +28 +32 +28 +23 +14 +23 +49 +20 +8 +37 +31 +25 +33 +28 +3 +17 +9 +11 +19 +6 +29 +33 +39 +9 +8 +44 +25 +43 +19 +33 +30 +18 +6 +39 +26 +23 +48 +11 +39 +35 +19 +24 +11 +9 +28 +14 +42 +3 +16 +21 +47 +25 +23 +20 +43 +43 +16 +43 +19 +16 +26 +31 +19 +17 +44 +37 +5 +24 +23 +24 +35 +17 +1 +31 +16 +20 +25 +11 +24 +15 +19 +39 +36 +13 +32 +39 +25 +16 +19 +19 +18 +34 +18 +15 +26 +8 +37 +24 +16 +19 +4 +30 +33 +48 +11 +21 +23 +37 +12 +23 +33 +16 +40 +39 +21 +6 +12 +33 +2 +14 +34 +23 +3 +28 +42 +25 +27 +33 +35 +39 +8 +44 +21 +23 +15 +18 +43 +31 +33 +38 +6 +22 +22 +38 +46 +8 +25 +45 +39 +23 +23 +7 +28 +39 +38 +25 +25 +24 +16 +43 +4 +15 +41 +4 +19 +7 +45 +12 +8 +12 +19 +6 +7 +45 +28 +30 +24 +23 +8 +24 +48 +22 +29 +29 +20 +25 +3 +28 +24 +7 +35 +28 +15 +35 +26 +4 +15 +22 +41 +4 +12 +45 +32 +16 +16 +45 +34 +28 +25 +16 +5 +26 +23 +17 +26 +4 +11 +19 +33 +39 +23 +5 +9 +44 +23 +3 +22 +28 +7 +39 +37 +12 +10 +12 +48 +28 +26 +6 +4 +24 +7 +17 +31 +4 +24 +4 +8 +12 +16 +17 +21 +44 +12 +32 +14 +15 +33 +43 +37 +16 +40 +19 +28 +14 +12 +44 +38 +23 +37 +15 +2 +3 +41 +22 +47 +37 \ No newline at end of file