zadanie 6

This commit is contained in:
Jedrzej Klepacki 2021-04-25 17:52:29 +02:00
parent e4adfb04dc
commit 30caab44a6
6 changed files with 1832 additions and 0 deletions

View File

@ -0,0 +1,228 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import KMeans\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## FUNKCJE"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def inertia_list(all_doc):\n",
" list_inter = []\n",
" K_max = int(len(all_doc)/2)\n",
" while K_max > 100:\n",
" K_max = int(K_max/2)\n",
" K = range(1,K_max)\n",
" for k in K:\n",
" FitMean = KMeans(n_clusters=k).fit(doc_vectors)\n",
" list_inter.append(FitMean.inertia_)\n",
" return list_inter"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def BestK(list_inter):\n",
" position = -10\n",
" for i in range(0, len(list_inter)-1):\n",
" if (int(list_inter[i]) == (int(list_inter[i+1]))):\n",
" position = i\n",
" if position == -10 :\n",
" position = len(list_inter)-1\n",
" return position"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## PLIK DEV-0"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"infile = open('dev-0/in.tsv', 'r', encoding=\"utf-8\")\n",
"outfile = open(\"dev-0/out.tsv\", \"w\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"all_doc = infile.readlines()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"vectorizer = TfidfVectorizer()\n",
"doc_vectors = vectorizer.fit_transform(all_doc)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"list_inter = inertia_list(all_doc)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"position = BestK(list_inter)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"FitMean = KMeans(n_clusters=position).fit_predict(doc_vectors)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"for x in FitMean:\n",
" outfile.write(str(x) + '\\n')\n",
"infile.close()\n",
"outfile.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## PLIK TEST-A"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"infile = open('test-A/in.tsv', 'r', encoding=\"utf-8\")\n",
"outfile = open(\"test-A/out.tsv\", \"w\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"all_doc = infile.readlines()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"vectorizer = TfidfVectorizer()\n",
"doc_vectors = vectorizer.fit_transform(all_doc)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"list_inter = inertia_list(all_doc)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"position = BestK(list_inter)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"FitMean = KMeans(n_clusters=position).fit_predict(doc_vectors)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"for x in FitMean:\n",
" outfile.write(str(x) + '\\n')\n",
"infile.close()\n",
"outfile.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

87
dev-0/out.tsv Normal file
View File

@ -0,0 +1,87 @@
30
9
29
4
7
36
2
18
14
3
18
36
11
6
17
20
35
24
5
12
22
15
8
16
23
12
10
8
12
11
1
39
1
6
20
1
33
6
10
4
14
18
1
31
25
27
26
11
18
1
19
29
1
21
13
16
11
18
1
0
1
37
2
38
36
0
28
25
32
26
7
5
25
2
11
18
23
13
31
18
3
18
3
12
2
34
12
1 30
2 9
3 29
4 4
5 7
6 36
7 2
8 18
9 14
10 3
11 18
12 36
13 11
14 6
15 17
16 20
17 35
18 24
19 5
20 12
21 22
22 15
23 8
24 16
25 23
26 12
27 10
28 8
29 12
30 11
31 1
32 39
33 1
34 6
35 20
36 1
37 33
38 6
39 10
40 4
41 14
42 18
43 1
44 31
45 25
46 27
47 26
48 11
49 18
50 1
51 19
52 29
53 1
54 21
55 13
56 16
57 11
58 18
59 1
60 0
61 1
62 37
63 2
64 38
65 36
66 0
67 28
68 25
69 32
70 26
71 7
72 5
73 25
74 2
75 11
76 18
77 23
78 13
79 31
80 18
81 3
82 18
83 3
84 12
85 2
86 34
87 12

228
k-mean_script.ipynb Normal file
View File

@ -0,0 +1,228 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import KMeans\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## FUNKCJE"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def inertia_list(all_doc, vectors=None):\n",
"    \"\"\"Fit KMeans for k = 1 .. K_max - 1 and collect the inertia of each fit.\n",
"\n",
"    K_max starts at half the number of documents and is halved until it is\n",
"    at most 100.\n",
"\n",
"    Args:\n",
"        all_doc: Sized collection of documents; only its length is used.\n",
"        vectors: Feature matrix to cluster. When omitted, the notebook-level\n",
"            ``doc_vectors`` is used, which is what the single-argument call\n",
"            relied on.\n",
"\n",
"    Returns:\n",
"        List of ``inertia_`` values; element ``i`` belongs to ``k = i + 1``.\n",
"    \"\"\"\n",
"    K_max = len(all_doc) // 2\n",
"    while K_max > 100:\n",
"        K_max //= 2\n",
"    if K_max <= 1:\n",
"        # No candidate k at all: nothing to fit, so no matrix is needed.\n",
"        return []\n",
"    if vectors is None:\n",
"        # Backward-compatible fallback for callers that pass only all_doc.\n",
"        vectors = doc_vectors\n",
"    return [KMeans(n_clusters=k).fit(vectors).inertia_ for k in range(1, K_max)]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def BestK(list_inter):\n",
" position = -10\n",
" for i in range(0, len(list_inter)-1):\n",
" if (int(list_inter[i]) == (int(list_inter[i+1]))):\n",
" position = i\n",
" if position == -10 :\n",
" position = len(list_inter)-1\n",
" return position"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## PLIK DEV-0"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"infile = open('dev-0/in.tsv', 'r', encoding=\"utf-8\")\n",
"outfile = open(\"dev-0/out.tsv\", \"w\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"all_doc = infile.readlines()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"vectorizer = TfidfVectorizer()\n",
"doc_vectors = vectorizer.fit_transform(all_doc)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"list_inter = inertia_list(all_doc)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"position = BestK(list_inter)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"FitMean = KMeans(n_clusters=position).fit_predict(doc_vectors)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"for x in FitMean:\n",
" outfile.write(str(x) + '\\n')\n",
"infile.close()\n",
"outfile.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## PLIK TEST-A"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"infile = open('test-A/in.tsv', 'r', encoding=\"utf-8\")\n",
"outfile = open(\"test-A/out.tsv\", \"w\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"all_doc = infile.readlines()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"vectorizer = TfidfVectorizer()\n",
"doc_vectors = vectorizer.fit_transform(all_doc)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"list_inter = inertia_list(all_doc)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"position = BestK(list_inter)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"FitMean = KMeans(n_clusters=position).fit_predict(doc_vectors)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"for x in FitMean:\n",
" outfile.write(str(x) + '\\n')\n",
"infile.close()\n",
"outfile.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

139
k-mean_script.py Normal file
View File

@ -0,0 +1,139 @@
#!/usr/bin/env python
# coding: utf-8
# In[1]:
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import pandas as pd
# ## FUNKCJE
# In[2]:
def inertia_list(all_doc, vectors=None):
    """Fit KMeans for a range of cluster counts and collect the inertia of each fit.

    The candidate counts are k = 1 .. K_max - 1, where K_max starts at half the
    number of documents and is halved until it is at most 100.

    Args:
        all_doc: Sized collection of documents; only its length is used.
        vectors: Feature matrix to cluster. When omitted, the module-level
            ``doc_vectors`` is used, which is what the original
            single-argument call relied on.

    Returns:
        List of ``inertia_`` values; element ``i`` belongs to ``k = i + 1``.
    """
    K_max = len(all_doc) // 2
    while K_max > 100:
        K_max //= 2
    if K_max <= 1:
        # No candidate k at all: nothing to fit, so no matrix is needed.
        return []
    if vectors is None:
        # Backward-compatible fallback for callers that pass only all_doc.
        vectors = doc_vectors
    return [KMeans(n_clusters=k).fit(vectors).inertia_ for k in range(1, K_max)]
# In[3]:
def BestK(list_inter):
    """Pick an index into the inertia list.

    Returns the last index ``i`` at which two neighbouring inertias share the
    same integer part (``int(list_inter[i]) == int(list_inter[i + 1])``).
    When no such pair exists, the index of the final element is returned
    (which is -1 for an empty list).
    """
    plateau_indices = [
        idx
        for idx, (current, following) in enumerate(zip(list_inter, list_inter[1:]))
        if int(current) == int(following)
    ]
    if plateau_indices:
        return plateau_indices[-1]
    return len(list_inter) - 1
# ## DEV-0 FILE
# In[4]:
# In[5]:
# Read all input lines; the context manager closes the handle even if reading fails.
with open('dev-0/in.tsv', 'r', encoding="utf-8") as infile:
    all_doc = infile.readlines()
# In[6]:
vectorizer = TfidfVectorizer()
# doc_vectors has to remain a module-level name: inertia_list() is called with
# all_doc only and picks the matrix up from the global scope.
doc_vectors = vectorizer.fit_transform(all_doc)
# In[7]:
list_inter = inertia_list(all_doc)
# In[8]:
position = BestK(list_inter)
# In[9]:
# BestK() can return 0 (or -1 for an empty inertia list), which KMeans rejects,
# so the cluster count is clamped to at least 1.
FitMean = KMeans(n_clusters=max(position, 1)).fit_predict(doc_vectors)
# In[10]:
# The output file is opened only once the labels exist, so a failure above no
# longer leaves a truncated out.tsv behind; one label is written per line.
with open("dev-0/out.tsv", "w") as outfile:
    for x in FitMean:
        outfile.write(str(x) + '\n')
# ## TEST-A FILE
# In[11]:
# In[12]:
# Read all input lines; the context manager closes the handle even if reading fails.
with open('test-A/in.tsv', 'r', encoding="utf-8") as infile:
    all_doc = infile.readlines()
# In[13]:
vectorizer = TfidfVectorizer()
# doc_vectors has to remain a module-level name: inertia_list() is called with
# all_doc only and picks the matrix up from the global scope.
doc_vectors = vectorizer.fit_transform(all_doc)
# In[14]:
list_inter = inertia_list(all_doc)
# In[15]:
position = BestK(list_inter)
# In[16]:
# BestK() can return 0 (or -1 for an empty inertia list), which KMeans rejects,
# so the cluster count is clamped to at least 1.
FitMean = KMeans(n_clusters=max(position, 1)).fit_predict(doc_vectors)
# In[17]:
# The output file is opened only once the labels exist, so a failure above no
# longer leaves a truncated out.tsv behind; one label is written per line.
with open("test-A/out.tsv", "w") as outfile:
    for x in FitMean:
        outfile.write(str(x) + '\n')

459
porba1.ipynb Normal file
View File

@ -0,0 +1,459 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: numpy in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (1.19.2)\n",
"Requirement already satisfied: seaborn in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (0.11.0)\n",
"Requirement already satisfied: scikit-learn in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (0.23.2)\n",
"Requirement already satisfied: matplotlib in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (3.3.2)\n",
"Requirement already satisfied: fasttext in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (0.9.2)\n",
"Requirement already satisfied: pandas>=0.23 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from seaborn) (1.1.3)\n",
"Requirement already satisfied: scipy>=1.0 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from seaborn) (1.5.2)\n",
"Requirement already satisfied: joblib>=0.11 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from scikit-learn) (0.17.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from scikit-learn) (2.1.0)\n",
"Requirement already satisfied: certifi>=2020.06.20 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from matplotlib) (2020.6.20)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from matplotlib) (8.0.1)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from matplotlib) (0.10.0)\n",
"Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from matplotlib) (2.8.1)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from matplotlib) (1.3.0)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from matplotlib) (2.4.7)\n",
"Requirement already satisfied: setuptools>=0.7.0 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from fasttext) (50.3.1.post20201107)\n",
"Requirement already satisfied: pybind11>=2.2 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from fasttext) (2.6.2)\n",
"Requirement already satisfied: pytz>=2017.2 in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from pandas>=0.23->seaborn) (2020.1)\n",
"Requirement already satisfied: six in c:\\users\\jedpc\\anaconda3\\lib\\site-packages (from cycler>=0.10->matplotlib) (1.15.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install numpy seaborn scikit-learn matplotlib fasttext"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import seaborn as sns\n",
"import copy\n",
"from scipy.cluster.hierarchy import dendrogram\n",
"from scipy.cluster import hierarchy\n",
"import matplotlib.pyplot as plt\n",
"from scipy.spatial import distance_matrix\n",
"import fasttext\n",
"import fasttext.util\n",
"from sklearn.feature_extraction.text import TfidfVectorizer"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"file = open(\"C:/Users/JedPC/Desktop/ISI/polish-urban-legends-public/dev-0/in.tsv\", encoding=\"utf-8\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"all_doc = []\n",
"for line in file:\n",
" all_doc.append(line)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"vectorizer = TfidfVectorizer()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"document_vectors = vectorizer.fit_transform(all_doc)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.5937322507759797"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.max(document_vectors)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ILOSC K"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"K = 40"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# OBLICZANIE ILOSCI ZMIENNYCH"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"ELEMENTS = document_vectors.shape[0]\n",
"SIZE = document_vectors.shape[1]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ALGORYTM K SREDNICH"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def get_random_centroids():\n",
" CENTROIDS = np.zeros((K, SIZE))\n",
" for i in range(K):\n",
" for j in range(SIZE):\n",
" CENTROIDS[i,j] = np.random.uniform(0,2)\n",
" if CENTROIDS[i,j] > 1:\n",
" CENTROIDS[i,j] = 0\n",
" return CENTROIDS"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"CENTROIDS = get_random_centroids()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def assign_data_to_labels(document_vectors, CENTROIDS):\n",
" LABELS = []\n",
" for POINT in document_vectors:\n",
" DISTANCES = [np.linalg.norm(POINT - CEN) for CEN in CENTROIDS]\n",
" \n",
" LABELS.append(np.argmin(DISTANCES))\n",
" return np.array(LABELS)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"LABELS = assign_data_to_labels(document_vectors, CENTROIDS)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def get_new_centroids(document_vectors, LABELS, CENTROIDS):\n",
"    \"\"\"Recompute each centroid as the mean vector of the documents assigned to it.\n",
"\n",
"    document_vectors is indexed with a boolean mask built from LABELS. A\n",
"    cluster that received no documents keeps its previous centroid.\n",
"\n",
"    Returns a new array shaped like CENTROIDS; the inputs are not modified.\n",
"    \"\"\"\n",
"    NEW_CENTROIDS = np.zeros_like(CENTROIDS)\n",
"    for centroid_label in range(K):\n",
"        CENT_DATA = document_vectors[LABELS == centroid_label]\n",
"        if CENT_DATA.shape[0] == 0:\n",
"            # Averaging zero rows divides by zero; keep the old position instead.\n",
"            NEW_CENTROIDS[centroid_label] = CENTROIDS[centroid_label]\n",
"            continue\n",
"        # axis=0 averages per feature. Without it the mean collapses to a single\n",
"        # scalar that is broadcast over the whole centroid row.\n",
"        NEW_CENTROIDS[centroid_label] = np.asarray(CENT_DATA.mean(axis=0)).ravel()\n",
"    return NEW_CENTROIDS"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\JedPC\\anaconda3\\lib\\site-packages\\scipy\\sparse\\base.py:581: RuntimeWarning: divide by zero encountered in true_divide\n",
" return self.astype(np.float_)._mul_scalar(1./other)\n"
]
}
],
"source": [
"NEW_CENTROIDS = get_new_centroids(document_vectors, LABELS, CENTROIDS)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"CENTROIDS = NEW_CENTROIDS"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0., 0., 0., ..., 0., 0., 0.])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"NEW_CENTROIDS[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ITERACJE"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"NUMBER = 1000"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"for i in range(NUMBER):\n",
" LABELS = assign_data_to_labels(document_vectors, CENTROIDS)\n",
" CENTROIDS = get_new_centroids(document_vectors, LABELS, CENTROIDS)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"30\n",
"0\n",
"1\n",
"1\n",
"30\n",
"1\n",
"0\n",
"30\n",
"30\n",
"1\n",
"0\n",
"1\n",
"30\n",
"1\n",
"1\n",
"1\n",
"1\n",
"1\n",
"0\n",
"0\n",
"1\n",
"1\n",
"0\n",
"1\n",
"1\n",
"30\n",
"30\n",
"0\n",
"0\n",
"1\n",
"30\n",
"0\n",
"0\n",
"1\n",
"1\n",
"0\n",
"1\n",
"1\n",
"30\n",
"1\n",
"0\n",
"1\n",
"1\n",
"0\n",
"0\n",
"0\n",
"1\n",
"0\n",
"30\n",
"1\n",
"0\n",
"1\n",
"0\n",
"1\n",
"0\n",
"0\n",
"0\n",
"30\n",
"0\n",
"1\n",
"0\n",
"0\n",
"0\n",
"0\n",
"0\n",
"1\n",
"0\n",
"1\n",
"1\n",
"1\n",
"0\n",
"0\n",
"0\n",
"0\n",
"30\n",
"30\n",
"30\n",
"0\n",
"0\n",
"30\n",
"1\n",
"30\n",
"1\n",
"0\n",
"30\n",
"1\n",
"30\n"
]
}
],
"source": [
"LABELS.shape[0]\n",
"for i in range(LABELS.shape[0]):\n",
" print(LABELS[i])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# WYNIKOWE ETYKIETY"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([30, 0, 1, 1, 30, 1, 0, 30, 30, 1, 0, 1, 30, 1, 1, 1, 1,\n",
" 1, 0, 0, 1, 1, 0, 1, 1, 30, 30, 0, 0, 1, 30, 0, 0, 1,\n",
" 1, 0, 1, 1, 30, 1, 0, 1, 1, 0, 0, 0, 1, 0, 30, 1, 0,\n",
" 1, 0, 1, 0, 0, 0, 30, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,\n",
" 1, 1, 0, 0, 0, 0, 30, 30, 30, 0, 0, 30, 1, 30, 1, 0, 30,\n",
" 1, 30], dtype=int64)"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"LABELS"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

691
test-A/out.tsv Normal file
View File

@ -0,0 +1,691 @@
2
36
4
17
25
12
39
16
11
36
46
30
68
11
1
62
34
50
34
1
44
8
12
53
37
38
61
73
16
56
5
72
14
62
60
66
30
62
4
72
43
66
34
42
65
75
17
3
4
1
4
52
62
53
28
22
67
4
56
39
66
0
61
25
1
31
53
14
1
11
5
60
34
62
41
62
39
69
62
39
45
6
24
67
25
55
62
69
11
43
10
5
64
25
0
29
2
40
70
5
60
11
34
63
56
17
39
39
50
66
9
4
11
4
27
25
36
66
11
17
13
58
66
17
62
4
0
22
51
62
5
64
15
22
75
60
18
39
24
19
63
45
13
59
54
62
66
59
33
22
34
34
62
1
10
55
70
48
49
8
40
52
35
3
5
25
24
40
60
64
66
28
27
55
34
24
17
46
62
52
71
3
66
34
54
54
58
57
16
28
7
54
30
17
68
60
62
4
62
7
39
27
7
77
29
0
4
3
36
7
5
30
1
62
3
30
1
12
32
11
1
62
5
45
24
60
40
50
24
30
62
73
22
3
14
11
64
53
27
62
25
11
31
75
76
32
44
44
75
3
39
47
6
68
27
24
9
62
40
13
22
9
70
39
18
73
17
50
27
1
14
62
28
38
14
62
77
9
13
49
42
44
41
11
68
21
13
4
34
12
50
17
39
11
65
44
0
21
34
75
26
50
66
28
62
46
14
62
36
14
36
28
49
72
24
47
28
13
3
62
66
24
8
36
43
43
36
41
76
77
62
50
33
22
74
4
39
49
1
5
66
33
68
1
10
13
63
55
17
76
39
5
5
16
29
50
44
34
63
0
1
55
11
38
44
39
4
72
3
40
36
62
30
73
5
28
22
0
10
50
1
11
45
39
2
9
8
30
63
14
76
31
61
4
1
25
7
25
70
62
77
62
22
48
10
1
28
16
33
68
43
21
16
39
22
28
3
28
59
70
39
43
34
12
40
27
4
62
45
1
34
53
3
16
71
33
62
42
53
12
32
4
43
48
34
52
49
71
16
26
11
28
3
75
23
62
11
3
43
20
76
17
29
52
72
0
4
62
57
43
43
2
43
7
34
26
27
66
62
32
40
24
43
39
11
23
74
44
27
36
63
4
3
11
51
50
9
59
25
13
67
4
50
66
35
49
8
39
9
73
12
40
5
66
27
55
52
34
66
3
42
11
40
50
39
62
66
24
9
72
71
50
43
4
54
8
45
1
28
17
4
19
25
23
48
14
38
72
39
68
49
43
27
75
64
6
39
7
64
31
12
4
41
16
39
39
22
56
16
64
4
4
44
36
49
55
9
63
55
67
22
39
50
14
27
50
6
22
13
44
52
39
73
54
11
66
7
33
33
57
4
1
28
1
22
41
62
48
23
26
30
72
2
63
30
50
13
3
66
36
77
8
28
4
15
57
77
44
0
77
55
3
76
37
1
39
24
10
32
76
1
7
28
22
16
40
44
10
73
66
28
75
6
55
11
62
34
27
55
10
41
12
50
66
17
72
32
44
70
54
9
29
37
40
7
24
62
28
11
12
32
64
39
40
9
4
1
63
7
16
40
1 2
2 36
3 4
4 17
5 25
6 12
7 39
8 16
9 11
10 36
11 46
12 30
13 68
14 11
15 1
16 62
17 34
18 50
19 34
20 1
21 44
22 8
23 12
24 53
25 37
26 38
27 61
28 73
29 16
30 56
31 5
32 72
33 14
34 62
35 60
36 66
37 30
38 62
39 4
40 72
41 43
42 66
43 34
44 42
45 65
46 75
47 17
48 3
49 4
50 1
51 4
52 52
53 62
54 53
55 28
56 22
57 67
58 4
59 56
60 39
61 66
62 0
63 61
64 25
65 1
66 31
67 53
68 14
69 1
70 11
71 5
72 60
73 34
74 62
75 41
76 62
77 39
78 69
79 62
80 39
81 45
82 6
83 24
84 67
85 25
86 55
87 62
88 69
89 11
90 43
91 10
92 5
93 64
94 25
95 0
96 29
97 2
98 40
99 70
100 5
101 60
102 11
103 34
104 63
105 56
106 17
107 39
108 39
109 50
110 66
111 9
112 4
113 11
114 4
115 27
116 25
117 36
118 66
119 11
120 17
121 13
122 58
123 66
124 17
125 62
126 4
127 0
128 22
129 51
130 62
131 5
132 64
133 15
134 22
135 75
136 60
137 18
138 39
139 24
140 19
141 63
142 45
143 13
144 59
145 54
146 62
147 66
148 59
149 33
150 22
151 34
152 34
153 62
154 1
155 10
156 55
157 70
158 48
159 49
160 8
161 40
162 52
163 35
164 3
165 5
166 25
167 24
168 40
169 60
170 64
171 66
172 28
173 27
174 55
175 34
176 24
177 17
178 46
179 62
180 52
181 71
182 3
183 66
184 34
185 54
186 54
187 58
188 57
189 16
190 28
191 7
192 54
193 30
194 17
195 68
196 60
197 62
198 4
199 62
200 7
201 39
202 27
203 7
204 77
205 29
206 0
207 4
208 3
209 36
210 7
211 5
212 30
213 1
214 62
215 3
216 30
217 1
218 12
219 32
220 11
221 1
222 62
223 5
224 45
225 24
226 60
227 40
228 50
229 24
230 30
231 62
232 73
233 22
234 3
235 14
236 11
237 64
238 53
239 27
240 62
241 25
242 11
243 31
244 75
245 76
246 32
247 44
248 44
249 75
250 3
251 39
252 47
253 6
254 68
255 27
256 24
257 9
258 62
259 40
260 13
261 22
262 9
263 70
264 39
265 18
266 73
267 17
268 50
269 27
270 1
271 14
272 62
273 28
274 38
275 14
276 62
277 77
278 9
279 13
280 49
281 42
282 44
283 41
284 11
285 68
286 21
287 13
288 4
289 34
290 12
291 50
292 17
293 39
294 11
295 65
296 44
297 0
298 21
299 34
300 75
301 26
302 50
303 66
304 28
305 62
306 46
307 14
308 62
309 36
310 14
311 36
312 28
313 49
314 72
315 24
316 47
317 28
318 13
319 3
320 62
321 66
322 24
323 8
324 36
325 43
326 43
327 36
328 41
329 76
330 77
331 62
332 50
333 33
334 22
335 74
336 4
337 39
338 49
339 1
340 5
341 66
342 33
343 68
344 1
345 10
346 13
347 63
348 55
349 17
350 76
351 39
352 5
353 5
354 16
355 29
356 50
357 44
358 34
359 63
360 0
361 1
362 55
363 11
364 38
365 44
366 39
367 4
368 72
369 3
370 40
371 36
372 62
373 30
374 73
375 5
376 28
377 22
378 0
379 10
380 50
381 1
382 11
383 45
384 39
385 2
386 9
387 8
388 30
389 63
390 14
391 76
392 31
393 61
394 4
395 1
396 25
397 7
398 25
399 70
400 62
401 77
402 62
403 22
404 48
405 10
406 1
407 28
408 16
409 33
410 68
411 43
412 21
413 16
414 39
415 22
416 28
417 3
418 28
419 59
420 70
421 39
422 43
423 34
424 12
425 40
426 27
427 4
428 62
429 45
430 1
431 34
432 53
433 3
434 16
435 71
436 33
437 62
438 42
439 53
440 12
441 32
442 4
443 43
444 48
445 34
446 52
447 49
448 71
449 16
450 26
451 11
452 28
453 3
454 75
455 23
456 62
457 11
458 3
459 43
460 20
461 76
462 17
463 29
464 52
465 72
466 0
467 4
468 62
469 57
470 43
471 43
472 2
473 43
474 7
475 34
476 26
477 27
478 66
479 62
480 32
481 40
482 24
483 43
484 39
485 11
486 23
487 74
488 44
489 27
490 36
491 63
492 4
493 3
494 11
495 51
496 50
497 9
498 59
499 25
500 13
501 67
502 4
503 50
504 66
505 35
506 49
507 8
508 39
509 9
510 73
511 12
512 40
513 5
514 66
515 27
516 55
517 52
518 34
519 66
520 3
521 42
522 11
523 40
524 50
525 39
526 62
527 66
528 24
529 9
530 72
531 71
532 50
533 43
534 4
535 54
536 8
537 45
538 1
539 28
540 17
541 4
542 19
543 25
544 23
545 48
546 14
547 38
548 72
549 39
550 68
551 49
552 43
553 27
554 75
555 64
556 6
557 39
558 7
559 64
560 31
561 12
562 4
563 41
564 16
565 39
566 39
567 22
568 56
569 16
570 64
571 4
572 4
573 44
574 36
575 49
576 55
577 9
578 63
579 55
580 67
581 22
582 39
583 50
584 14
585 27
586 50
587 6
588 22
589 13
590 44
591 52
592 39
593 73
594 54
595 11
596 66
597 7
598 33
599 33
600 57
601 4
602 1
603 28
604 1
605 22
606 41
607 62
608 48
609 23
610 26
611 30
612 72
613 2
614 63
615 30
616 50
617 13
618 3
619 66
620 36
621 77
622 8
623 28
624 4
625 15
626 57
627 77
628 44
629 0
630 77
631 55
632 3
633 76
634 37
635 1
636 39
637 24
638 10
639 32
640 76
641 1
642 7
643 28
644 22
645 16
646 40
647 44
648 10
649 73
650 66
651 28
652 75
653 6
654 55
655 11
656 62
657 34
658 27
659 55
660 10
661 41
662 12
663 50
664 66
665 17
666 72
667 32
668 44
669 70
670 54
671 9
672 29
673 37
674 40
675 7
676 24
677 62
678 28
679 11
680 12
681 32
682 64
683 39
684 40
685 9
686 4
687 1
688 63
689 7
690 16
691 40