This commit is contained in:
Jakub Pokrywka 2022-05-16 10:57:31 +02:00
parent 9612baee51
commit a31ef88426
1 changed files with 39 additions and 47 deletions

View File

@ -30,17 +30,9 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mkdir: cannot create directory dev-0-ireland-news: File exists\r\n"
]
}
],
"outputs": [],
"source": [
"!mkdir dev-0-ireland-news"
]
@ -68,86 +60,86 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--2022-05-13 13:23:05-- https://github.com/kubapok/ireland-news-word-gap/raw/11c72875023c5c01c9d0c0ca39d72c90c840aeb3/dev-0/out.tsv\n",
"Resolving github.com (github.com)... 140.82.121.4\n",
"Connecting to github.com (github.com)|140.82.121.4|:443... connected.\n",
"--2022-05-16 10:53:35-- https://github.com/kubapok/ireland-news-word-gap/raw/11c72875023c5c01c9d0c0ca39d72c90c840aeb3/dev-0/out.tsv\n",
"Resolving github.com (github.com)... 140.82.121.3\n",
"Connecting to github.com (github.com)|140.82.121.3|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://raw.githubusercontent.com/kubapok/ireland-news-word-gap/11c72875023c5c01c9d0c0ca39d72c90c840aeb3/dev-0/out.tsv [following]\n",
"--2022-05-13 13:23:06-- https://raw.githubusercontent.com/kubapok/ireland-news-word-gap/11c72875023c5c01c9d0c0ca39d72c90c840aeb3/dev-0/out.tsv\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n",
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
"--2022-05-16 10:53:35-- https://raw.githubusercontent.com/kubapok/ireland-news-word-gap/11c72875023c5c01c9d0c0ca39d72c90c840aeb3/dev-0/out.tsv\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.111.133, ...\n",
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 63249692 (60M) [text/plain]\n",
"Saving to: out.tsv\n",
"\n",
"out.tsv 100%[===================>] 60,32M 26,7MB/s in 2,3s \n",
"out.tsv 100%[===================>] 60,32M 27,0MB/s in 2,2s \n",
"\n",
"2022-05-13 13:23:08 (26,7 MB/s) - out.tsv saved [63249692/63249692]\n",
"2022-05-16 10:53:37 (27,0 MB/s) - out.tsv saved [63249692/63249692]\n",
"\n",
"--2022-05-13 13:23:09-- https://github.com/kubapok/ireland-news-word-gap/raw/0c6557c8a3cd6d8c77f64618850b2ae82c19476a/dev-0/out.tsv\n",
"Resolving github.com (github.com)... 140.82.121.4\n",
"Connecting to github.com (github.com)|140.82.121.4|:443... connected.\n",
"--2022-05-16 10:53:38-- https://github.com/kubapok/ireland-news-word-gap/raw/0c6557c8a3cd6d8c77f64618850b2ae82c19476a/dev-0/out.tsv\n",
"Resolving github.com (github.com)... 140.82.121.3\n",
"Connecting to github.com (github.com)|140.82.121.3|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://raw.githubusercontent.com/kubapok/ireland-news-word-gap/0c6557c8a3cd6d8c77f64618850b2ae82c19476a/dev-0/out.tsv [following]\n",
"--2022-05-13 13:23:09-- https://raw.githubusercontent.com/kubapok/ireland-news-word-gap/0c6557c8a3cd6d8c77f64618850b2ae82c19476a/dev-0/out.tsv\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n",
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
"--2022-05-16 10:53:38-- https://raw.githubusercontent.com/kubapok/ireland-news-word-gap/0c6557c8a3cd6d8c77f64618850b2ae82c19476a/dev-0/out.tsv\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.111.133, ...\n",
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 63271863 (60M) [text/plain]\n",
"Saving to: out.tsv\n",
"\n",
"out.tsv 100%[===================>] 60,34M 45,1MB/s in 1,3s \n",
"out.tsv 100%[===================>] 60,34M 25,6MB/s in 2,4s \n",
"\n",
"2022-05-13 13:23:10 (45,1 MB/s) - out.tsv saved [63271863/63271863]\n",
"2022-05-16 10:53:40 (25,6 MB/s) - out.tsv saved [63271863/63271863]\n",
"\n",
"--2022-05-13 13:23:11-- https://git.wmi.amu.edu.pl/kubapok/ireland-news-word-gap-prediction/raw/branch/master/dev-0/expected.tsv\n",
"--2022-05-16 10:53:41-- https://git.wmi.amu.edu.pl/kubapok/ireland-news-word-gap-prediction/raw/branch/master/dev-0/expected.tsv\n",
"Resolving git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)... 150.254.78.40\n",
"Connecting to git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)|150.254.78.40|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 866583 (846K) [text/plain]\n",
"Saving to: expected.tsv.1\n",
"Saving to: expected.tsv\n",
"\n",
"expected.tsv.1 100%[===================>] 846,27K 1,91MB/s in 0,4s \n",
"expected.tsv 100%[===================>] 846,27K --.-KB/s in 0,08s \n",
"\n",
"2022-05-13 13:23:11 (1,91 MB/s) - expected.tsv.1 saved [866583/866583]\n",
"2022-05-16 10:53:41 (10,9 MB/s) - expected.tsv saved [866583/866583]\n",
"\n"
]
}
],
"source": [
"!wget https://github.com/kubapok/ireland-news-word-gap/raw/11c72875023c5c01c9d0c0ca39d72c90c840aeb3/dev-0/out.tsv\n",
"!mv out.tsv ./dev-0/out-solution1.tsv\n",
"!mv out.tsv ./dev-0-ireland-news/out-solution1.tsv\n",
"!wget https://github.com/kubapok/ireland-news-word-gap/raw/0c6557c8a3cd6d8c77f64618850b2ae82c19476a/dev-0/out.tsv\n",
"!mv out.tsv ./dev-0/out-solution2.tsv\n",
"! ( cd dev-0 ; wget https://git.wmi.amu.edu.pl/kubapok/ireland-news-word-gap-prediction/raw/branch/master/dev-0/expected.tsv)"
"!mv out.tsv ./dev-0-ireland-news/out-solution2.tsv\n",
"! ( cd dev-0-ireland-news ; wget https://git.wmi.amu.edu.pl/kubapok/ireland-news-word-gap-prediction/raw/branch/master/dev-0/expected.tsv)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--2022-05-13 13:23:12-- https://gonito.net/get/bin/geval\n",
"--2022-05-16 10:53:41-- https://gonito.net/get/bin/geval\n",
"Resolving gonito.net (gonito.net)... 150.254.78.126\n",
"Connecting to gonito.net (gonito.net)|150.254.78.126|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 12860136 (12M) [application/octet-stream]\n",
"Saving to: geval.1\n",
"Saving to: geval.2\n",
"\n",
"geval.1 100%[===================>] 12,26M 2,67MB/s in 4,1s \n",
"geval.2 100%[===================>] 12,26M 6,24MB/s in 2,0s \n",
"\n",
"2022-05-13 13:23:16 (2,97 MB/s) - geval.1 saved [12860136/12860136]\n",
"2022-05-16 10:53:43 (6,24 MB/s) - geval.2 saved [12860136/12860136]\n",
"\n"
]
}
@ -159,7 +151,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@ -171,12 +163,12 @@
}
],
"source": [
"!./geval --metric PerplexityHashed -o ./dev-0/out-solution1.tsv -e dev-0/expected.tsv"
"!./geval --metric PerplexityHashed -o ./dev-0-ireland-news/out-solution1.tsv -e dev-0-ireland-news/expected.tsv"
]
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@ -188,16 +180,16 @@
}
],
"source": [
"!./geval --metric PerplexityHashed -o ./dev-0/out-solution2.tsv -e dev-0/expected.tsv"
"!./geval --metric PerplexityHashed -o ./dev-0-ireland-news/out-solution2.tsv -e dev-0-ireland-news/expected.tsv"
]
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"with open('./dev-0/out-solution1.tsv') as s1, open('./dev-0/out-solution2.tsv') as s2, open('./dev-0/out-merge.tsv','w') as f_merge:\n",
"with open('./dev-0-ireland-news/out-solution1.tsv') as s1, open('./dev-0-ireland-news/out-solution2.tsv') as s2, open('./dev-0-ireland-news/out-merge.tsv','w') as f_merge:\n",
" for l1, l2 in zip(s1, s2):\n",
" dir1 = {''.join(x.split(':')[:-1]): float(x.split(':')[-1]) for x in l1.rstrip().split(' ')}\n",
" dir2 = {''.join(x.split(':')[:-1]): float(x.split(':')[-1]) for x in l2.rstrip().split(' ')}\n",
@ -212,7 +204,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@ -224,7 +216,7 @@
}
],
"source": [
"!./geval --metric PerplexityHashed -o ./dev-0/out-merge.tsv -e dev-0/expected.tsv"
"!./geval --metric PerplexityHashed -o ./dev-0-ireland-news/out-merge.tsv -e dev-0-ireland-news/expected.tsv"
]
},
{