This commit is contained in:
Sebastian 2022-05-18 00:59:01 +02:00
parent 3ade31cf7d
commit 75174effea
3 changed files with 91 additions and 65 deletions

View File

@ -44,7 +44,7 @@
"source": [ "source": [
"train = pd.read_csv('train/train.tsv', header=None, sep='\\t', error_bad_lines=False)\n", "train = pd.read_csv('train/train.tsv', header=None, sep='\\t', error_bad_lines=False)\n",
"print(len(train))\n", "print(len(train))\n",
"train = train.head(10000)" "train = train[:10000]"
] ]
}, },
{ {
@ -60,7 +60,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"id": "dd454ce5-a06e-4fbd-a546-83fb94ad0390", "id": "dd454ce5-a06e-4fbd-a546-83fb94ad0390",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -74,7 +74,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 4,
"id": "0a1cce75-86a1-4f76-9416-e876e01699e3", "id": "0a1cce75-86a1-4f76-9416-e876e01699e3",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -85,7 +85,7 @@
" ('linearregression', LinearRegression())])" " ('linearregression', LinearRegression())])"
] ]
}, },
"execution_count": 5, "execution_count": 4,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -97,7 +97,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": null,
"id": "cc1270d5-29dc-4f03-82c1-dc03f3e4fa00", "id": "cc1270d5-29dc-4f03-82c1-dc03f3e4fa00",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -114,25 +114,47 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 5,
"id": "2fd18dfa-0dba-460b-a56d-21793baa7124",
"metadata": {},
"outputs": [],
"source": [
"def readFile(filename):\n",
" result = []\n",
" with open(filename, 'r', encoding=\"utf-8\") as file:\n",
" for line in file:\n",
" text = line.split(\"\\t\")[0].strip()\n",
" result.append(text)\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ce918d1f-2b8d-432c-be19-3a4966062d35",
"metadata": {},
"outputs": [],
"source": [
"x_dev = readFile('dev-0/in.tsv')\n",
"dev_predicted = model.predict(x_dev)\n",
"with open('dev-0/out.tsv', 'wt') as f:\n",
" for i in dev_predicted:\n",
" f.write(str(i)+'\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "223de995-5e91-4254-9214-4fc871c985e9", "id": "223de995-5e91-4254-9214-4fc871c985e9",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"4086.3369441409172\n"
]
}
],
"source": [ "source": [
"print(mean_squared_error(dev_out, dev_expected))" "print(mean_squared_error(dev_out, dev_expected))"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 7,
"id": "3bc8418b-64f1-4163-a0ec-8e3293032341", "id": "3bc8418b-64f1-4163-a0ec-8e3293032341",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -152,19 +174,10 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": null,
"id": "a18aea56-7fa1-40bd-8aa3-bbaf9d66d6b7", "id": "a18aea56-7fa1-40bd-8aa3-bbaf9d66d6b7",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stderr",
"output_type": "stream",
"text": [
"[NbConvertApp] Converting notebook run.ipynb to script\n",
"[NbConvertApp] Writing 1607 bytes to run.py\n"
]
}
],
"source": [ "source": [
"!jupyter nbconvert --to script run.ipynb" "!jupyter nbconvert --to script run.ipynb"
] ]

View File

@ -2399,7 +2399,7 @@
1882.922191542162 1882.922191542162
2006.349306172171 2006.349306172171
1901.7102363782105 1901.7102363782105
1845.5158880914453 1845.9729647936565
1907.098520568831 1907.098520568831
1918.9780895683843 1918.9780895683843
1808.2574401126217 1808.2574401126217
@ -4770,7 +4770,7 @@
1869.069137844269 1869.069137844269
1906.274740877195 1906.274740877195
1945.2161014273179 1945.2161014273179
1897.764739918972 1897.6394497272986
1902.242330620895 1902.242330620895
1916.8264849806906 1916.8264849806906
1951.121108306926 1951.121108306926
@ -6336,7 +6336,8 @@
1974.1360045857598 1974.1360045857598
1851.0358973964571 1851.0358973964571
1837.1785138354128 1837.1785138354128
1920.829667692275 1906.347666040038
1941.0011085451317
1862.9050427724196 1862.9050427724196
1935.236110299957 1935.236110299957
1838.7213811070148 1838.7213811070148
@ -11780,7 +11781,7 @@
1971.0497213398978 1971.0497213398978
1965.6443681542146 1965.6443681542146
1893.8995265026135 1893.8995265026135
1843.5424552030524 1843.369474418135
1798.0577429814193 1798.0577429814193
1874.3107569198335 1874.3107569198335
1933.8681512671249 1933.8681512671249
@ -11868,7 +11869,7 @@
1933.2279243717342 1933.2279243717342
1948.4195648206846 1948.4195648206846
1891.657975325964 1891.657975325964
1881.6018296953764 1882.2766120936951
1898.8213099251864 1898.8213099251864
1891.724097045834 1891.724097045834
1898.9588820821 1898.9588820821
@ -13607,7 +13608,8 @@
1898.9944816373588 1898.9944816373588
1831.7070096790642 1831.7070096790642
1830.5322687720245 1830.5322687720245
1859.0850481271784 1872.3532137129366
1886.4495007457836
1821.4668780375355 1821.4668780375355
1912.9269712307623 1912.9269712307623
1984.9697444709716 1984.9697444709716
@ -14150,7 +14152,7 @@
1958.8896600656121 1958.8896600656121
1917.5200385639437 1917.5200385639437
1904.7837664328952 1904.7837664328952
1871.5148054544618 1871.1791610952482
1900.9087322958958 1900.9087322958958
1871.5459519347532 1871.5459519347532
1965.7232636496624 1965.7232636496624
@ -17368,7 +17370,7 @@
1863.8359491028696 1863.8359491028696
1980.291829241186 1980.291829241186
1819.6648275839043 1819.6648275839043
1856.061958173075 1855.3731584258308
1921.5280792457972 1921.5280792457972
1970.6462880262288 1970.6462880262288
1902.5179505003136 1902.5179505003136
@ -19795,7 +19797,7 @@
1917.2137752775773 1917.2137752775773
1850.5061783561657 1850.5061783561657
1911.974999970517 1911.974999970517
1837.7085426751216 1837.5419178319084
1970.1179076824587 1970.1179076824587
1955.9219153909546 1955.9219153909546
1941.3082376506911 1941.3082376506911
@ -19996,5 +19998,3 @@
1998.999096707664 1998.999096707664
1906.1529351577549 1906.1529351577549
1982.0734958856071 1982.0734958856071
1972.9762321594746
1976.0692324960928

1 1839.7731901418288
2399 1882.922191542162
2400 2006.349306172171
2401 1901.7102363782105
2402 1845.5158880914453 1845.9729647936565
2403 1907.098520568831
2404 1918.9780895683843
2405 1808.2574401126217
4770 1869.069137844269
4771 1906.274740877195
4772 1945.2161014273179
4773 1897.764739918972 1897.6394497272986
4774 1902.242330620895
4775 1916.8264849806906
4776 1951.121108306926
6336 1974.1360045857598
6337 1851.0358973964571
6338 1837.1785138354128
6339 1920.829667692275 1906.347666040038
6340 1941.0011085451317
6341 1862.9050427724196
6342 1935.236110299957
6343 1838.7213811070148
11781 1971.0497213398978
11782 1965.6443681542146
11783 1893.8995265026135
11784 1843.5424552030524 1843.369474418135
11785 1798.0577429814193
11786 1874.3107569198335
11787 1933.8681512671249
11869 1933.2279243717342
11870 1948.4195648206846
11871 1891.657975325964
11872 1881.6018296953764 1882.2766120936951
11873 1898.8213099251864
11874 1891.724097045834
11875 1898.9588820821
13608 1898.9944816373588
13609 1831.7070096790642
13610 1830.5322687720245
13611 1859.0850481271784 1872.3532137129366
13612 1886.4495007457836
13613 1821.4668780375355
13614 1912.9269712307623
13615 1984.9697444709716
14152 1958.8896600656121
14153 1917.5200385639437
14154 1904.7837664328952
14155 1871.5148054544618 1871.1791610952482
14156 1900.9087322958958
14157 1871.5459519347532
14158 1965.7232636496624
17370 1863.8359491028696
17371 1980.291829241186
17372 1819.6648275839043
17373 1856.061958173075 1855.3731584258308
17374 1921.5280792457972
17375 1970.6462880262288
17376 1902.5179505003136
19797 1917.2137752775773
19798 1850.5061783561657
19799 1911.974999970517
19800 1837.7085426751216 1837.5419178319084
19801 1970.1179076824587
19802 1955.9219153909546
19803 1941.3082376506911
19998 1998.999096707664
19999 1906.1529351577549
20000 1982.0734958856071
1972.9762321594746
1976.0692324960928

View File

@ -44,7 +44,7 @@
"source": [ "source": [
"train = pd.read_csv('train/train.tsv', header=None, sep='\\t', error_bad_lines=False)\n", "train = pd.read_csv('train/train.tsv', header=None, sep='\\t', error_bad_lines=False)\n",
"print(len(train))\n", "print(len(train))\n",
"train = train.head(10000)" "train = train[:10000]"
] ]
}, },
{ {
@ -60,7 +60,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"id": "dd454ce5-a06e-4fbd-a546-83fb94ad0390", "id": "dd454ce5-a06e-4fbd-a546-83fb94ad0390",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -74,7 +74,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 4,
"id": "0a1cce75-86a1-4f76-9416-e876e01699e3", "id": "0a1cce75-86a1-4f76-9416-e876e01699e3",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -85,7 +85,7 @@
" ('linearregression', LinearRegression())])" " ('linearregression', LinearRegression())])"
] ]
}, },
"execution_count": 5, "execution_count": 4,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -97,7 +97,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": null,
"id": "cc1270d5-29dc-4f03-82c1-dc03f3e4fa00", "id": "cc1270d5-29dc-4f03-82c1-dc03f3e4fa00",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -114,25 +114,47 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 5,
"id": "2fd18dfa-0dba-460b-a56d-21793baa7124",
"metadata": {},
"outputs": [],
"source": [
"def readFile(filename):\n",
" result = []\n",
" with open(filename, 'r', encoding=\"utf-8\") as file:\n",
" for line in file:\n",
" text = line.split(\"\\t\")[0].strip()\n",
" result.append(text)\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ce918d1f-2b8d-432c-be19-3a4966062d35",
"metadata": {},
"outputs": [],
"source": [
"x_dev = readFile('dev-0/in.tsv')\n",
"dev_predicted = model.predict(x_dev)\n",
"with open('dev-0/out.tsv', 'wt') as f:\n",
" for i in dev_predicted:\n",
" f.write(str(i)+'\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "223de995-5e91-4254-9214-4fc871c985e9", "id": "223de995-5e91-4254-9214-4fc871c985e9",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"4086.3369441409172\n"
]
}
],
"source": [ "source": [
"print(mean_squared_error(dev_out, dev_expected))" "print(mean_squared_error(dev_out, dev_expected))"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 7,
"id": "3bc8418b-64f1-4163-a0ec-8e3293032341", "id": "3bc8418b-64f1-4163-a0ec-8e3293032341",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -152,19 +174,10 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": null,
"id": "a18aea56-7fa1-40bd-8aa3-bbaf9d66d6b7", "id": "a18aea56-7fa1-40bd-8aa3-bbaf9d66d6b7",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stderr",
"output_type": "stream",
"text": [
"[NbConvertApp] Converting notebook run.ipynb to script\n",
"[NbConvertApp] Writing 1607 bytes to run.py\n"
]
}
],
"source": [ "source": [
"!jupyter nbconvert --to script run.ipynb" "!jupyter nbconvert --to script run.ipynb"
] ]