diff --git a/.ipynb_checkpoints/run-checkpoint.ipynb b/.ipynb_checkpoints/run-checkpoint.ipynb index 795207d..3bb4dfe 100644 --- a/.ipynb_checkpoints/run-checkpoint.ipynb +++ b/.ipynb_checkpoints/run-checkpoint.ipynb @@ -44,7 +44,7 @@ "source": [ "train = pd.read_csv('train/train.tsv', header=None, sep='\\t', error_bad_lines=False)\n", "print(len(train))\n", - "train = train.head(10000)" + "train = train[:10000]" ] }, { @@ -60,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "dd454ce5-a06e-4fbd-a546-83fb94ad0390", "metadata": {}, "outputs": [], @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "0a1cce75-86a1-4f76-9416-e876e01699e3", "metadata": {}, "outputs": [ @@ -85,7 +85,7 @@ " ('linearregression', LinearRegression())])" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -97,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "cc1270d5-29dc-4f03-82c1-dc03f3e4fa00", "metadata": {}, "outputs": [], @@ -114,25 +114,47 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, + "id": "2fd18dfa-0dba-460b-a56d-21793baa7124", + "metadata": {}, + "outputs": [], + "source": [ + "def readFile(filename):\n", + " result = []\n", + " with open(filename, 'r', encoding=\"utf-8\") as file:\n", + " for line in file:\n", + " text = line.split(\"\\t\")[0].strip()\n", + " result.append(text)\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ce918d1f-2b8d-432c-be19-3a4966062d35", + "metadata": {}, + "outputs": [], + "source": [ + "x_dev = readFile('dev-0/in.tsv')\n", + "dev_predicted = model.predict(x_dev)\n", + "with open('dev-0/out.tsv', 'wt') as f:\n", + " for i in dev_predicted:\n", + " f.write(str(i)+'\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "223de995-5e91-4254-9214-4fc871c985e9", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4086.3369441409172\n" - ] - } - ], + "outputs": [], "source": [ "print(mean_squared_error(dev_out, dev_expected))" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "3bc8418b-64f1-4163-a0ec-8e3293032341", "metadata": {}, "outputs": [], @@ -152,19 +174,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "a18aea56-7fa1-40bd-8aa3-bbaf9d66d6b7", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NbConvertApp] Converting notebook run.ipynb to script\n", - "[NbConvertApp] Writing 1607 bytes to run.py\n" - ] - } - ], + "outputs": [], "source": [ "!jupyter nbconvert --to script run.ipynb" ] diff --git a/dev-0/out.tsv b/dev-0/out.tsv index 27186ae..70d138b 100644 --- a/dev-0/out.tsv +++ b/dev-0/out.tsv @@ -2399,7 +2399,7 @@ 1882.922191542162 2006.349306172171 1901.7102363782105 -1845.5158880914453 +1845.9729647936565 1907.098520568831 1918.9780895683843 1808.2574401126217 @@ -4770,7 +4770,7 @@ 1869.069137844269 1906.274740877195 1945.2161014273179 -1897.764739918972 +1897.6394497272986 1902.242330620895 1916.8264849806906 1951.121108306926 @@ -6336,7 +6336,8 @@ 1974.1360045857598 1851.0358973964571 1837.1785138354128 -1920.829667692275 +1906.347666040038 +1941.0011085451317 1862.9050427724196 1935.236110299957 1838.7213811070148 @@ -11780,7 +11781,7 @@ 1971.0497213398978 1965.6443681542146 1893.8995265026135 -1843.5424552030524 +1843.369474418135 1798.0577429814193 1874.3107569198335 1933.8681512671249 @@ -11868,7 +11869,7 @@ 1933.2279243717342 1948.4195648206846 1891.657975325964 -1881.6018296953764 +1882.2766120936951 1898.8213099251864 1891.724097045834 1898.9588820821 @@ -13607,7 +13608,8 @@ 1898.9944816373588 1831.7070096790642 1830.5322687720245 -1859.0850481271784 +1872.3532137129366 +1886.4495007457836 1821.4668780375355 1912.9269712307623 1984.9697444709716 @@ -14150,7 +14152,7 @@ 1958.8896600656121 1917.5200385639437 1904.7837664328952 -1871.5148054544618 +1871.1791610952482 1900.9087322958958 1871.5459519347532 1965.7232636496624 @@ -17368,7 +17370,7 @@ 1863.8359491028696 1980.291829241186 1819.6648275839043 -1856.061958173075 +1855.3731584258308 1921.5280792457972 1970.6462880262288 1902.5179505003136 @@ -19795,7 +19797,7 @@ 1917.2137752775773 1850.5061783561657 1911.974999970517 -1837.7085426751216 +1837.5419178319084 1970.1179076824587 1955.9219153909546 1941.3082376506911 @@ -19996,5 +19998,3 @@ 1998.999096707664 1906.1529351577549 1982.0734958856071 -1972.9762321594746 -1976.0692324960928 diff --git a/run.ipynb b/run.ipynb index 795207d..3bb4dfe 100644 --- a/run.ipynb +++ b/run.ipynb @@ -44,7 +44,7 @@ "source": [ "train = pd.read_csv('train/train.tsv', header=None, sep='\\t', error_bad_lines=False)\n", "print(len(train))\n", - "train = train.head(10000)" + "train = train[:10000]" ] }, { @@ -60,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "dd454ce5-a06e-4fbd-a546-83fb94ad0390", "metadata": {}, "outputs": [], @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "0a1cce75-86a1-4f76-9416-e876e01699e3", "metadata": {}, "outputs": [ @@ -85,7 +85,7 @@ " ('linearregression', LinearRegression())])" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -97,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "cc1270d5-29dc-4f03-82c1-dc03f3e4fa00", "metadata": {}, "outputs": [], @@ -114,25 +114,47 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, + "id": "2fd18dfa-0dba-460b-a56d-21793baa7124", + "metadata": {}, + "outputs": [], + "source": [ + "def readFile(filename):\n", + " result = []\n", + " with open(filename, 'r', encoding=\"utf-8\") as file:\n", + " for line in file:\n", + " text = line.split(\"\\t\")[0].strip()\n", + " result.append(text)\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ce918d1f-2b8d-432c-be19-3a4966062d35", + "metadata": {}, + "outputs": [], + "source": [ + "x_dev = readFile('dev-0/in.tsv')\n", + "dev_predicted = model.predict(x_dev)\n", + "with open('dev-0/out.tsv', 'wt') as f:\n", + " for i in dev_predicted:\n", + " f.write(str(i)+'\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "223de995-5e91-4254-9214-4fc871c985e9", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4086.3369441409172\n" - ] - } - ], + "outputs": [], "source": [ "print(mean_squared_error(dev_out, dev_expected))" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "3bc8418b-64f1-4163-a0ec-8e3293032341", "metadata": {}, "outputs": [], @@ -152,19 +174,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "a18aea56-7fa1-40bd-8aa3-bbaf9d66d6b7", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NbConvertApp] Converting notebook run.ipynb to script\n", - "[NbConvertApp] Writing 1607 bytes to run.py\n" - ] - } - ], + "outputs": [], "source": [ "!jupyter nbconvert --to script run.ipynb" ]