{ "cells": [ { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "import lzma\n", "import csv\n", "import re\n", "\n", "def readInput(dir):\n", " X = []\n", " if 'xz' in dir:\n", " with lzma.open(dir) as f:\n", " for line in f:\n", " text = line.decode('utf-8')\n", " text = text.split('\\t')\n", " X.append(text)\n", " else:\n", " with open(dir, encoding='utf8', errors='ignore') as f:\n", " for line in f:\n", " X. append(line.replace('\\n',''))\n", " return X\n", "\n", "def writeOutput(output, dir):\n", " with open(dir, 'w', newline='') as f:\n", " writer = csv.writer(f)\n", " for row in output:\n", " writer.writerow([row])" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "X = readInput('train/train.tsv.xz')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10000" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(X[:10000])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "Xcut = X[:10000]\n", "\n", "\n", "train = pd.DataFrame(Xcut, columns=['Beginning', 'End', 'Title', 'Source', 'X'])\n", "train['Y'] = train.apply(lambda x: (float(x.Beginning) + float(x.End))/2, axis=1)\n", "train = train.drop(columns=['Beginning', 'End', 'Title', 'Source'])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from sklearn import linear_model\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "from sklearn.pipeline import Pipeline\n", "\n", "estimators = [('tfidf', TfidfVectorizer()), ('linearRegression', linear_model.LinearRegression())]\n", "model = Pipeline(estimators)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Pipeline(steps=[('tfidf', TfidfVectorizer()),\n", " ('linearRegression', LinearRegression())])" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit(train.X, train.Y)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "dev0X = readInput('dev-0/in.tsv')\n", "dev0Expected = readInput('dev-0/expected.tsv')\n", "dev0Predicted = model.predict(dev0X)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[1840.1873506530203,\n", " 1955.5054038330952,\n", " 1957.188169072484,\n", " 1982.9915478948235,\n", " 1922.1391249931735,\n", " 1953.8540804466156,\n", " 1994.2253928270109,\n", " 1928.8733549680846,\n", " 1878.2307361714463,\n", " 1932.1935071045214,\n", " 1874.0086844537832,\n", " 1990.5539090713487,\n", " 1891.3963033388222,\n", " 1916.1311071475943,\n", " 1965.8835164477582,\n", " 1944.1484754660025,\n", " 1981.2925041285077,\n", " 1969.2231979814874,\n", " 1833.8734424736388,\n", " 1962.692041027358,\n", " 1881.8458215986618,\n", " 1867.336166647721,\n", " 1923.579708076516,\n", " 1918.2099905900664,\n", " 1896.2953230746057,\n", " 1953.899814150262,\n", " 1912.3026680112946,\n", " 1933.100765112436,\n", " 1987.5536174030713,\n", " 1953.1828450164787,\n", " 1907.4139724517734,\n", " 1907.4602781805127,\n", " 1876.2301111417316,\n", " 1894.5329130511432,\n", " 1898.901328501861,\n", " 1909.3986406773226,\n", " 1889.9044917531517,\n", " 1876.3801241447957,\n", " 2009.4570741782811,\n", " 1953.2668326737992,\n", " 1855.4125267298964,\n", " 1941.7194032294385,\n", " 1928.2119128971158,\n", " 1900.0663996597482,\n", " 1989.980205997232,\n", " 1844.2213373379752,\n", " 1951.9725944795941,\n", " 1982.9977733647736,\n", " 1893.1463833825394,\n", " 1912.5417674193895,\n", " 1880.791551771725,\n", " 1876.3159968892533,\n", " 1980.5949973320403,\n", " 1878.6012276482104,\n", " 1905.8484657399335,\n", " 1904.322333017581,\n", " 1997.1955310422322,\n", " 1961.4288173172295,\n", " 1896.8921503331455,\n", " 1903.0302998560226,\n", " 1878.126867601444,\n", " 1992.403886508728,\n", " 1957.5806990035717,\n", " 1926.6392568873607,\n", " 1999.4630026228383,\n", " 1873.785978888278,\n", " 1908.1078217340728,\n", " 1865.7125705190892,\n", " 1925.3843223995948,\n", " 2001.1087185320298,\n", " 1941.7041146366603,\n", " 1982.7200516450678,\n", " 1963.625444656117,\n", " 1983.3299928602103,\n", " 1951.9525271588832,\n", " 1990.7583917931393,\n", " 1978.4347214395655,\n", " 1978.8128252249874,\n", " 1927.7109209458933,\n", " 1892.2810873989572,\n", " 1972.8386316984686,\n", " 1975.758026921782,\n", " 2012.8921174644602,\n", " 1909.0618040644217,\n", " 1874.5366616681504,\n", " 1956.2376442146806,\n", " 1896.7359278470897,\n", " 1961.2012675300855,\n", " 1907.4913255544798,\n", " 1900.9409799297612,\n", " 1999.8471774186446,\n", " 1877.4389725884366,\n", " 1910.3704612119786,\n", " 1985.7453910649942,\n", " 1895.3754286003357,\n", " 1900.6122372878467,\n", " 1882.5647269883825,\n", " 1959.0719488534091,\n", " 1938.4605579343192,\n", " 2017.2513120335204,\n", " 1912.5260919116963,\n", " 1974.7313385320203,\n", " 1989.745443701346,\n", " 1938.4245245979862,\n", " 1929.6074842166595,\n", " 1954.5423059320547,\n", " 1882.2754695126491,\n", " 1935.8121365641807,\n", " 1938.6702139413812,\n", " 1916.708757315056,\n", " 1954.7249863802867,\n", " 1878.6354057929702,\n", " 1966.0516995797793,\n", " 1889.0091804647323,\n", " 1924.0535140985937,\n", " 1919.9763389543784,\n", " 1961.7286336396717,\n", " 1943.4206244063553,\n", " 1902.4036927228083,\n", " 1934.3911140392095,\n", " 1927.6153635334535,\n", " 1937.25195104676,\n", " 1919.8379519749,\n", " 1985.298628274291,\n", " 1913.923957906324,\n", " 1850.149196174956,\n", " 1932.7024886100123,\n", " 1900.2187076454893,\n", " 1939.0316672605582,\n", " 1867.0588890247586,\n", " 1949.495514166902,\n", " 1910.6909354939485,\n", " 1869.0848622853389,\n", " 1892.844610241327,\n", " 1876.9393563739682,\n", " 1872.619310363769,\n", " 1909.0209990421888,\n", " 1898.5385156179004,\n", " 2028.5464265181251,\n", " 1950.2972829214918,\n", " 1945.4689072911763,\n", " 1892.0561105467652,\n", " 1841.8321317652428,\n", " 2007.7594399604984,\n", " 1994.3385584201324,\n", " 1903.8663186815293,\n", " 1910.3840870576093,\n", " 1933.0478418159325,\n", " 1858.20073863683,\n", " 1976.7130661496035,\n", " 1982.040243376582,\n", " 1975.3098869633363,\n", " 1813.6380817764837,\n", " 1860.4868157935061,\n", " 1935.0046786260443,\n", " 1880.046840811149,\n", " 1932.952816825448,\n", " 1952.4720598644697,\n", " 1870.5646754498086,\n", " 1934.1484570897112,\n", " 1922.2083803634544,\n", " 1891.6971069725907,\n", " 1891.5467619299338,\n", " 1878.5767576306152,\n", " 1911.5816456900134,\n", " 1957.0981196781083,\n", " 2010.0914659423524,\n", " 1987.184997975979,\n", " 1878.99541765087,\n", " 1998.9214364221739,\n", " 1961.8880572180435,\n", " 1865.183948449711,\n", " 1992.9028635837597,\n", " 1872.982306810104,\n", " 1884.6022374188033,\n", " 1930.7051521998046,\n", " 1963.9041521508807,\n", " 1971.0852407172647,\n", " 1887.1733177731649,\n", " 1936.9469702482754,\n", " 1945.9693069422215,\n", " 1979.2544432205032,\n", " 1936.2452800759104,\n", " 1987.353371511696,\n", " 1915.4200210532777,\n", " 1980.7985504936808,\n", " 1890.1343304958841,\n", " 1945.5674792360787,\n", " 1919.7995666478391,\n", " 1898.3961649331218,\n", " 1894.974827293279,\n", " 1904.5408449507013,\n", " 1882.2631677580428,\n", " 1866.8029362871148,\n", " 1975.7970493186356,\n", " 1902.8758333839453,\n", " 1910.9677377436387,\n", " 1849.2149188417998,\n", " 1941.0823103662578,\n", " 1963.5667110234926,\n", " 1845.6089273475236,\n", " 2011.873464128537,\n", " 1911.911235887522,\n", " 1991.8821469836173,\n", " 1988.5810448271409,\n", " 1962.7943855912958,\n", " 1971.3620340106725,\n", " 1890.0206853087461,\n", " 2002.9847346366628,\n", " 1879.3320375844173,\n", " 1959.8017355726524,\n", " 1948.8167504689445,\n", " 1966.07510819957,\n", " 1943.8016609077754,\n", " 1961.085714583202,\n", " 1897.9084480496756,\n", " 1939.2705898688241,\n", " 1984.1174372482437,\n", " 1894.2614550602684,\n", " 1895.378665333734,\n", " 1915.4541956029625,\n", " 1898.7823852936374,\n", " 1904.8481870505414,\n", " 1925.5772044783346,\n", " 1966.0083553646334,\n", " 1970.1796762340264,\n", " 1935.1144594596572,\n", " 1953.3501712379602,\n", " 1875.739909944408,\n", " 1876.0020439900882,\n", " 1922.3854141050526,\n", " 1868.0372338851978,\n", " 1910.386887479531,\n", " 1912.1554002134365,\n", " 1915.0931483151676,\n", " 1877.3024203722453,\n", " 1877.253732776828,\n", " 1936.7250053078465,\n", " 1891.3138920227263,\n", " 1908.237673083131,\n", " 1987.9054289917976,\n", " 1903.70412954432,\n", " 1924.723134373608,\n", " 1935.1178725062973,\n", " 1983.0734704185322,\n", " 1887.8924504374272,\n", " 2000.4219968377438,\n", " 1964.3993615673073,\n", " 1900.2773965603762,\n", " 1945.1416027227583,\n", " 1903.3293609712368,\n", " 1912.3866847387694,\n", " 2004.446428739491,\n", " 1904.7627365539536,\n", " 1967.4465180377595,\n", " 1972.1396727420974,\n", " 1980.936226267668,\n", " 1875.6809858271633,\n", " 1877.0996027882427,\n", " 1899.0990556204817,\n", " 2006.029272666147,\n", " 1887.1795035137018,\n", " 1959.825311477964,\n", " 1873.5774368166688,\n", " 1958.2965394815083,\n", " 1938.629586105924,\n", " 1961.2877235097023,\n", " 1923.5597018664055,\n", " 1877.0467396667511,\n", " 1916.6969000825143,\n", " 1918.6912186760549,\n", " 1905.3932883567854,\n", " 2016.8492832033414,\n", " 1917.8502187688466,\n", " 1997.930634551496,\n", " 1952.9382103944763,\n", " 1888.4202321997122,\n", " 1997.8586048022291,\n", " 1909.7063993235488,\n", " 1915.3108929177517,\n", " 1982.9538727811057,\n", " 1944.9403640763462,\n", " 1984.7807251960546,\n", " 1893.8247275150788,\n", " 1985.6464908849266,\n", " 1906.7150684605235,\n", " 1933.813408970437,\n", " 1920.3355480971475,\n", " 1966.0435836236693,\n", " 1889.508247155757,\n", " 1933.3023902482653,\n", " 1900.9749268833702,\n", " 1945.565080772876,\n", " 1882.906035880404,\n", " 1972.0366287341392,\n", " 1965.8407065207352,\n", " 1894.1130192088149,\n", " 1901.9912969997608,\n", " 1856.58679209275,\n", " 1967.8215042527454,\n", " 1960.7625135442524,\n", " 1908.265617316176,\n", " 1906.1022078975698,\n", " 1947.4825120009198,\n", " 1883.0886171054199,\n", " 1849.384532378764,\n", " 1997.3565973500004,\n", " 1879.631204148954,\n", " 1863.9069334385974,\n", " 1939.442507249565,\n", " 1880.284155197719,\n", " 1920.4999317232227,\n", " 1956.0941851429293,\n", " 1881.8428126828428,\n", " 1873.9931943742629,\n", " 1912.1435026760844,\n", " 1890.4741247877955,\n", " 1888.0897378236189,\n", " 1918.4669446644602,\n", " 1915.481242667747,\n", " 2003.4456748747414,\n", " 1913.7750621282885,\n", " 1942.1515832838024,\n", " 1967.0954888427757,\n", " 1973.769064486892,\n", " 1886.0364053247679,\n", " 1869.0669323217257,\n", " 1963.3027731391126,\n", " 1860.2443487225341,\n", " 1947.9204945352824,\n", " 1924.9861408278969,\n", " 1896.8652515178303,\n", " 1907.416622001296,\n", " 1946.5469918185975,\n", " 1973.1504571554392,\n", " 1959.600365772672,\n", " 1880.514244166832,\n", " 1975.7280463592626,\n", " 1923.622451684597,\n", " 1947.191425545782,\n", " 1845.657419451869,\n", " 1932.6153478357207,\n", " 1918.7427806164703,\n", " 1899.3111707467451,\n", " 1898.7032333126201,\n", " 1905.313490156231,\n", " 1943.6097346863887,\n", " 1878.4498755800178,\n", " 1976.0846843845834,\n", " 1890.2547792572032,\n", " 1935.9424074798671,\n", " 1894.1704745324466,\n", " 1898.3449892784472,\n", " 1928.5379025421978,\n", " 1924.7987243673292,\n", " 1863.2744971359884,\n", " 1895.9036036255122,\n", " 1868.7377039973717,\n", " 1991.5185385191858,\n", " 1951.081363800528,\n", " 1957.4394891862007,\n", " 1883.4508459490064,\n", " 1983.7480250855758,\n", " 1854.9567904530027,\n", " 1960.6111926293536,\n", " 1877.028742663458,\n", " 2014.8677711921764,\n", " 1934.5806704594447,\n", " 1879.7045652486677,\n", " 1887.9558405565826,\n", " 1961.148446911978,\n", " 1940.3806342487594,\n", " 1898.3593459263946,\n", " 1928.1069760151158,\n", " 1999.7021671213547,\n", " 1965.709602489129,\n", " 1901.414075729924,\n", " 1916.1084440396407,\n", " 1889.5802232574988,\n", " 1834.8853796798744,\n", " 1919.9677791707393,\n", " 1882.6419652383686,\n", " 1939.2728181505934,\n", " 1866.5378591167123,\n", " 1920.2380089752428,\n", " 1971.7668190915151,\n", " 1845.4748087916,\n", " 1926.2873753619194,\n", " 1900.147981373055,\n", " 1899.8731274162872,\n", " 1903.5003087786888,\n", " 1944.1303549344123,\n", " 1923.6782060598446,\n", " 1913.3819728539138,\n", " 2019.8382779681776,\n", " 1859.7241668791253,\n", " 1926.1538355477855,\n", " 1910.4132105802744,\n", " 1945.9066933813483,\n", " 1973.4504693240144,\n", " 1917.885621621536,\n", " 1965.1006414033952,\n", " 1974.9640405366608,\n", " 1925.6726057202516,\n", " 1880.3738703928516,\n", " 1937.2607468583892,\n", " 1967.120463480692,\n", " 1889.0050273686927,\n", " 1994.4271480086877,\n", " 2020.8334723232865,\n", " 1882.2153244944627,\n", " 1995.1780962042299,\n", " 1898.639109614116,\n", " 1892.2172263046423,\n", " 1812.2300253441358,\n", " 1993.7791682978623,\n", " 1990.2175977844472,\n", " 1986.3692672050438,\n", " 1878.719090318601,\n", " 1943.6148264014284,\n", " 1881.80440866435,\n", " 1936.3813316342926,\n", " 1954.8228271136843,\n", " 1971.341057197389,\n", " 1928.7839973974633,\n", " 1904.812519808914,\n", " 1904.6057806406875,\n", " 1922.4456822284003,\n", " 1948.8546449817368,\n", " 1959.413341657744,\n", " 1947.445687818313,\n", " 1926.2747445496987,\n", " 1970.7964810801,\n", " 1963.7529754786158,\n", " 1930.876845765326,\n", " 1963.4173631449482,\n", " 1946.5996967632987,\n", " 1945.7566042299507,\n", " 1957.5549256017232,\n", " 1897.557343803956,\n", " 1921.5502306452095,\n", " 1861.8048856995517,\n", " 1925.0399941968274,\n", " 1997.936361590576,\n", " 2016.8393064083377,\n", " 1968.4939074250403,\n", " 1966.2552349913524,\n", " 1873.69753875608,\n", " 1953.6582259950821,\n", " 1883.8698068956824,\n", " 1866.6558382790988,\n", " 1966.887305865265,\n", " 1811.2358128716035,\n", " 1966.8694927449387,\n", " 1859.2134360783393,\n", " 1875.7882583823011,\n", " 1875.885328559445,\n", " 1920.177423500038,\n", " 1898.0135959118645,\n", " 1884.8490963908603,\n", " 1998.7136290837036,\n", " 1874.5719288320065,\n", " 1867.387460254497,\n", " 1868.498738201564,\n", " 1855.6415016682688,\n", " 1968.344430087269,\n", " 1884.1770278326535,\n", " 1858.1245344937417,\n", " 1957.4767974163703,\n", " 1928.2242948950288,\n", " 1959.799832041844,\n", " 1863.2870382940553,\n", " 1965.8955231866128,\n", " 1916.1117174521714,\n", " 1961.0214280062808,\n", " 1888.6510243560303,\n", " 1914.3093093673049,\n", " 1872.1488809145544,\n", " 1977.2967621482942,\n", " 1887.3103704745215,\n", " 1896.2430973549995,\n", " 1973.70723948697,\n", " 1861.019558623386,\n", " 1855.5898733760123,\n", " 1998.2341246175683,\n", " 1980.7894907736224,\n", " 1998.195642767237,\n", " 1907.3197846986038,\n", " 1872.2436134786199,\n", " 1951.0690397189594,\n", " 1961.2413267879417,\n", " 1949.1535364837407,\n", " 1984.2926219813878,\n", " 1906.805168495171,\n", " 1928.2688835028935,\n", " 1973.2873909472464,\n", " 1971.3253413771658,\n", " 1956.5875737213937,\n", " 1913.7329066126233,\n", " 1902.8683143563542,\n", " 1992.4118744968048,\n", " 1990.2222700625728,\n", " 1880.186825425649,\n", " 1908.3528531934837,\n", " 1954.5031360066664,\n", " 1937.8429001180739,\n", " 1996.7634538302536,\n", " 1953.700871570044,\n", " 1989.6826696606722,\n", " 1980.196360386968,\n", " 1926.2765029959967,\n", " 1856.7333879779712,\n", " 1898.9687820318643,\n", " 1884.3385479607375,\n", " 1990.5615767605439,\n", " 1904.2177124735524,\n", " 1944.4390637660904,\n", " 1936.7607553209934,\n", " 1933.880402011111,\n", " 1941.443914222045,\n", " 1907.5003596648478,\n", " 1912.7427554459225,\n", " 1913.505533463578,\n", " 1903.527300539802,\n", " 1950.1286917810505,\n", " 1878.3237530142912,\n", " 1838.2360381679755,\n", " 1992.5586601338437,\n", " 1978.241472602853,\n", " 1904.6328114074104,\n", " 1944.5824155752014,\n", " 1890.7451511125462,\n", " 1925.506937826535,\n", " 1912.6813292924421,\n", " 1939.5156010976127,\n", " 1913.896539311866,\n", " 1908.814243995027,\n", " 1905.8620731635556,\n", " 1966.8390758422563,\n", " 1993.626004811837,\n", " 1898.0293404705553,\n", " 1904.1739294404654,\n", " 1844.990288132592,\n", " 1904.0506389601924,\n", " 1992.3716199749506,\n", " 1910.4180396998674,\n", " 1957.9068277841598,\n", " 1944.7425351984177,\n", " 1913.0376282270815,\n", " 1954.4372674620581,\n", " 1951.0750864942056,\n", " 1889.2711483139337,\n", " 1865.3747716079217,\n", " 1961.4778119359578,\n", " 1906.3350511041774,\n", " 1893.1146127363825,\n", " 1887.7672695961214,\n", " 1876.9518834303092,\n", " 1952.8729911355217,\n", " 2022.2055319465876,\n", " 1988.109484202296,\n", " 1965.0773711496972,\n", " 1847.6576789315934,\n", " 1980.9078598563106,\n", " 1962.1042085486288,\n", " 1914.1562606199182,\n", " 1931.6046911781903,\n", " 1973.3946444545988,\n", " 1909.5167601990083,\n", " 1865.7520927582461,\n", " 2017.8701247585288,\n", " 1873.373785268034,\n", " 2022.9669276228508,\n", " 1876.9389836777134,\n", " 1934.4554189938701,\n", " 1886.0811728491344,\n", " 1940.5896517740891,\n", " 2005.5624057806835,\n", " 1988.429145872561,\n", " 1949.7084704364129,\n", " 1912.0673726814002,\n", " 1943.6616280780781,\n", " 1902.4911851330662,\n", " 1898.359301022406,\n", " 1973.7834348374606,\n", " 1992.6751186357935,\n", " 1925.42892273177,\n", " 1889.115283539258,\n", " 1871.5809564942576,\n", " 1983.2853658181978,\n", " 1906.7775482120933,\n", " 1885.1787722278157,\n", " 1966.3616381595768,\n", " 1882.3505114234142,\n", " 1906.2881798782162,\n", " 1904.422220512513,\n", " 1914.6504517273959,\n", " 1871.8761901035105,\n", " 2024.219442042258,\n", " 1973.369252749691,\n", " 1887.5647116668251,\n", " 1929.0754414213882,\n", " 1928.7923843815515,\n", " 1909.4382465906701,\n", " 1907.573424448541,\n", " 1871.6627100530366,\n", " 1972.4839566375508,\n", " 1984.2116286937562,\n", " 1903.3531025711839,\n", " 1876.1154826772988,\n", " 1876.7992828544654,\n", " 1988.2157555103115,\n", " 1961.124473682253,\n", " 1906.7788616584794,\n", " 1933.1399463079408,\n", " 1922.880744804141,\n", " 1959.0697113874498,\n", " 1880.568798416146,\n", " 1963.0410567524332,\n", " 1988.321911736551,\n", " 1994.2993589559824,\n", " 1951.4179113779994,\n", " 1987.2788699273194,\n", " 1861.827075535406,\n", " 1900.8770349131742,\n", " 1954.517580074843,\n", " 2015.6003349970931,\n", " 1872.6526597101185,\n", " 1854.5550437802058,\n", " 1936.0134512250845,\n", " 1887.3278103841128,\n", " 1859.0274146087274,\n", " 1904.222212671927,\n", " 1887.7215216912457,\n", " 1905.3357484064622,\n", " 1964.7278414864293,\n", " 1908.0662411786582,\n", " 1877.135875157637,\n", " 1884.9634811324102,\n", " 1868.449320118214,\n", " 1990.3154263445153,\n", " 1972.1024629768451,\n", " 1911.824220696615,\n", " 1883.0101964471687,\n", " 1969.8639540297709,\n", " 2011.299794538809,\n", " 1994.3475220275445,\n", " 1900.2313707725841,\n", " 1977.1339037585412,\n", " 1913.3511086001718,\n", " 1933.094704073003,\n", " 1878.5382028877639,\n", " 1837.0273459652835,\n", " 1899.309631923626,\n", " 1901.8610264818178,\n", " 1889.3094824723196,\n", " 1955.6210116754937,\n", " 1909.7472750032355,\n", " 1911.0701075862094,\n", " 1924.228393243427,\n", " 1825.896261971044,\n", " 1996.61121803294,\n", " 1934.2873100387171,\n", " 1916.0687419882647,\n", " 1898.8667028132074,\n", " 1887.1794598042625,\n", " 2007.6563157791586,\n", " 1888.0434029780008,\n", " 1917.4981271139015,\n", " 1897.7621460671314,\n", " 1928.1101583864638,\n", " 1904.4396654338339,\n", " 1908.872739139619,\n", " 1949.7332293140382,\n", " 1861.9550247407835,\n", " 1927.5982313507432,\n", " 1985.1471429449919,\n", " 1864.4474640783326,\n", " 1969.6529023401035,\n", " 2027.030750780562,\n", " 1850.0796472817465,\n", " 1935.1167810601,\n", " 1919.769622802888,\n", " 1972.1073226526953,\n", " 1862.939082878527,\n", " 2016.1920897433206,\n", " 1904.7041627473259,\n", " 1976.7011204288174,\n", " 1904.9562485831075,\n", " 1921.5078241054573,\n", " 1942.6708306485136,\n", " 1888.4404504068223,\n", " 1821.128576222884,\n", " 1898.5221820540537,\n", " 1921.601631188711,\n", " 1866.5364654075515,\n", " 1931.4807591388173,\n", " 1881.2920049987063,\n", " 1926.581486003695,\n", " 1859.148957035326,\n", " 2009.8524044967871,\n", " 1966.15674139277,\n", " 1932.827429271749,\n", " 1903.4191308759018,\n", " 1866.6905090148641,\n", " 1899.7041089054703,\n", " 1970.3962557453099,\n", " 1883.433188494066,\n", " 1867.5301326278911,\n", " 1875.4583922201366,\n", " 1956.6869906825882,\n", " 1882.9061759661683,\n", " 1943.9281072726938,\n", " 1954.5604107383313,\n", " 1899.88431705705,\n", " 1899.1381510884169,\n", " 1944.1888595702228,\n", " 1929.9250453133725,\n", " 1944.4812768836796,\n", " 1893.0843317921933,\n", " 1924.1481718955015,\n", " 1970.1310763644872,\n", " 1980.003074000701,\n", " 1970.4544261173733,\n", " 1970.117611204133,\n", " 1901.5453381466452,\n", " 1996.6350487476893,\n", " 1871.4818319482486,\n", " 1803.2567772827604,\n", " 1917.1431618448455,\n", " 1896.0190834277564,\n", " 1884.2755733156675,\n", " 1886.6445079238981,\n", " 1961.1488646334526,\n", " 1989.8113588668118,\n", " 1949.0976491806,\n", " 1924.0193215922502,\n", " 1884.0194398311587,\n", " 1946.137400282998,\n", " 1915.0813179614418,\n", " 1900.398468015362,\n", " 1877.8143106318446,\n", " 1972.507432998812,\n", " 1974.5680917654634,\n", " 1979.2132043614506,\n", " 1904.1782721019956,\n", " 1944.7228050203491,\n", " 2018.1652433606569,\n", " 1873.695946900444,\n", " 1922.063675787507,\n", " 1945.096669054183,\n", " 1933.31758271006,\n", " 1806.0563588875757,\n", " 1882.9125276711898,\n", " 1922.7278771607798,\n", " 1957.792773913764,\n", " 1886.9850707766393,\n", " 1938.9218115684087,\n", " 1893.5402202319822,\n", " 1873.8779212759093,\n", " 1967.5000116222202,\n", " 1857.5205811307808,\n", " 1945.530223882965,\n", " 1913.4767081081898,\n", " 1865.0484604374728,\n", " 1856.3422544921184,\n", " 1945.9377160330773,\n", " 1978.3387084971582,\n", " 1885.5021606855905,\n", " 2022.9898327550939,\n", " 1967.1628080460607,\n", " 1864.7649347418962,\n", " 1880.0508826678795,\n", " 1941.5698255960904,\n", " 1933.629015165894,\n", " 1939.8439834329172,\n", " 1890.0979162287201,\n", " 1916.5771901983862,\n", " 1883.8516481083152,\n", " 1878.24053945953,\n", " 1985.5160862173716,\n", " 1989.8780602913425,\n", " 1971.6447522439034,\n", " 1851.1044418329109,\n", " 1892.090165697662,\n", " 1914.3672427423217,\n", " 1983.7253351348818,\n", " 1996.1236556998692,\n", " 1947.7464634258238,\n", " 1962.4066362839749,\n", " 1976.7972126593527,\n", " 1913.9031538153315,\n", " 1894.1320400997004,\n", " 1984.0574277690243,\n", " 1902.288876506764,\n", " 1991.8261928282197,\n", " 1913.6740391125727,\n", " 1993.2777256792547,\n", " 1833.899617414896,\n", " 1917.3529577134238,\n", " 1918.9020162369798,\n", " 1870.9354262192994,\n", " 1913.2614458476262,\n", " 1894.3853835291916,\n", " 1962.618708343185,\n", " 1934.8204355998914,\n", " 1882.5149612639555,\n", " 1995.5601030679754,\n", " 1823.8092671589106,\n", " 1879.865392985855,\n", " 1927.6903576280479,\n", " 1831.1302321470475,\n", " 1949.715639725068,\n", " 1913.2977400570728,\n", " 1933.2830482140976,\n", " 1899.1880195930366,\n", " 1878.2172023554276,\n", " 1909.2493242188982,\n", " 1992.7416919930713,\n", " 1898.9208793527855,\n", " 1967.615021809471,\n", " 1935.3206527854504,\n", " 1887.5767248157013,\n", " 1921.3288389583533,\n", " 1966.3470918064943,\n", " 1878.4422359102068,\n", " 1864.0358085268113,\n", " 1899.3971589432376,\n", " 1942.665497614408,\n", " 1964.5672048396023,\n", " 1925.4093645233268,\n", " 1935.3836114043852,\n", " 1960.0750475328223,\n", " 1957.3427785716678,\n", " 1983.4525976796908,\n", " 1896.9108526076823,\n", " 1931.1371726676014,\n", " 1892.7798492802226,\n", " 1849.016956630262,\n", " 1997.8846223889063,\n", " 1878.8758607480158,\n", " 1931.3444770507742,\n", " 1892.407630188242,\n", " 1913.0741083046553,\n", " 1932.6551779440877,\n", " 1941.3578753386391,\n", " 1914.226338970547,\n", " 1957.6655600705421,\n", " 1980.8787951385927,\n", " 1953.5665940325464,\n", " 1949.563840207515,\n", " 1861.4924100504331,\n", " 1975.4128939460966,\n", " 1853.603963586428,\n", " 1986.157437471871,\n", " 1880.2288858021284,\n", " 1861.820306758194,\n", " 1952.6531428143544,\n", " 1899.115949232256,\n", " 1916.9680167579054,\n", " 1877.8258171162502,\n", " 1962.3903585286596,\n", " 1886.916531864562,\n", " 1841.930957363409,\n", " 1930.5542021814604,\n", " 1964.0010199183632,\n", " 1908.199259550184,\n", " 1938.7393051878835,\n", " 1874.8075178326214,\n", " 1947.872054104626,\n", " 1869.0712833830244,\n", " 1891.997933792317,\n", " 1801.6871624808573,\n", " 1915.1509398822552,\n", " 1930.8087338453365,\n", " 1844.9991661677643,\n", " 1860.869065085889,\n", " 1869.119251340592,\n", " 1976.5166573379609,\n", " 2011.4225912453735,\n", " 1833.5461029266285,\n", " 1920.666942551285,\n", " 1855.4040050764775,\n", " 1874.865674322178,\n", " 1896.5402919396433,\n", " 1889.1439963128105,\n", " 1944.1571128026505,\n", " 1929.7734724719219,\n", " 1873.4986927998957,\n", " 1921.181025439299,\n", " 1985.736233625712,\n", " 1974.198051131953,\n", " 1951.8499945468718,\n", " 1896.2101828782193,\n", " 1794.87461351374,\n", " 1990.506490700781,\n", " 1927.3705934319676,\n", " 1862.7072566652178,\n", " 1884.043960151638,\n", " 1869.3065277666806,\n", " 1967.5802748461401,\n", " 1971.1257593217683,\n", " 1912.287000913006,\n", " 1892.1263101146571,\n", " 1823.5180835284089,\n", " 1953.6068480142965,\n", " 1948.1093661764355,\n", " 1936.8700507646759,\n", " 1932.8987056190367,\n", " 1907.9399904109544,\n", " 1889.9042168054352,\n", " 1865.8257492161833,\n", " 1895.6122769269289,\n", " 1927.2550806942597,\n", " 1947.1017828134638,\n", " 1887.3881590801893,\n", " 1975.182835936302,\n", " 1852.4075262447977,\n", " 1899.6644576117637,\n", " 1953.4173371667555,\n", " 1955.5130313675254,\n", " 1961.3741804014905,\n", " 1869.5431077917144,\n", " 1889.0470959220895,\n", " 1906.6462032789796,\n", " 1947.679027289672,\n", " 1860.3453544881834,\n", " 1941.0945881910689,\n", " 1899.5612501064272,\n", " 1905.463235918544,\n", " 1944.2055097363298,\n", " 1929.6715761347862,\n", " 1899.9591731468838,\n", " 1883.154862581332,\n", " 1918.4822955653233,\n", " 1897.9173806449874,\n", " 1989.5256344295049,\n", " 1995.3606765674306,\n", " 1887.4809902694656,\n", " 1996.0218564366223,\n", " 1942.0627265628384,\n", " 1883.4575063509112,\n", " 1865.3442352834923,\n", " 1885.5959468129222,\n", " 1911.94890525071,\n", " 1954.6405852240466,\n", " 1970.410576310064,\n", " 1942.7530863380646,\n", " 1894.1236052298514,\n", " 1982.1555397260413,\n", " 1929.0130759095262,\n", " 1913.3987938350576,\n", " 1899.6223967093342,\n", " 1984.5670602579942,\n", " 1904.106279777549,\n", " 1964.3896314729066,\n", " 1905.9634930456546,\n", " 1995.9515816968415,\n", " 1929.5472728419336,\n", " 1850.4859391688447,\n", " 1921.379677659449,\n", " 1868.7601776826066,\n", " 1961.0351229721293,\n", " 2006.4486015191915,\n", " 1888.5823934983741,\n", " 1962.7713798373961,\n", " 1866.9726424919484,\n", " 1947.7508310821665,\n", " 1935.1697485209838,\n", " 1963.97456152255,\n", " 1908.9080962147652,\n", " 1956.8448853150003,\n", " 1894.0126222499782,\n", " 1888.73890687587,\n", " 1873.3743150070563,\n", " 1986.604303782963,\n", " 1903.9596300114256,\n", " 1886.005762440489,\n", " 1872.002017661761,\n", " 1958.033387583204,\n", " 1869.8606365179419,\n", " 1948.5427425941048,\n", " 1958.5477773504133,\n", " 2000.0645670860042,\n", " 1928.4188880300246,\n", " 1896.0473870913881,\n", " 1888.1120727883574,\n", " 1904.606927188032,\n", " 1930.5658592700104,\n", " 1846.7761326865093,\n", " 1871.5533222689082,\n", " 1901.688486168962,\n", " 1869.610331679447,\n", " 1869.5609629780158,\n", " 1902.1613639974207,\n", " 1849.2309579276064,\n", " 1876.478259722388,\n", " 1836.1432242769465,\n", " 1938.2369489068303,\n", " 1971.484408024392,\n", " ...]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dev0Predicted.tolist()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_regression.py:95: FutureWarning: Arrays of bytes/strings is being converted to decimal numbers if dtype='numeric'. This behavior is deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26). Please convert your data to numeric values explicitly instead.\n", " y_true = check_array(y_true, ensure_2d=False, dtype=dtype)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "RMSE = 24.659364457329445\n", "Model score = 0.8175627445862136\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_regression.py:95: FutureWarning: Arrays of bytes/strings is being converted to decimal numbers if dtype='numeric'. This behavior is deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26). Please convert your data to numeric values explicitly instead.\n", " y_true = check_array(y_true, ensure_2d=False, dtype=dtype)\n" ] } ], "source": [ "import sklearn.metrics\n", "import numpy as np\n", "\n", "print('RMSE = ', np.sqrt(sklearn.metrics.mean_squared_error(dev0Expected, dev0Predicted)))\n", "print('Model score = ', model.score(dev0X, dev0Expected))" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "predicted = [str(x) for x in dev0Predicted.tolist()]\n", "writeOutput(predicted, 'dev-0/out.tsv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "interpreter": { "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" }, "kernelspec": { "display_name": "Python 3.9.2 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }