retroc2/retro.ipynb

1227 lines
38 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"import lzma\n",
"import csv\n",
"import re\n",
"\n",
"def readInput(dir):\n",
"    \"\"\"Read a data file into a list holding one entry per line.\n",
"\n",
"    A path ending in '.xz' is opened with lzma; every line is decoded as UTF-8,\n",
"    stripped of its trailing newline and split on tabs, so the result is a list\n",
"    of field lists. Any other path is read as UTF-8 text (undecodable bytes are\n",
"    ignored) and the result is a list of lines without their newline.\n",
"\n",
"    Args:\n",
"        dir: path of the file to read (a file path, despite the name).\n",
"\n",
"    Returns:\n",
"        list[list[str]] for '.xz' files, list[str] otherwise.\n",
"    \"\"\"\n",
"    X = []\n",
"    # Decide by file extension: a substring test would also match plain files\n",
"    # that merely live under a directory whose name contains 'xz'.\n",
"    if str(dir).endswith('.xz'):\n",
"        with lzma.open(dir) as f:\n",
"            for line in f:\n",
"                # Drop the line terminator so the last field is clean, as in\n",
"                # the plain-text branch below.\n",
"                text = line.decode('utf-8').rstrip('\\n')\n",
"                X.append(text.split('\\t'))\n",
"    else:\n",
"        with open(dir, encoding='utf8', errors='ignore') as f:\n",
"            for line in f:\n",
"                X.append(line.replace('\\n', ''))\n",
"    return X\n",
"\n",
"def writeOutput(output, dir):\n",
"    \"\"\"Write every item of `output` as a single-field CSV row to the file `dir`.\n",
"\n",
"    The file is written as UTF-8, matching the encoding readInput uses, instead\n",
"    of the platform default. The csv module's default dialect is kept, so rows\n",
"    are terminated with '\\\\r\\\\n'.\n",
"\n",
"    Args:\n",
"        output: iterable of values, one per output row.\n",
"        dir: path of the file to create or overwrite.\n",
"    \"\"\"\n",
"    # newline='' hands line-ending control to the csv writer.\n",
"    with open(dir, 'w', newline='', encoding='utf8') as f:\n",
"        writer = csv.writer(f)\n",
"        writer.writerows([row] for row in output)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load the training set; each row is the list of tab-separated fields of one line.\n",
"X = readInput('train/train.tsv.xz')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10000"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows in the 10000-row prefix of X (fewer if X is shorter).\n",
"len(X[:10000])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Only the first 10000 rows are used for training.\n",
"Xcut = X[:10000]\n",
"\n",
"train = pd.DataFrame(Xcut, columns=['Beginning', 'End', 'Title', 'Source', 'X'])\n",
"# Target: midpoint of the Beginning/End interval, computed column-wise instead\n",
"# of with a per-row Python lambda.\n",
"train['Y'] = (train['Beginning'].astype(float) + train['End'].astype(float)) / 2\n",
"# Keep only the text column and the target.\n",
"train = train[['X', 'Y']]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import linear_model\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.pipeline import Pipeline\n",
"\n",
"# Two-step pipeline: TF-IDF text features followed by a linear regression.\n",
"estimators = [\n",
"    ('tfidf', TfidfVectorizer()),\n",
"    ('linearRegression', linear_model.LinearRegression()),\n",
"]\n",
"model = Pipeline(steps=estimators)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Pipeline(steps=[('tfidf', TfidfVectorizer()),\n",
" ('linearRegression', LinearRegression())])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit the pipeline on the text column X against the target column Y.\n",
"model.fit(train.X, train.Y)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"dev0X = readInput('dev-0/in.tsv')\n",
"# Parse the expected values to floats up front: scikit-learn deprecates the\n",
"# implicit conversion of string targets and announces its removal in 1.1.\n",
"dev0Expected = [float(value) for value in readInput('dev-0/expected.tsv')]\n",
"dev0Predicted = model.predict(dev0X)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[1840.1873506530203,\n",
" 1955.5054038330952,\n",
" 1957.188169072484,\n",
" 1982.9915478948235,\n",
" 1922.1391249931735,\n",
" 1953.8540804466156,\n",
" 1994.2253928270109,\n",
" 1928.8733549680846,\n",
" 1878.2307361714463,\n",
" 1932.1935071045214,\n",
" 1874.0086844537832,\n",
" 1990.5539090713487,\n",
" 1891.3963033388222,\n",
" 1916.1311071475943,\n",
" 1965.8835164477582,\n",
" 1944.1484754660025,\n",
" 1981.2925041285077,\n",
" 1969.2231979814874,\n",
" 1833.8734424736388,\n",
" 1962.692041027358,\n",
" 1881.8458215986618,\n",
" 1867.336166647721,\n",
" 1923.579708076516,\n",
" 1918.2099905900664,\n",
" 1896.2953230746057,\n",
" 1953.899814150262,\n",
" 1912.3026680112946,\n",
" 1933.100765112436,\n",
" 1987.5536174030713,\n",
" 1953.1828450164787,\n",
" 1907.4139724517734,\n",
" 1907.4602781805127,\n",
" 1876.2301111417316,\n",
" 1894.5329130511432,\n",
" 1898.901328501861,\n",
" 1909.3986406773226,\n",
" 1889.9044917531517,\n",
" 1876.3801241447957,\n",
" 2009.4570741782811,\n",
" 1953.2668326737992,\n",
" 1855.4125267298964,\n",
" 1941.7194032294385,\n",
" 1928.2119128971158,\n",
" 1900.0663996597482,\n",
" 1989.980205997232,\n",
" 1844.2213373379752,\n",
" 1951.9725944795941,\n",
" 1982.9977733647736,\n",
" 1893.1463833825394,\n",
" 1912.5417674193895,\n",
" 1880.791551771725,\n",
" 1876.3159968892533,\n",
" 1980.5949973320403,\n",
" 1878.6012276482104,\n",
" 1905.8484657399335,\n",
" 1904.322333017581,\n",
" 1997.1955310422322,\n",
" 1961.4288173172295,\n",
" 1896.8921503331455,\n",
" 1903.0302998560226,\n",
" 1878.126867601444,\n",
" 1992.403886508728,\n",
" 1957.5806990035717,\n",
" 1926.6392568873607,\n",
" 1999.4630026228383,\n",
" 1873.785978888278,\n",
" 1908.1078217340728,\n",
" 1865.7125705190892,\n",
" 1925.3843223995948,\n",
" 2001.1087185320298,\n",
" 1941.7041146366603,\n",
" 1982.7200516450678,\n",
" 1963.625444656117,\n",
" 1983.3299928602103,\n",
" 1951.9525271588832,\n",
" 1990.7583917931393,\n",
" 1978.4347214395655,\n",
" 1978.8128252249874,\n",
" 1927.7109209458933,\n",
" 1892.2810873989572,\n",
" 1972.8386316984686,\n",
" 1975.758026921782,\n",
" 2012.8921174644602,\n",
" 1909.0618040644217,\n",
" 1874.5366616681504,\n",
" 1956.2376442146806,\n",
" 1896.7359278470897,\n",
" 1961.2012675300855,\n",
" 1907.4913255544798,\n",
" 1900.9409799297612,\n",
" 1999.8471774186446,\n",
" 1877.4389725884366,\n",
" 1910.3704612119786,\n",
" 1985.7453910649942,\n",
" 1895.3754286003357,\n",
" 1900.6122372878467,\n",
" 1882.5647269883825,\n",
" 1959.0719488534091,\n",
" 1938.4605579343192,\n",
" 2017.2513120335204,\n",
" 1912.5260919116963,\n",
" 1974.7313385320203,\n",
" 1989.745443701346,\n",
" 1938.4245245979862,\n",
" 1929.6074842166595,\n",
" 1954.5423059320547,\n",
" 1882.2754695126491,\n",
" 1935.8121365641807,\n",
" 1938.6702139413812,\n",
" 1916.708757315056,\n",
" 1954.7249863802867,\n",
" 1878.6354057929702,\n",
" 1966.0516995797793,\n",
" 1889.0091804647323,\n",
" 1924.0535140985937,\n",
" 1919.9763389543784,\n",
" 1961.7286336396717,\n",
" 1943.4206244063553,\n",
" 1902.4036927228083,\n",
" 1934.3911140392095,\n",
" 1927.6153635334535,\n",
" 1937.25195104676,\n",
" 1919.8379519749,\n",
" 1985.298628274291,\n",
" 1913.923957906324,\n",
" 1850.149196174956,\n",
" 1932.7024886100123,\n",
" 1900.2187076454893,\n",
" 1939.0316672605582,\n",
" 1867.0588890247586,\n",
" 1949.495514166902,\n",
" 1910.6909354939485,\n",
" 1869.0848622853389,\n",
" 1892.844610241327,\n",
" 1876.9393563739682,\n",
" 1872.619310363769,\n",
" 1909.0209990421888,\n",
" 1898.5385156179004,\n",
" 2028.5464265181251,\n",
" 1950.2972829214918,\n",
" 1945.4689072911763,\n",
" 1892.0561105467652,\n",
" 1841.8321317652428,\n",
" 2007.7594399604984,\n",
" 1994.3385584201324,\n",
" 1903.8663186815293,\n",
" 1910.3840870576093,\n",
" 1933.0478418159325,\n",
" 1858.20073863683,\n",
" 1976.7130661496035,\n",
" 1982.040243376582,\n",
" 1975.3098869633363,\n",
" 1813.6380817764837,\n",
" 1860.4868157935061,\n",
" 1935.0046786260443,\n",
" 1880.046840811149,\n",
" 1932.952816825448,\n",
" 1952.4720598644697,\n",
" 1870.5646754498086,\n",
" 1934.1484570897112,\n",
" 1922.2083803634544,\n",
" 1891.6971069725907,\n",
" 1891.5467619299338,\n",
" 1878.5767576306152,\n",
" 1911.5816456900134,\n",
" 1957.0981196781083,\n",
" 2010.0914659423524,\n",
" 1987.184997975979,\n",
" 1878.99541765087,\n",
" 1998.9214364221739,\n",
" 1961.8880572180435,\n",
" 1865.183948449711,\n",
" 1992.9028635837597,\n",
" 1872.982306810104,\n",
" 1884.6022374188033,\n",
" 1930.7051521998046,\n",
" 1963.9041521508807,\n",
" 1971.0852407172647,\n",
" 1887.1733177731649,\n",
" 1936.9469702482754,\n",
" 1945.9693069422215,\n",
" 1979.2544432205032,\n",
" 1936.2452800759104,\n",
" 1987.353371511696,\n",
" 1915.4200210532777,\n",
" 1980.7985504936808,\n",
" 1890.1343304958841,\n",
" 1945.5674792360787,\n",
" 1919.7995666478391,\n",
" 1898.3961649331218,\n",
" 1894.974827293279,\n",
" 1904.5408449507013,\n",
" 1882.2631677580428,\n",
" 1866.8029362871148,\n",
" 1975.7970493186356,\n",
" 1902.8758333839453,\n",
" 1910.9677377436387,\n",
" 1849.2149188417998,\n",
" 1941.0823103662578,\n",
" 1963.5667110234926,\n",
" 1845.6089273475236,\n",
" 2011.873464128537,\n",
" 1911.911235887522,\n",
" 1991.8821469836173,\n",
" 1988.5810448271409,\n",
" 1962.7943855912958,\n",
" 1971.3620340106725,\n",
" 1890.0206853087461,\n",
" 2002.9847346366628,\n",
" 1879.3320375844173,\n",
" 1959.8017355726524,\n",
" 1948.8167504689445,\n",
" 1966.07510819957,\n",
" 1943.8016609077754,\n",
" 1961.085714583202,\n",
" 1897.9084480496756,\n",
" 1939.2705898688241,\n",
" 1984.1174372482437,\n",
" 1894.2614550602684,\n",
" 1895.378665333734,\n",
" 1915.4541956029625,\n",
" 1898.7823852936374,\n",
" 1904.8481870505414,\n",
" 1925.5772044783346,\n",
" 1966.0083553646334,\n",
" 1970.1796762340264,\n",
" 1935.1144594596572,\n",
" 1953.3501712379602,\n",
" 1875.739909944408,\n",
" 1876.0020439900882,\n",
" 1922.3854141050526,\n",
" 1868.0372338851978,\n",
" 1910.386887479531,\n",
" 1912.1554002134365,\n",
" 1915.0931483151676,\n",
" 1877.3024203722453,\n",
" 1877.253732776828,\n",
" 1936.7250053078465,\n",
" 1891.3138920227263,\n",
" 1908.237673083131,\n",
" 1987.9054289917976,\n",
" 1903.70412954432,\n",
" 1924.723134373608,\n",
" 1935.1178725062973,\n",
" 1983.0734704185322,\n",
" 1887.8924504374272,\n",
" 2000.4219968377438,\n",
" 1964.3993615673073,\n",
" 1900.2773965603762,\n",
" 1945.1416027227583,\n",
" 1903.3293609712368,\n",
" 1912.3866847387694,\n",
" 2004.446428739491,\n",
" 1904.7627365539536,\n",
" 1967.4465180377595,\n",
" 1972.1396727420974,\n",
" 1980.936226267668,\n",
" 1875.6809858271633,\n",
" 1877.0996027882427,\n",
" 1899.0990556204817,\n",
" 2006.029272666147,\n",
" 1887.1795035137018,\n",
" 1959.825311477964,\n",
" 1873.5774368166688,\n",
" 1958.2965394815083,\n",
" 1938.629586105924,\n",
" 1961.2877235097023,\n",
" 1923.5597018664055,\n",
" 1877.0467396667511,\n",
" 1916.6969000825143,\n",
" 1918.6912186760549,\n",
" 1905.3932883567854,\n",
" 2016.8492832033414,\n",
" 1917.8502187688466,\n",
" 1997.930634551496,\n",
" 1952.9382103944763,\n",
" 1888.4202321997122,\n",
" 1997.8586048022291,\n",
" 1909.7063993235488,\n",
" 1915.3108929177517,\n",
" 1982.9538727811057,\n",
" 1944.9403640763462,\n",
" 1984.7807251960546,\n",
" 1893.8247275150788,\n",
" 1985.6464908849266,\n",
" 1906.7150684605235,\n",
" 1933.813408970437,\n",
" 1920.3355480971475,\n",
" 1966.0435836236693,\n",
" 1889.508247155757,\n",
" 1933.3023902482653,\n",
" 1900.9749268833702,\n",
" 1945.565080772876,\n",
" 1882.906035880404,\n",
" 1972.0366287341392,\n",
" 1965.8407065207352,\n",
" 1894.1130192088149,\n",
" 1901.9912969997608,\n",
" 1856.58679209275,\n",
" 1967.8215042527454,\n",
" 1960.7625135442524,\n",
" 1908.265617316176,\n",
" 1906.1022078975698,\n",
" 1947.4825120009198,\n",
" 1883.0886171054199,\n",
" 1849.384532378764,\n",
" 1997.3565973500004,\n",
" 1879.631204148954,\n",
" 1863.9069334385974,\n",
" 1939.442507249565,\n",
" 1880.284155197719,\n",
" 1920.4999317232227,\n",
" 1956.0941851429293,\n",
" 1881.8428126828428,\n",
" 1873.9931943742629,\n",
" 1912.1435026760844,\n",
" 1890.4741247877955,\n",
" 1888.0897378236189,\n",
" 1918.4669446644602,\n",
" 1915.481242667747,\n",
" 2003.4456748747414,\n",
" 1913.7750621282885,\n",
" 1942.1515832838024,\n",
" 1967.0954888427757,\n",
" 1973.769064486892,\n",
" 1886.0364053247679,\n",
" 1869.0669323217257,\n",
" 1963.3027731391126,\n",
" 1860.2443487225341,\n",
" 1947.9204945352824,\n",
" 1924.9861408278969,\n",
" 1896.8652515178303,\n",
" 1907.416622001296,\n",
" 1946.5469918185975,\n",
" 1973.1504571554392,\n",
" 1959.600365772672,\n",
" 1880.514244166832,\n",
" 1975.7280463592626,\n",
" 1923.622451684597,\n",
" 1947.191425545782,\n",
" 1845.657419451869,\n",
" 1932.6153478357207,\n",
" 1918.7427806164703,\n",
" 1899.3111707467451,\n",
" 1898.7032333126201,\n",
" 1905.313490156231,\n",
" 1943.6097346863887,\n",
" 1878.4498755800178,\n",
" 1976.0846843845834,\n",
" 1890.2547792572032,\n",
" 1935.9424074798671,\n",
" 1894.1704745324466,\n",
" 1898.3449892784472,\n",
" 1928.5379025421978,\n",
" 1924.7987243673292,\n",
" 1863.2744971359884,\n",
" 1895.9036036255122,\n",
" 1868.7377039973717,\n",
" 1991.5185385191858,\n",
" 1951.081363800528,\n",
" 1957.4394891862007,\n",
" 1883.4508459490064,\n",
" 1983.7480250855758,\n",
" 1854.9567904530027,\n",
" 1960.6111926293536,\n",
" 1877.028742663458,\n",
" 2014.8677711921764,\n",
" 1934.5806704594447,\n",
" 1879.7045652486677,\n",
" 1887.9558405565826,\n",
" 1961.148446911978,\n",
" 1940.3806342487594,\n",
" 1898.3593459263946,\n",
" 1928.1069760151158,\n",
" 1999.7021671213547,\n",
" 1965.709602489129,\n",
" 1901.414075729924,\n",
" 1916.1084440396407,\n",
" 1889.5802232574988,\n",
" 1834.8853796798744,\n",
" 1919.9677791707393,\n",
" 1882.6419652383686,\n",
" 1939.2728181505934,\n",
" 1866.5378591167123,\n",
" 1920.2380089752428,\n",
" 1971.7668190915151,\n",
" 1845.4748087916,\n",
" 1926.2873753619194,\n",
" 1900.147981373055,\n",
" 1899.8731274162872,\n",
" 1903.5003087786888,\n",
" 1944.1303549344123,\n",
" 1923.6782060598446,\n",
" 1913.3819728539138,\n",
" 2019.8382779681776,\n",
" 1859.7241668791253,\n",
" 1926.1538355477855,\n",
" 1910.4132105802744,\n",
" 1945.9066933813483,\n",
" 1973.4504693240144,\n",
" 1917.885621621536,\n",
" 1965.1006414033952,\n",
" 1974.9640405366608,\n",
" 1925.6726057202516,\n",
" 1880.3738703928516,\n",
" 1937.2607468583892,\n",
" 1967.120463480692,\n",
" 1889.0050273686927,\n",
" 1994.4271480086877,\n",
" 2020.8334723232865,\n",
" 1882.2153244944627,\n",
" 1995.1780962042299,\n",
" 1898.639109614116,\n",
" 1892.2172263046423,\n",
" 1812.2300253441358,\n",
" 1993.7791682978623,\n",
" 1990.2175977844472,\n",
" 1986.3692672050438,\n",
" 1878.719090318601,\n",
" 1943.6148264014284,\n",
" 1881.80440866435,\n",
" 1936.3813316342926,\n",
" 1954.8228271136843,\n",
" 1971.341057197389,\n",
" 1928.7839973974633,\n",
" 1904.812519808914,\n",
" 1904.6057806406875,\n",
" 1922.4456822284003,\n",
" 1948.8546449817368,\n",
" 1959.413341657744,\n",
" 1947.445687818313,\n",
" 1926.2747445496987,\n",
" 1970.7964810801,\n",
" 1963.7529754786158,\n",
" 1930.876845765326,\n",
" 1963.4173631449482,\n",
" 1946.5996967632987,\n",
" 1945.7566042299507,\n",
" 1957.5549256017232,\n",
" 1897.557343803956,\n",
" 1921.5502306452095,\n",
" 1861.8048856995517,\n",
" 1925.0399941968274,\n",
" 1997.936361590576,\n",
" 2016.8393064083377,\n",
" 1968.4939074250403,\n",
" 1966.2552349913524,\n",
" 1873.69753875608,\n",
" 1953.6582259950821,\n",
" 1883.8698068956824,\n",
" 1866.6558382790988,\n",
" 1966.887305865265,\n",
" 1811.2358128716035,\n",
" 1966.8694927449387,\n",
" 1859.2134360783393,\n",
" 1875.7882583823011,\n",
" 1875.885328559445,\n",
" 1920.177423500038,\n",
" 1898.0135959118645,\n",
" 1884.8490963908603,\n",
" 1998.7136290837036,\n",
" 1874.5719288320065,\n",
" 1867.387460254497,\n",
" 1868.498738201564,\n",
" 1855.6415016682688,\n",
" 1968.344430087269,\n",
" 1884.1770278326535,\n",
" 1858.1245344937417,\n",
" 1957.4767974163703,\n",
" 1928.2242948950288,\n",
" 1959.799832041844,\n",
" 1863.2870382940553,\n",
" 1965.8955231866128,\n",
" 1916.1117174521714,\n",
" 1961.0214280062808,\n",
" 1888.6510243560303,\n",
" 1914.3093093673049,\n",
" 1872.1488809145544,\n",
" 1977.2967621482942,\n",
" 1887.3103704745215,\n",
" 1896.2430973549995,\n",
" 1973.70723948697,\n",
" 1861.019558623386,\n",
" 1855.5898733760123,\n",
" 1998.2341246175683,\n",
" 1980.7894907736224,\n",
" 1998.195642767237,\n",
" 1907.3197846986038,\n",
" 1872.2436134786199,\n",
" 1951.0690397189594,\n",
" 1961.2413267879417,\n",
" 1949.1535364837407,\n",
" 1984.2926219813878,\n",
" 1906.805168495171,\n",
" 1928.2688835028935,\n",
" 1973.2873909472464,\n",
" 1971.3253413771658,\n",
" 1956.5875737213937,\n",
" 1913.7329066126233,\n",
" 1902.8683143563542,\n",
" 1992.4118744968048,\n",
" 1990.2222700625728,\n",
" 1880.186825425649,\n",
" 1908.3528531934837,\n",
" 1954.5031360066664,\n",
" 1937.8429001180739,\n",
" 1996.7634538302536,\n",
" 1953.700871570044,\n",
" 1989.6826696606722,\n",
" 1980.196360386968,\n",
" 1926.2765029959967,\n",
" 1856.7333879779712,\n",
" 1898.9687820318643,\n",
" 1884.3385479607375,\n",
" 1990.5615767605439,\n",
" 1904.2177124735524,\n",
" 1944.4390637660904,\n",
" 1936.7607553209934,\n",
" 1933.880402011111,\n",
" 1941.443914222045,\n",
" 1907.5003596648478,\n",
" 1912.7427554459225,\n",
" 1913.505533463578,\n",
" 1903.527300539802,\n",
" 1950.1286917810505,\n",
" 1878.3237530142912,\n",
" 1838.2360381679755,\n",
" 1992.5586601338437,\n",
" 1978.241472602853,\n",
" 1904.6328114074104,\n",
" 1944.5824155752014,\n",
" 1890.7451511125462,\n",
" 1925.506937826535,\n",
" 1912.6813292924421,\n",
" 1939.5156010976127,\n",
" 1913.896539311866,\n",
" 1908.814243995027,\n",
" 1905.8620731635556,\n",
" 1966.8390758422563,\n",
" 1993.626004811837,\n",
" 1898.0293404705553,\n",
" 1904.1739294404654,\n",
" 1844.990288132592,\n",
" 1904.0506389601924,\n",
" 1992.3716199749506,\n",
" 1910.4180396998674,\n",
" 1957.9068277841598,\n",
" 1944.7425351984177,\n",
" 1913.0376282270815,\n",
" 1954.4372674620581,\n",
" 1951.0750864942056,\n",
" 1889.2711483139337,\n",
" 1865.3747716079217,\n",
" 1961.4778119359578,\n",
" 1906.3350511041774,\n",
" 1893.1146127363825,\n",
" 1887.7672695961214,\n",
" 1876.9518834303092,\n",
" 1952.8729911355217,\n",
" 2022.2055319465876,\n",
" 1988.109484202296,\n",
" 1965.0773711496972,\n",
" 1847.6576789315934,\n",
" 1980.9078598563106,\n",
" 1962.1042085486288,\n",
" 1914.1562606199182,\n",
" 1931.6046911781903,\n",
" 1973.3946444545988,\n",
" 1909.5167601990083,\n",
" 1865.7520927582461,\n",
" 2017.8701247585288,\n",
" 1873.373785268034,\n",
" 2022.9669276228508,\n",
" 1876.9389836777134,\n",
" 1934.4554189938701,\n",
" 1886.0811728491344,\n",
" 1940.5896517740891,\n",
" 2005.5624057806835,\n",
" 1988.429145872561,\n",
" 1949.7084704364129,\n",
" 1912.0673726814002,\n",
" 1943.6616280780781,\n",
" 1902.4911851330662,\n",
" 1898.359301022406,\n",
" 1973.7834348374606,\n",
" 1992.6751186357935,\n",
" 1925.42892273177,\n",
" 1889.115283539258,\n",
" 1871.5809564942576,\n",
" 1983.2853658181978,\n",
" 1906.7775482120933,\n",
" 1885.1787722278157,\n",
" 1966.3616381595768,\n",
" 1882.3505114234142,\n",
" 1906.2881798782162,\n",
" 1904.422220512513,\n",
" 1914.6504517273959,\n",
" 1871.8761901035105,\n",
" 2024.219442042258,\n",
" 1973.369252749691,\n",
" 1887.5647116668251,\n",
" 1929.0754414213882,\n",
" 1928.7923843815515,\n",
" 1909.4382465906701,\n",
" 1907.573424448541,\n",
" 1871.6627100530366,\n",
" 1972.4839566375508,\n",
" 1984.2116286937562,\n",
" 1903.3531025711839,\n",
" 1876.1154826772988,\n",
" 1876.7992828544654,\n",
" 1988.2157555103115,\n",
" 1961.124473682253,\n",
" 1906.7788616584794,\n",
" 1933.1399463079408,\n",
" 1922.880744804141,\n",
" 1959.0697113874498,\n",
" 1880.568798416146,\n",
" 1963.0410567524332,\n",
" 1988.321911736551,\n",
" 1994.2993589559824,\n",
" 1951.4179113779994,\n",
" 1987.2788699273194,\n",
" 1861.827075535406,\n",
" 1900.8770349131742,\n",
" 1954.517580074843,\n",
" 2015.6003349970931,\n",
" 1872.6526597101185,\n",
" 1854.5550437802058,\n",
" 1936.0134512250845,\n",
" 1887.3278103841128,\n",
" 1859.0274146087274,\n",
" 1904.222212671927,\n",
" 1887.7215216912457,\n",
" 1905.3357484064622,\n",
" 1964.7278414864293,\n",
" 1908.0662411786582,\n",
" 1877.135875157637,\n",
" 1884.9634811324102,\n",
" 1868.449320118214,\n",
" 1990.3154263445153,\n",
" 1972.1024629768451,\n",
" 1911.824220696615,\n",
" 1883.0101964471687,\n",
" 1969.8639540297709,\n",
" 2011.299794538809,\n",
" 1994.3475220275445,\n",
" 1900.2313707725841,\n",
" 1977.1339037585412,\n",
" 1913.3511086001718,\n",
" 1933.094704073003,\n",
" 1878.5382028877639,\n",
" 1837.0273459652835,\n",
" 1899.309631923626,\n",
" 1901.8610264818178,\n",
" 1889.3094824723196,\n",
" 1955.6210116754937,\n",
" 1909.7472750032355,\n",
" 1911.0701075862094,\n",
" 1924.228393243427,\n",
" 1825.896261971044,\n",
" 1996.61121803294,\n",
" 1934.2873100387171,\n",
" 1916.0687419882647,\n",
" 1898.8667028132074,\n",
" 1887.1794598042625,\n",
" 2007.6563157791586,\n",
" 1888.0434029780008,\n",
" 1917.4981271139015,\n",
" 1897.7621460671314,\n",
" 1928.1101583864638,\n",
" 1904.4396654338339,\n",
" 1908.872739139619,\n",
" 1949.7332293140382,\n",
" 1861.9550247407835,\n",
" 1927.5982313507432,\n",
" 1985.1471429449919,\n",
" 1864.4474640783326,\n",
" 1969.6529023401035,\n",
" 2027.030750780562,\n",
" 1850.0796472817465,\n",
" 1935.1167810601,\n",
" 1919.769622802888,\n",
" 1972.1073226526953,\n",
" 1862.939082878527,\n",
" 2016.1920897433206,\n",
" 1904.7041627473259,\n",
" 1976.7011204288174,\n",
" 1904.9562485831075,\n",
" 1921.5078241054573,\n",
" 1942.6708306485136,\n",
" 1888.4404504068223,\n",
" 1821.128576222884,\n",
" 1898.5221820540537,\n",
" 1921.601631188711,\n",
" 1866.5364654075515,\n",
" 1931.4807591388173,\n",
" 1881.2920049987063,\n",
" 1926.581486003695,\n",
" 1859.148957035326,\n",
" 2009.8524044967871,\n",
" 1966.15674139277,\n",
" 1932.827429271749,\n",
" 1903.4191308759018,\n",
" 1866.6905090148641,\n",
" 1899.7041089054703,\n",
" 1970.3962557453099,\n",
" 1883.433188494066,\n",
" 1867.5301326278911,\n",
" 1875.4583922201366,\n",
" 1956.6869906825882,\n",
" 1882.9061759661683,\n",
" 1943.9281072726938,\n",
" 1954.5604107383313,\n",
" 1899.88431705705,\n",
" 1899.1381510884169,\n",
" 1944.1888595702228,\n",
" 1929.9250453133725,\n",
" 1944.4812768836796,\n",
" 1893.0843317921933,\n",
" 1924.1481718955015,\n",
" 1970.1310763644872,\n",
" 1980.003074000701,\n",
" 1970.4544261173733,\n",
" 1970.117611204133,\n",
" 1901.5453381466452,\n",
" 1996.6350487476893,\n",
" 1871.4818319482486,\n",
" 1803.2567772827604,\n",
" 1917.1431618448455,\n",
" 1896.0190834277564,\n",
" 1884.2755733156675,\n",
" 1886.6445079238981,\n",
" 1961.1488646334526,\n",
" 1989.8113588668118,\n",
" 1949.0976491806,\n",
" 1924.0193215922502,\n",
" 1884.0194398311587,\n",
" 1946.137400282998,\n",
" 1915.0813179614418,\n",
" 1900.398468015362,\n",
" 1877.8143106318446,\n",
" 1972.507432998812,\n",
" 1974.5680917654634,\n",
" 1979.2132043614506,\n",
" 1904.1782721019956,\n",
" 1944.7228050203491,\n",
" 2018.1652433606569,\n",
" 1873.695946900444,\n",
" 1922.063675787507,\n",
" 1945.096669054183,\n",
" 1933.31758271006,\n",
" 1806.0563588875757,\n",
" 1882.9125276711898,\n",
" 1922.7278771607798,\n",
" 1957.792773913764,\n",
" 1886.9850707766393,\n",
" 1938.9218115684087,\n",
" 1893.5402202319822,\n",
" 1873.8779212759093,\n",
" 1967.5000116222202,\n",
" 1857.5205811307808,\n",
" 1945.530223882965,\n",
" 1913.4767081081898,\n",
" 1865.0484604374728,\n",
" 1856.3422544921184,\n",
" 1945.9377160330773,\n",
" 1978.3387084971582,\n",
" 1885.5021606855905,\n",
" 2022.9898327550939,\n",
" 1967.1628080460607,\n",
" 1864.7649347418962,\n",
" 1880.0508826678795,\n",
" 1941.5698255960904,\n",
" 1933.629015165894,\n",
" 1939.8439834329172,\n",
" 1890.0979162287201,\n",
" 1916.5771901983862,\n",
" 1883.8516481083152,\n",
" 1878.24053945953,\n",
" 1985.5160862173716,\n",
" 1989.8780602913425,\n",
" 1971.6447522439034,\n",
" 1851.1044418329109,\n",
" 1892.090165697662,\n",
" 1914.3672427423217,\n",
" 1983.7253351348818,\n",
" 1996.1236556998692,\n",
" 1947.7464634258238,\n",
" 1962.4066362839749,\n",
" 1976.7972126593527,\n",
" 1913.9031538153315,\n",
" 1894.1320400997004,\n",
" 1984.0574277690243,\n",
" 1902.288876506764,\n",
" 1991.8261928282197,\n",
" 1913.6740391125727,\n",
" 1993.2777256792547,\n",
" 1833.899617414896,\n",
" 1917.3529577134238,\n",
" 1918.9020162369798,\n",
" 1870.9354262192994,\n",
" 1913.2614458476262,\n",
" 1894.3853835291916,\n",
" 1962.618708343185,\n",
" 1934.8204355998914,\n",
" 1882.5149612639555,\n",
" 1995.5601030679754,\n",
" 1823.8092671589106,\n",
" 1879.865392985855,\n",
" 1927.6903576280479,\n",
" 1831.1302321470475,\n",
" 1949.715639725068,\n",
" 1913.2977400570728,\n",
" 1933.2830482140976,\n",
" 1899.1880195930366,\n",
" 1878.2172023554276,\n",
" 1909.2493242188982,\n",
" 1992.7416919930713,\n",
" 1898.9208793527855,\n",
" 1967.615021809471,\n",
" 1935.3206527854504,\n",
" 1887.5767248157013,\n",
" 1921.3288389583533,\n",
" 1966.3470918064943,\n",
" 1878.4422359102068,\n",
" 1864.0358085268113,\n",
" 1899.3971589432376,\n",
" 1942.665497614408,\n",
" 1964.5672048396023,\n",
" 1925.4093645233268,\n",
" 1935.3836114043852,\n",
" 1960.0750475328223,\n",
" 1957.3427785716678,\n",
" 1983.4525976796908,\n",
" 1896.9108526076823,\n",
" 1931.1371726676014,\n",
" 1892.7798492802226,\n",
" 1849.016956630262,\n",
" 1997.8846223889063,\n",
" 1878.8758607480158,\n",
" 1931.3444770507742,\n",
" 1892.407630188242,\n",
" 1913.0741083046553,\n",
" 1932.6551779440877,\n",
" 1941.3578753386391,\n",
" 1914.226338970547,\n",
" 1957.6655600705421,\n",
" 1980.8787951385927,\n",
" 1953.5665940325464,\n",
" 1949.563840207515,\n",
" 1861.4924100504331,\n",
" 1975.4128939460966,\n",
" 1853.603963586428,\n",
" 1986.157437471871,\n",
" 1880.2288858021284,\n",
" 1861.820306758194,\n",
" 1952.6531428143544,\n",
" 1899.115949232256,\n",
" 1916.9680167579054,\n",
" 1877.8258171162502,\n",
" 1962.3903585286596,\n",
" 1886.916531864562,\n",
" 1841.930957363409,\n",
" 1930.5542021814604,\n",
" 1964.0010199183632,\n",
" 1908.199259550184,\n",
" 1938.7393051878835,\n",
" 1874.8075178326214,\n",
" 1947.872054104626,\n",
" 1869.0712833830244,\n",
" 1891.997933792317,\n",
" 1801.6871624808573,\n",
" 1915.1509398822552,\n",
" 1930.8087338453365,\n",
" 1844.9991661677643,\n",
" 1860.869065085889,\n",
" 1869.119251340592,\n",
" 1976.5166573379609,\n",
" 2011.4225912453735,\n",
" 1833.5461029266285,\n",
" 1920.666942551285,\n",
" 1855.4040050764775,\n",
" 1874.865674322178,\n",
" 1896.5402919396433,\n",
" 1889.1439963128105,\n",
" 1944.1571128026505,\n",
" 1929.7734724719219,\n",
" 1873.4986927998957,\n",
" 1921.181025439299,\n",
" 1985.736233625712,\n",
" 1974.198051131953,\n",
" 1951.8499945468718,\n",
" 1896.2101828782193,\n",
" 1794.87461351374,\n",
" 1990.506490700781,\n",
" 1927.3705934319676,\n",
" 1862.7072566652178,\n",
" 1884.043960151638,\n",
" 1869.3065277666806,\n",
" 1967.5802748461401,\n",
" 1971.1257593217683,\n",
" 1912.287000913006,\n",
" 1892.1263101146571,\n",
" 1823.5180835284089,\n",
" 1953.6068480142965,\n",
" 1948.1093661764355,\n",
" 1936.8700507646759,\n",
" 1932.8987056190367,\n",
" 1907.9399904109544,\n",
" 1889.9042168054352,\n",
" 1865.8257492161833,\n",
" 1895.6122769269289,\n",
" 1927.2550806942597,\n",
" 1947.1017828134638,\n",
" 1887.3881590801893,\n",
" 1975.182835936302,\n",
" 1852.4075262447977,\n",
" 1899.6644576117637,\n",
" 1953.4173371667555,\n",
" 1955.5130313675254,\n",
" 1961.3741804014905,\n",
" 1869.5431077917144,\n",
" 1889.0470959220895,\n",
" 1906.6462032789796,\n",
" 1947.679027289672,\n",
" 1860.3453544881834,\n",
" 1941.0945881910689,\n",
" 1899.5612501064272,\n",
" 1905.463235918544,\n",
" 1944.2055097363298,\n",
" 1929.6715761347862,\n",
" 1899.9591731468838,\n",
" 1883.154862581332,\n",
" 1918.4822955653233,\n",
" 1897.9173806449874,\n",
" 1989.5256344295049,\n",
" 1995.3606765674306,\n",
" 1887.4809902694656,\n",
" 1996.0218564366223,\n",
" 1942.0627265628384,\n",
" 1883.4575063509112,\n",
" 1865.3442352834923,\n",
" 1885.5959468129222,\n",
" 1911.94890525071,\n",
" 1954.6405852240466,\n",
" 1970.410576310064,\n",
" 1942.7530863380646,\n",
" 1894.1236052298514,\n",
" 1982.1555397260413,\n",
" 1929.0130759095262,\n",
" 1913.3987938350576,\n",
" 1899.6223967093342,\n",
" 1984.5670602579942,\n",
" 1904.106279777549,\n",
" 1964.3896314729066,\n",
" 1905.9634930456546,\n",
" 1995.9515816968415,\n",
" 1929.5472728419336,\n",
" 1850.4859391688447,\n",
" 1921.379677659449,\n",
" 1868.7601776826066,\n",
" 1961.0351229721293,\n",
" 2006.4486015191915,\n",
" 1888.5823934983741,\n",
" 1962.7713798373961,\n",
" 1866.9726424919484,\n",
" 1947.7508310821665,\n",
" 1935.1697485209838,\n",
" 1963.97456152255,\n",
" 1908.9080962147652,\n",
" 1956.8448853150003,\n",
" 1894.0126222499782,\n",
" 1888.73890687587,\n",
" 1873.3743150070563,\n",
" 1986.604303782963,\n",
" 1903.9596300114256,\n",
" 1886.005762440489,\n",
" 1872.002017661761,\n",
" 1958.033387583204,\n",
" 1869.8606365179419,\n",
" 1948.5427425941048,\n",
" 1958.5477773504133,\n",
" 2000.0645670860042,\n",
" 1928.4188880300246,\n",
" 1896.0473870913881,\n",
" 1888.1120727883574,\n",
" 1904.606927188032,\n",
" 1930.5658592700104,\n",
" 1846.7761326865093,\n",
" 1871.5533222689082,\n",
" 1901.688486168962,\n",
" 1869.610331679447,\n",
" 1869.5609629780158,\n",
" 1902.1613639974207,\n",
" 1849.2309579276064,\n",
" 1876.478259722388,\n",
" 1836.1432242769465,\n",
" 1938.2369489068303,\n",
" 1971.484408024392,\n",
" ...]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the dev-0 predictions as a plain Python list.\n",
"dev0Predicted.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_regression.py:95: FutureWarning: Arrays of bytes/strings is being converted to decimal numbers if dtype='numeric'. This behavior is deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26). Please convert your data to numeric values explicitly instead.\n",
" y_true = check_array(y_true, ensure_2d=False, dtype=dtype)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"RMSE = 24.659364457329445\n",
"Model score = 0.8175627445862136\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_regression.py:95: FutureWarning: Arrays of bytes/strings is being converted to decimal numbers if dtype='numeric'. This behavior is deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26). Please convert your data to numeric values explicitly instead.\n",
" y_true = check_array(y_true, ensure_2d=False, dtype=dtype)\n"
]
}
],
"source": [
"import sklearn.metrics\n",
"import numpy as np\n",
"\n",
"# Compute both dev-0 metrics first, then report them.\n",
"rmse = np.sqrt(sklearn.metrics.mean_squared_error(dev0Expected, dev0Predicted))\n",
"score = model.score(dev0X, dev0Expected)\n",
"print('RMSE = ', rmse)\n",
"print('Model score = ', score)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# Turn the predictions into strings and write them out, one per row.\n",
"predicted = list(map(str, dev0Predicted.tolist()))\n",
"writeOutput(predicted, 'dev-0/out.tsv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
},
"kernelspec": {
"display_name": "Python 3.9.2 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}