retroc2/retro.ipynb
Iwona Christop c183cce711 Add out.tsv
2022-05-19 23:01:39 +02:00

38 KiB

import lzma
import csv
import re

def readInput(dir):
    """Read a dataset file into a list with one entry per line.

    Parameters
    ----------
    dir : str or path-like
        Path of the file to read. (The name shadows the ``dir`` builtin but
        is kept so existing callers keep working.)

    Returns
    -------
    list
        * For a path ending in ``.xz``: one ``list[str]`` per line, holding
          the tab-separated fields of that line (decoded as UTF-8).
        * For any other path: one ``str`` per line; undecodable bytes are
          dropped.

        In both cases the line terminator is removed, so the last field of an
        ``.xz`` row no longer carries a trailing newline.
    """
    # Decide by file suffix: a substring test such as ``'xz' in dir`` would
    # also match unrelated paths like ``xz_data/in.tsv``.
    if str(dir).endswith('.xz'):
        with lzma.open(dir) as f:
            # lzma.open defaults to binary mode, so each line is decoded here.
            return [
                raw.decode('utf-8').rstrip('\r\n').split('\t')
                for raw in f
            ]

    with open(dir, encoding='utf8', errors='ignore') as f:
        return [line.replace('\n', '') for line in f]

def writeOutput(output, dir):
    """Write each item of ``output`` to the file ``dir`` as a one-field CSV row.

    Parameters
    ----------
    output : iterable
        Values to write; every value becomes a row with a single field.
    dir : str
        Path of the file to create or overwrite.
    """
    # newline='' hands line-ending control to the csv writer.
    with open(dir, 'w', newline='') as sink:
        csv.writer(sink).writerows([item] for item in output)
# Load the compressed training set through readInput.
X = readInput('train/train.tsv.xz')
# Last expression of the cell: displays the length of the 10000-row prefix.
len(X[:10000])
10000
import pandas as pd

# Only the first 10000 rows of the training data are used.
Xcut = X[:10000]


# Name the five fields of every training row.
train = pd.DataFrame(Xcut, columns=['Beginning', 'End', 'Title', 'Source', 'X'])
# Target Y is the mean of Beginning and End. Computed column-wise instead of
# with a row-wise apply(), which calls a Python lambda once per row.
train['Y'] = (train['Beginning'].astype(float) + train['End'].astype(float)) / 2
# Keep only the text (X) and the target (Y).
train = train.drop(columns=['Beginning', 'End', 'Title', 'Source'])
from sklearn import linear_model
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline

# Pipeline stages: TF-IDF vectorisation of the text, then a linear regression.
tfidf_step = ('tfidf', TfidfVectorizer())
regression_step = ('linearRegression', linear_model.LinearRegression())
estimators = [tfidf_step, regression_step]

model = Pipeline(steps=estimators)
# Fit on the text column against the numeric target; as the last expression of
# the cell, the fitted pipeline is also what the notebook displays.
model.fit(train['X'], train['Y'])
Pipeline(steps=[('tfidf', TfidfVectorizer()),
                ('linearRegression', LinearRegression())])
# Load the dev-0 inputs and their expected values through readInput.
dev0X = readInput('dev-0/in.tsv')
dev0Expected = readInput('dev-0/expected.tsv')
# Predict a value for every dev-0 input with the fitted pipeline.
dev0Predicted = model.predict(dev0X)
# Last expression of the cell: displays the predictions as a plain Python list.
dev0Predicted.tolist()
[1840.1873506530203,
 1955.5054038330952,
 1957.188169072484,
 1982.9915478948235,
 1922.1391249931735,
 1953.8540804466156,
 1994.2253928270109,
 1928.8733549680846,
 1878.2307361714463,
 1932.1935071045214,
 1874.0086844537832,
 1990.5539090713487,
 1891.3963033388222,
 1916.1311071475943,
 1965.8835164477582,
 1944.1484754660025,
 1981.2925041285077,
 1969.2231979814874,
 1833.8734424736388,
 1962.692041027358,
 1881.8458215986618,
 1867.336166647721,
 1923.579708076516,
 1918.2099905900664,
 1896.2953230746057,
 1953.899814150262,
 1912.3026680112946,
 1933.100765112436,
 1987.5536174030713,
 1953.1828450164787,
 1907.4139724517734,
 1907.4602781805127,
 1876.2301111417316,
 1894.5329130511432,
 1898.901328501861,
 1909.3986406773226,
 1889.9044917531517,
 1876.3801241447957,
 2009.4570741782811,
 1953.2668326737992,
 1855.4125267298964,
 1941.7194032294385,
 1928.2119128971158,
 1900.0663996597482,
 1989.980205997232,
 1844.2213373379752,
 1951.9725944795941,
 1982.9977733647736,
 1893.1463833825394,
 1912.5417674193895,
 1880.791551771725,
 1876.3159968892533,
 1980.5949973320403,
 1878.6012276482104,
 1905.8484657399335,
 1904.322333017581,
 1997.1955310422322,
 1961.4288173172295,
 1896.8921503331455,
 1903.0302998560226,
 1878.126867601444,
 1992.403886508728,
 1957.5806990035717,
 1926.6392568873607,
 1999.4630026228383,
 1873.785978888278,
 1908.1078217340728,
 1865.7125705190892,
 1925.3843223995948,
 2001.1087185320298,
 1941.7041146366603,
 1982.7200516450678,
 1963.625444656117,
 1983.3299928602103,
 1951.9525271588832,
 1990.7583917931393,
 1978.4347214395655,
 1978.8128252249874,
 1927.7109209458933,
 1892.2810873989572,
 1972.8386316984686,
 1975.758026921782,
 2012.8921174644602,
 1909.0618040644217,
 1874.5366616681504,
 1956.2376442146806,
 1896.7359278470897,
 1961.2012675300855,
 1907.4913255544798,
 1900.9409799297612,
 1999.8471774186446,
 1877.4389725884366,
 1910.3704612119786,
 1985.7453910649942,
 1895.3754286003357,
 1900.6122372878467,
 1882.5647269883825,
 1959.0719488534091,
 1938.4605579343192,
 2017.2513120335204,
 1912.5260919116963,
 1974.7313385320203,
 1989.745443701346,
 1938.4245245979862,
 1929.6074842166595,
 1954.5423059320547,
 1882.2754695126491,
 1935.8121365641807,
 1938.6702139413812,
 1916.708757315056,
 1954.7249863802867,
 1878.6354057929702,
 1966.0516995797793,
 1889.0091804647323,
 1924.0535140985937,
 1919.9763389543784,
 1961.7286336396717,
 1943.4206244063553,
 1902.4036927228083,
 1934.3911140392095,
 1927.6153635334535,
 1937.25195104676,
 1919.8379519749,
 1985.298628274291,
 1913.923957906324,
 1850.149196174956,
 1932.7024886100123,
 1900.2187076454893,
 1939.0316672605582,
 1867.0588890247586,
 1949.495514166902,
 1910.6909354939485,
 1869.0848622853389,
 1892.844610241327,
 1876.9393563739682,
 1872.619310363769,
 1909.0209990421888,
 1898.5385156179004,
 2028.5464265181251,
 1950.2972829214918,
 1945.4689072911763,
 1892.0561105467652,
 1841.8321317652428,
 2007.7594399604984,
 1994.3385584201324,
 1903.8663186815293,
 1910.3840870576093,
 1933.0478418159325,
 1858.20073863683,
 1976.7130661496035,
 1982.040243376582,
 1975.3098869633363,
 1813.6380817764837,
 1860.4868157935061,
 1935.0046786260443,
 1880.046840811149,
 1932.952816825448,
 1952.4720598644697,
 1870.5646754498086,
 1934.1484570897112,
 1922.2083803634544,
 1891.6971069725907,
 1891.5467619299338,
 1878.5767576306152,
 1911.5816456900134,
 1957.0981196781083,
 2010.0914659423524,
 1987.184997975979,
 1878.99541765087,
 1998.9214364221739,
 1961.8880572180435,
 1865.183948449711,
 1992.9028635837597,
 1872.982306810104,
 1884.6022374188033,
 1930.7051521998046,
 1963.9041521508807,
 1971.0852407172647,
 1887.1733177731649,
 1936.9469702482754,
 1945.9693069422215,
 1979.2544432205032,
 1936.2452800759104,
 1987.353371511696,
 1915.4200210532777,
 1980.7985504936808,
 1890.1343304958841,
 1945.5674792360787,
 1919.7995666478391,
 1898.3961649331218,
 1894.974827293279,
 1904.5408449507013,
 1882.2631677580428,
 1866.8029362871148,
 1975.7970493186356,
 1902.8758333839453,
 1910.9677377436387,
 1849.2149188417998,
 1941.0823103662578,
 1963.5667110234926,
 1845.6089273475236,
 2011.873464128537,
 1911.911235887522,
 1991.8821469836173,
 1988.5810448271409,
 1962.7943855912958,
 1971.3620340106725,
 1890.0206853087461,
 2002.9847346366628,
 1879.3320375844173,
 1959.8017355726524,
 1948.8167504689445,
 1966.07510819957,
 1943.8016609077754,
 1961.085714583202,
 1897.9084480496756,
 1939.2705898688241,
 1984.1174372482437,
 1894.2614550602684,
 1895.378665333734,
 1915.4541956029625,
 1898.7823852936374,
 1904.8481870505414,
 1925.5772044783346,
 1966.0083553646334,
 1970.1796762340264,
 1935.1144594596572,
 1953.3501712379602,
 1875.739909944408,
 1876.0020439900882,
 1922.3854141050526,
 1868.0372338851978,
 1910.386887479531,
 1912.1554002134365,
 1915.0931483151676,
 1877.3024203722453,
 1877.253732776828,
 1936.7250053078465,
 1891.3138920227263,
 1908.237673083131,
 1987.9054289917976,
 1903.70412954432,
 1924.723134373608,
 1935.1178725062973,
 1983.0734704185322,
 1887.8924504374272,
 2000.4219968377438,
 1964.3993615673073,
 1900.2773965603762,
 1945.1416027227583,
 1903.3293609712368,
 1912.3866847387694,
 2004.446428739491,
 1904.7627365539536,
 1967.4465180377595,
 1972.1396727420974,
 1980.936226267668,
 1875.6809858271633,
 1877.0996027882427,
 1899.0990556204817,
 2006.029272666147,
 1887.1795035137018,
 1959.825311477964,
 1873.5774368166688,
 1958.2965394815083,
 1938.629586105924,
 1961.2877235097023,
 1923.5597018664055,
 1877.0467396667511,
 1916.6969000825143,
 1918.6912186760549,
 1905.3932883567854,
 2016.8492832033414,
 1917.8502187688466,
 1997.930634551496,
 1952.9382103944763,
 1888.4202321997122,
 1997.8586048022291,
 1909.7063993235488,
 1915.3108929177517,
 1982.9538727811057,
 1944.9403640763462,
 1984.7807251960546,
 1893.8247275150788,
 1985.6464908849266,
 1906.7150684605235,
 1933.813408970437,
 1920.3355480971475,
 1966.0435836236693,
 1889.508247155757,
 1933.3023902482653,
 1900.9749268833702,
 1945.565080772876,
 1882.906035880404,
 1972.0366287341392,
 1965.8407065207352,
 1894.1130192088149,
 1901.9912969997608,
 1856.58679209275,
 1967.8215042527454,
 1960.7625135442524,
 1908.265617316176,
 1906.1022078975698,
 1947.4825120009198,
 1883.0886171054199,
 1849.384532378764,
 1997.3565973500004,
 1879.631204148954,
 1863.9069334385974,
 1939.442507249565,
 1880.284155197719,
 1920.4999317232227,
 1956.0941851429293,
 1881.8428126828428,
 1873.9931943742629,
 1912.1435026760844,
 1890.4741247877955,
 1888.0897378236189,
 1918.4669446644602,
 1915.481242667747,
 2003.4456748747414,
 1913.7750621282885,
 1942.1515832838024,
 1967.0954888427757,
 1973.769064486892,
 1886.0364053247679,
 1869.0669323217257,
 1963.3027731391126,
 1860.2443487225341,
 1947.9204945352824,
 1924.9861408278969,
 1896.8652515178303,
 1907.416622001296,
 1946.5469918185975,
 1973.1504571554392,
 1959.600365772672,
 1880.514244166832,
 1975.7280463592626,
 1923.622451684597,
 1947.191425545782,
 1845.657419451869,
 1932.6153478357207,
 1918.7427806164703,
 1899.3111707467451,
 1898.7032333126201,
 1905.313490156231,
 1943.6097346863887,
 1878.4498755800178,
 1976.0846843845834,
 1890.2547792572032,
 1935.9424074798671,
 1894.1704745324466,
 1898.3449892784472,
 1928.5379025421978,
 1924.7987243673292,
 1863.2744971359884,
 1895.9036036255122,
 1868.7377039973717,
 1991.5185385191858,
 1951.081363800528,
 1957.4394891862007,
 1883.4508459490064,
 1983.7480250855758,
 1854.9567904530027,
 1960.6111926293536,
 1877.028742663458,
 2014.8677711921764,
 1934.5806704594447,
 1879.7045652486677,
 1887.9558405565826,
 1961.148446911978,
 1940.3806342487594,
 1898.3593459263946,
 1928.1069760151158,
 1999.7021671213547,
 1965.709602489129,
 1901.414075729924,
 1916.1084440396407,
 1889.5802232574988,
 1834.8853796798744,
 1919.9677791707393,
 1882.6419652383686,
 1939.2728181505934,
 1866.5378591167123,
 1920.2380089752428,
 1971.7668190915151,
 1845.4748087916,
 1926.2873753619194,
 1900.147981373055,
 1899.8731274162872,
 1903.5003087786888,
 1944.1303549344123,
 1923.6782060598446,
 1913.3819728539138,
 2019.8382779681776,
 1859.7241668791253,
 1926.1538355477855,
 1910.4132105802744,
 1945.9066933813483,
 1973.4504693240144,
 1917.885621621536,
 1965.1006414033952,
 1974.9640405366608,
 1925.6726057202516,
 1880.3738703928516,
 1937.2607468583892,
 1967.120463480692,
 1889.0050273686927,
 1994.4271480086877,
 2020.8334723232865,
 1882.2153244944627,
 1995.1780962042299,
 1898.639109614116,
 1892.2172263046423,
 1812.2300253441358,
 1993.7791682978623,
 1990.2175977844472,
 1986.3692672050438,
 1878.719090318601,
 1943.6148264014284,
 1881.80440866435,
 1936.3813316342926,
 1954.8228271136843,
 1971.341057197389,
 1928.7839973974633,
 1904.812519808914,
 1904.6057806406875,
 1922.4456822284003,
 1948.8546449817368,
 1959.413341657744,
 1947.445687818313,
 1926.2747445496987,
 1970.7964810801,
 1963.7529754786158,
 1930.876845765326,
 1963.4173631449482,
 1946.5996967632987,
 1945.7566042299507,
 1957.5549256017232,
 1897.557343803956,
 1921.5502306452095,
 1861.8048856995517,
 1925.0399941968274,
 1997.936361590576,
 2016.8393064083377,
 1968.4939074250403,
 1966.2552349913524,
 1873.69753875608,
 1953.6582259950821,
 1883.8698068956824,
 1866.6558382790988,
 1966.887305865265,
 1811.2358128716035,
 1966.8694927449387,
 1859.2134360783393,
 1875.7882583823011,
 1875.885328559445,
 1920.177423500038,
 1898.0135959118645,
 1884.8490963908603,
 1998.7136290837036,
 1874.5719288320065,
 1867.387460254497,
 1868.498738201564,
 1855.6415016682688,
 1968.344430087269,
 1884.1770278326535,
 1858.1245344937417,
 1957.4767974163703,
 1928.2242948950288,
 1959.799832041844,
 1863.2870382940553,
 1965.8955231866128,
 1916.1117174521714,
 1961.0214280062808,
 1888.6510243560303,
 1914.3093093673049,
 1872.1488809145544,
 1977.2967621482942,
 1887.3103704745215,
 1896.2430973549995,
 1973.70723948697,
 1861.019558623386,
 1855.5898733760123,
 1998.2341246175683,
 1980.7894907736224,
 1998.195642767237,
 1907.3197846986038,
 1872.2436134786199,
 1951.0690397189594,
 1961.2413267879417,
 1949.1535364837407,
 1984.2926219813878,
 1906.805168495171,
 1928.2688835028935,
 1973.2873909472464,
 1971.3253413771658,
 1956.5875737213937,
 1913.7329066126233,
 1902.8683143563542,
 1992.4118744968048,
 1990.2222700625728,
 1880.186825425649,
 1908.3528531934837,
 1954.5031360066664,
 1937.8429001180739,
 1996.7634538302536,
 1953.700871570044,
 1989.6826696606722,
 1980.196360386968,
 1926.2765029959967,
 1856.7333879779712,
 1898.9687820318643,
 1884.3385479607375,
 1990.5615767605439,
 1904.2177124735524,
 1944.4390637660904,
 1936.7607553209934,
 1933.880402011111,
 1941.443914222045,
 1907.5003596648478,
 1912.7427554459225,
 1913.505533463578,
 1903.527300539802,
 1950.1286917810505,
 1878.3237530142912,
 1838.2360381679755,
 1992.5586601338437,
 1978.241472602853,
 1904.6328114074104,
 1944.5824155752014,
 1890.7451511125462,
 1925.506937826535,
 1912.6813292924421,
 1939.5156010976127,
 1913.896539311866,
 1908.814243995027,
 1905.8620731635556,
 1966.8390758422563,
 1993.626004811837,
 1898.0293404705553,
 1904.1739294404654,
 1844.990288132592,
 1904.0506389601924,
 1992.3716199749506,
 1910.4180396998674,
 1957.9068277841598,
 1944.7425351984177,
 1913.0376282270815,
 1954.4372674620581,
 1951.0750864942056,
 1889.2711483139337,
 1865.3747716079217,
 1961.4778119359578,
 1906.3350511041774,
 1893.1146127363825,
 1887.7672695961214,
 1876.9518834303092,
 1952.8729911355217,
 2022.2055319465876,
 1988.109484202296,
 1965.0773711496972,
 1847.6576789315934,
 1980.9078598563106,
 1962.1042085486288,
 1914.1562606199182,
 1931.6046911781903,
 1973.3946444545988,
 1909.5167601990083,
 1865.7520927582461,
 2017.8701247585288,
 1873.373785268034,
 2022.9669276228508,
 1876.9389836777134,
 1934.4554189938701,
 1886.0811728491344,
 1940.5896517740891,
 2005.5624057806835,
 1988.429145872561,
 1949.7084704364129,
 1912.0673726814002,
 1943.6616280780781,
 1902.4911851330662,
 1898.359301022406,
 1973.7834348374606,
 1992.6751186357935,
 1925.42892273177,
 1889.115283539258,
 1871.5809564942576,
 1983.2853658181978,
 1906.7775482120933,
 1885.1787722278157,
 1966.3616381595768,
 1882.3505114234142,
 1906.2881798782162,
 1904.422220512513,
 1914.6504517273959,
 1871.8761901035105,
 2024.219442042258,
 1973.369252749691,
 1887.5647116668251,
 1929.0754414213882,
 1928.7923843815515,
 1909.4382465906701,
 1907.573424448541,
 1871.6627100530366,
 1972.4839566375508,
 1984.2116286937562,
 1903.3531025711839,
 1876.1154826772988,
 1876.7992828544654,
 1988.2157555103115,
 1961.124473682253,
 1906.7788616584794,
 1933.1399463079408,
 1922.880744804141,
 1959.0697113874498,
 1880.568798416146,
 1963.0410567524332,
 1988.321911736551,
 1994.2993589559824,
 1951.4179113779994,
 1987.2788699273194,
 1861.827075535406,
 1900.8770349131742,
 1954.517580074843,
 2015.6003349970931,
 1872.6526597101185,
 1854.5550437802058,
 1936.0134512250845,
 1887.3278103841128,
 1859.0274146087274,
 1904.222212671927,
 1887.7215216912457,
 1905.3357484064622,
 1964.7278414864293,
 1908.0662411786582,
 1877.135875157637,
 1884.9634811324102,
 1868.449320118214,
 1990.3154263445153,
 1972.1024629768451,
 1911.824220696615,
 1883.0101964471687,
 1969.8639540297709,
 2011.299794538809,
 1994.3475220275445,
 1900.2313707725841,
 1977.1339037585412,
 1913.3511086001718,
 1933.094704073003,
 1878.5382028877639,
 1837.0273459652835,
 1899.309631923626,
 1901.8610264818178,
 1889.3094824723196,
 1955.6210116754937,
 1909.7472750032355,
 1911.0701075862094,
 1924.228393243427,
 1825.896261971044,
 1996.61121803294,
 1934.2873100387171,
 1916.0687419882647,
 1898.8667028132074,
 1887.1794598042625,
 2007.6563157791586,
 1888.0434029780008,
 1917.4981271139015,
 1897.7621460671314,
 1928.1101583864638,
 1904.4396654338339,
 1908.872739139619,
 1949.7332293140382,
 1861.9550247407835,
 1927.5982313507432,
 1985.1471429449919,
 1864.4474640783326,
 1969.6529023401035,
 2027.030750780562,
 1850.0796472817465,
 1935.1167810601,
 1919.769622802888,
 1972.1073226526953,
 1862.939082878527,
 2016.1920897433206,
 1904.7041627473259,
 1976.7011204288174,
 1904.9562485831075,
 1921.5078241054573,
 1942.6708306485136,
 1888.4404504068223,
 1821.128576222884,
 1898.5221820540537,
 1921.601631188711,
 1866.5364654075515,
 1931.4807591388173,
 1881.2920049987063,
 1926.581486003695,
 1859.148957035326,
 2009.8524044967871,
 1966.15674139277,
 1932.827429271749,
 1903.4191308759018,
 1866.6905090148641,
 1899.7041089054703,
 1970.3962557453099,
 1883.433188494066,
 1867.5301326278911,
 1875.4583922201366,
 1956.6869906825882,
 1882.9061759661683,
 1943.9281072726938,
 1954.5604107383313,
 1899.88431705705,
 1899.1381510884169,
 1944.1888595702228,
 1929.9250453133725,
 1944.4812768836796,
 1893.0843317921933,
 1924.1481718955015,
 1970.1310763644872,
 1980.003074000701,
 1970.4544261173733,
 1970.117611204133,
 1901.5453381466452,
 1996.6350487476893,
 1871.4818319482486,
 1803.2567772827604,
 1917.1431618448455,
 1896.0190834277564,
 1884.2755733156675,
 1886.6445079238981,
 1961.1488646334526,
 1989.8113588668118,
 1949.0976491806,
 1924.0193215922502,
 1884.0194398311587,
 1946.137400282998,
 1915.0813179614418,
 1900.398468015362,
 1877.8143106318446,
 1972.507432998812,
 1974.5680917654634,
 1979.2132043614506,
 1904.1782721019956,
 1944.7228050203491,
 2018.1652433606569,
 1873.695946900444,
 1922.063675787507,
 1945.096669054183,
 1933.31758271006,
 1806.0563588875757,
 1882.9125276711898,
 1922.7278771607798,
 1957.792773913764,
 1886.9850707766393,
 1938.9218115684087,
 1893.5402202319822,
 1873.8779212759093,
 1967.5000116222202,
 1857.5205811307808,
 1945.530223882965,
 1913.4767081081898,
 1865.0484604374728,
 1856.3422544921184,
 1945.9377160330773,
 1978.3387084971582,
 1885.5021606855905,
 2022.9898327550939,
 1967.1628080460607,
 1864.7649347418962,
 1880.0508826678795,
 1941.5698255960904,
 1933.629015165894,
 1939.8439834329172,
 1890.0979162287201,
 1916.5771901983862,
 1883.8516481083152,
 1878.24053945953,
 1985.5160862173716,
 1989.8780602913425,
 1971.6447522439034,
 1851.1044418329109,
 1892.090165697662,
 1914.3672427423217,
 1983.7253351348818,
 1996.1236556998692,
 1947.7464634258238,
 1962.4066362839749,
 1976.7972126593527,
 1913.9031538153315,
 1894.1320400997004,
 1984.0574277690243,
 1902.288876506764,
 1991.8261928282197,
 1913.6740391125727,
 1993.2777256792547,
 1833.899617414896,
 1917.3529577134238,
 1918.9020162369798,
 1870.9354262192994,
 1913.2614458476262,
 1894.3853835291916,
 1962.618708343185,
 1934.8204355998914,
 1882.5149612639555,
 1995.5601030679754,
 1823.8092671589106,
 1879.865392985855,
 1927.6903576280479,
 1831.1302321470475,
 1949.715639725068,
 1913.2977400570728,
 1933.2830482140976,
 1899.1880195930366,
 1878.2172023554276,
 1909.2493242188982,
 1992.7416919930713,
 1898.9208793527855,
 1967.615021809471,
 1935.3206527854504,
 1887.5767248157013,
 1921.3288389583533,
 1966.3470918064943,
 1878.4422359102068,
 1864.0358085268113,
 1899.3971589432376,
 1942.665497614408,
 1964.5672048396023,
 1925.4093645233268,
 1935.3836114043852,
 1960.0750475328223,
 1957.3427785716678,
 1983.4525976796908,
 1896.9108526076823,
 1931.1371726676014,
 1892.7798492802226,
 1849.016956630262,
 1997.8846223889063,
 1878.8758607480158,
 1931.3444770507742,
 1892.407630188242,
 1913.0741083046553,
 1932.6551779440877,
 1941.3578753386391,
 1914.226338970547,
 1957.6655600705421,
 1980.8787951385927,
 1953.5665940325464,
 1949.563840207515,
 1861.4924100504331,
 1975.4128939460966,
 1853.603963586428,
 1986.157437471871,
 1880.2288858021284,
 1861.820306758194,
 1952.6531428143544,
 1899.115949232256,
 1916.9680167579054,
 1877.8258171162502,
 1962.3903585286596,
 1886.916531864562,
 1841.930957363409,
 1930.5542021814604,
 1964.0010199183632,
 1908.199259550184,
 1938.7393051878835,
 1874.8075178326214,
 1947.872054104626,
 1869.0712833830244,
 1891.997933792317,
 1801.6871624808573,
 1915.1509398822552,
 1930.8087338453365,
 1844.9991661677643,
 1860.869065085889,
 1869.119251340592,
 1976.5166573379609,
 2011.4225912453735,
 1833.5461029266285,
 1920.666942551285,
 1855.4040050764775,
 1874.865674322178,
 1896.5402919396433,
 1889.1439963128105,
 1944.1571128026505,
 1929.7734724719219,
 1873.4986927998957,
 1921.181025439299,
 1985.736233625712,
 1974.198051131953,
 1951.8499945468718,
 1896.2101828782193,
 1794.87461351374,
 1990.506490700781,
 1927.3705934319676,
 1862.7072566652178,
 1884.043960151638,
 1869.3065277666806,
 1967.5802748461401,
 1971.1257593217683,
 1912.287000913006,
 1892.1263101146571,
 1823.5180835284089,
 1953.6068480142965,
 1948.1093661764355,
 1936.8700507646759,
 1932.8987056190367,
 1907.9399904109544,
 1889.9042168054352,
 1865.8257492161833,
 1895.6122769269289,
 1927.2550806942597,
 1947.1017828134638,
 1887.3881590801893,
 1975.182835936302,
 1852.4075262447977,
 1899.6644576117637,
 1953.4173371667555,
 1955.5130313675254,
 1961.3741804014905,
 1869.5431077917144,
 1889.0470959220895,
 1906.6462032789796,
 1947.679027289672,
 1860.3453544881834,
 1941.0945881910689,
 1899.5612501064272,
 1905.463235918544,
 1944.2055097363298,
 1929.6715761347862,
 1899.9591731468838,
 1883.154862581332,
 1918.4822955653233,
 1897.9173806449874,
 1989.5256344295049,
 1995.3606765674306,
 1887.4809902694656,
 1996.0218564366223,
 1942.0627265628384,
 1883.4575063509112,
 1865.3442352834923,
 1885.5959468129222,
 1911.94890525071,
 1954.6405852240466,
 1970.410576310064,
 1942.7530863380646,
 1894.1236052298514,
 1982.1555397260413,
 1929.0130759095262,
 1913.3987938350576,
 1899.6223967093342,
 1984.5670602579942,
 1904.106279777549,
 1964.3896314729066,
 1905.9634930456546,
 1995.9515816968415,
 1929.5472728419336,
 1850.4859391688447,
 1921.379677659449,
 1868.7601776826066,
 1961.0351229721293,
 2006.4486015191915,
 1888.5823934983741,
 1962.7713798373961,
 1866.9726424919484,
 1947.7508310821665,
 1935.1697485209838,
 1963.97456152255,
 1908.9080962147652,
 1956.8448853150003,
 1894.0126222499782,
 1888.73890687587,
 1873.3743150070563,
 1986.604303782963,
 1903.9596300114256,
 1886.005762440489,
 1872.002017661761,
 1958.033387583204,
 1869.8606365179419,
 1948.5427425941048,
 1958.5477773504133,
 2000.0645670860042,
 1928.4188880300246,
 1896.0473870913881,
 1888.1120727883574,
 1904.606927188032,
 1930.5658592700104,
 1846.7761326865093,
 1871.5533222689082,
 1901.688486168962,
 1869.610331679447,
 1869.5609629780158,
 1902.1613639974207,
 1849.2309579276064,
 1876.478259722388,
 1836.1432242769465,
 1938.2369489068303,
 1971.484408024392,
 ...]
import sklearn.metrics
import numpy as np

# dev0Expected is passed to scikit-learn as strings, which relies on an
# implicit string-to-number conversion that is deprecated (removed in
# scikit-learn 1.1). Convert to a float array once, explicitly.
dev0ExpectedNumeric = np.asarray(dev0Expected, dtype=float)

print('RMSE = ', np.sqrt(sklearn.metrics.mean_squared_error(dev0ExpectedNumeric, dev0Predicted)))
print('Model score = ', model.score(dev0X, dev0ExpectedNumeric))
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_regression.py:95: FutureWarning: Arrays of bytes/strings is being converted to decimal numbers if dtype='numeric'. This behavior is deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26). Please convert your data to numeric values explicitly instead.
  y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
RMSE =  24.659364457329445
Model score =  0.8175627445862136
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_regression.py:95: FutureWarning: Arrays of bytes/strings is being converted to decimal numbers if dtype='numeric'. This behavior is deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26). Please convert your data to numeric values explicitly instead.
  y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
# Turn every prediction into its string form and store the dev-0 results.
predicted = list(map(str, dev0Predicted.tolist()))
writeOutput(predicted, 'dev-0/out.tsv')