Merge branch 'master' of ssh://re-research.wmi.amu.edu.pl:1977/gonito

This commit is contained in:
Filip Gralinski 2017-09-22 14:52:11 +02:00
commit 0a534c093d
8 changed files with 59 additions and 6 deletions

1
static/css/bootstrap-table.min.css vendored Normal file

File diff suppressed because one or more lines are too long

BIN
static/images/amu-logo.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

BIN
static/images/sample1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 224 KiB

8
static/js/bootstrap-table.min.js vendored Normal file

File diff suppressed because one or more lines are too long

5
static/js/jquery-1.11.3.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,26 @@
// Renders a Highcharts chart into the element with id "gcontainerX01".
//
// The series definition is fetched from /static/js/years-stats.json and
// passed to Highcharts unchanged as the `series` option, so the JSON file
// must already be in the shape Highcharts expects for `series`.
//
// Requires jQuery and the Highcharts jQuery plugin to be loaded beforehand.
$.getJSON("/static/js/years-stats.json")
  .done(function (data) {
    // The request is started immediately; chart creation is deferred until
    // the DOM is ready so that the container element can be found.
    $(function () {
      $("#gcontainerX01").highcharts({
        // Empty title text: no caption is supplied for the chart.
        title: {
          text: ''
        },
        yAxis: {
          // Empty axis title text, same as the chart title.
          title: {
            text: ''
          },
          // A 1px grey reference line at y = 0.
          plotLines: [{
            value: 0,
            width: 1,
            color: '#808080'
          }]
        },
        // Legend stacked vertically, placed at the right edge and centred
        // vertically, with no border.
        legend: {
          layout: 'vertical',
          align: 'right',
          verticalAlign: 'middle',
          borderWidth: 0
        },
        series: data
      });
    });
  })
  .fail(function (jqXHR, textStatus, errorThrown) {
    // Previously a failed request (missing file, invalid JSON, network
    // error) left the container empty without any hint. Report it instead;
    // the console guard keeps this safe in browsers without a console.
    if (window.console && typeof console.error === "function") {
      console.error(
        "Could not load /static/js/years-stats.json: " + textStatus,
        errorThrown
      );
    }
  });

View File

@ -0,0 +1 @@
[{"data":[[1750,196],[1751,111],[1752,277],[1753,154],[1754,306],[1755,146],[1756,409],[1757,440],[1758,291],[1759,284],[1760,334],[1761,403],[1762,1035],[1763,689],[1764,505],[1765,573],[1766,888],[1767,600],[1768,517],[1769,516],[1770,408],[1771,396],[1772,381],[1773,398],[1774,293],[1775,258],[1776,450],[1777,412],[1778,487],[1779,500],[1780,593],[1781,487],[1782,322],[1783,482],[1784,593],[1785,360],[1786,471],[1787,527],[1788,483],[1789,446],[1790,526],[1791,638],[1792,638],[1793,561],[1794,460],[1795,275],[1796,483],[1797,541],[1798,580],[1799,506],[1800,905],[1801,744],[1802,862],[1803,755],[1804,611],[1805,729],[1806,810],[1807,1010],[1808,923],[1809,747],[1810,755],[1811,968],[1812,1203],[1813,1081],[1814,1227],[1815,1236],[1816,1425],[1817,1930],[1818,2141],[1819,2457],[1820,2420],[1821,2505],[1822,2891],[1823,2325],[1824,2667],[1825,2514],[1826,2614],[1827,3316],[1828,3877],[1829,3343],[1830,4869],[1831,4664],[1832,4220],[1833,3890],[1834,4079],[1835,4292],[1836,5096],[1837,5042],[1838,5073],[1839,5017],[1840,4811],[1841,4476],[1842,4573],[1843,4717],[1844,4824],[1845,4963],[1846,4465],[1847,3955],[1848,5224],[1849,4609],[1850,5189],[1851,4378],[1852,4461],[1853,4255],[1854,4563],[1855,4916],[1856,6204],[1857,3948],[1858,4567],[1859,5327],[1860,5768],[1861,5532],[1862,5451],[1863,5726],[1864,5870],[1865,6917],[1866,6378],[1867,6334],[1868,6698],[1869,6196],[1870,7330],[1871,6208],[1872,6585],[1873,7322],[1874,7679],[1875,8647],[1876,7284],[1877,8427],[1878,8367],[1879,9017],[1880,9925],[1881,9519],[1882,10547],[1883,10982],[1884,10629],[1885,11358],[1886,11143],[1887,11467],[1888,11303],[1889,12101],[1890,11684],[1891,10802],[1892,11088],[1893,11340],[1894,12458],[1895,14029],[1896,13920],[1897,14399],[1898,15333],[1899,13407],[1900,16438],[1901,15985],[1902,15982],[1903,15690],[1904,15756],[1905,16221],[1906,17343],[1907,18327],[1908,18960],[1909,17593],[1910,18333],[1911,17203],[1912,18479],[1913,18140],[1914,17608],[1915,19320],[1916,18953],[1917,19210
],[1918,19030],[1919,19110],[1920,20004],[1921,18660],[1922,20772],[1923,19946],[1924,24044],[1925,28551],[1926,30385],[1927,30418],[1928,32456],[1929,31990],[1930,32962],[1931,30590],[1932,30437],[1933,30498],[1934,28593],[1935,29084],[1936,28664],[1937,28143],[1938,30803],[1939,19858],[1940,6492],[1941,7068],[1942,6320],[1943,6470],[1944,7670],[1945,12393],[1946,10847],[1947,11731],[1948,11301],[1949,11153],[1950,15044],[1951,13405],[1952,12835],[1953,12649],[1954,13452],[1955,7626],[1956,6919],[1957,7074],[1958,7556],[1959,8194],[1960,7761],[1961,7559],[1962,6998],[1963,9716],[1964,9248],[1965,10292],[1966,10332],[1967,9950],[1968,9122],[1969,9091],[1970,9573],[1971,9861],[1972,10673],[1973,8714],[1974,14117],[1975,14955],[1976,13943],[1977,14415],[1978,12671],[1979,12637],[1980,16954],[1981,29436],[1982,12787],[1983,12684],[1984,12267],[1985,13128],[1986,13002],[1987,13149],[1988,13490],[1989,15131],[1990,12440],[1991,10878],[1992,13120],[1993,19776],[1994,31670],[1995,40765],[1996,44241],[1997,34074],[1998,45850],[1999,28224],[2000,27371],[2001,31841],[2002,23885],[2003,19042],[2004,26928],[2005,32251],[2006,32068],[2007,41350],[2008,41560],[2009,45673],[2010,45566],[2011,48377],[2012,61339],[2013,94367],[2014,92056],[2015,83766],[2016,35200]],"name":"items"}]

View File

@ -1,16 +1,19 @@
<script src="/static/js/sigma.min.js">
<script src="/static/js/sigma.parsers.json.min.js">
<script type="text/javascript" src="/static/js/jquery-1.11.3.min.js">
<script type="text/javascript" src="http://code.highcharts.com/highcharts.js">
<div id="title" class="step" data-x="0" data-y="0">
<h1>RetroC challenge
<p>how to guess the publication year of a text?
<p>how to guess the publication year of a text?
<table>
<tr>
<td>
<p class="footnote">Filip Graliński, Rafał Jaworski,<br/>Łukasz Borchmann, Piotr Wierzchoń&nbsp;&nbsp;&nbsp;
<td>
<center>
<img src="https://siw.amu.edu.pl/__data/assets/file/0006/162987/logotyp_wersja-pozioma_granat_ENG_1.jpg" width="300">
<img src="/static/images/amu-logo.jpg" width="300">
<p class="footnote">DATeCH 2017
<div class="step slide" data-x="0" data-y="1000">
@ -28,7 +31,10 @@
<li>Polish digital libraries (OCRed DjVus/PDFs)
<li>digital-born material, (pre-)history of Polish Internet, manually transcribed, grassroots digitization efforts, <i>samiskan</i> etc.
<ul>
<li>… anything that is timestamped
<li>… anything in Polish that is timestamped
<center>
<img src="/static/images/sample1.png">
<div class="step slide" data-x="1000" data-y="2000">
<h2>Raw corpus in numbers
@ -42,6 +48,10 @@
<ul>
<li>… (also German if you're interested — 350K publications)
<div class="graph">
<div id="gcontainerX01" style="width:100%; height:300px;">
<script type="text/javascript" src="/static/js/years-stats-draw.js">
<div class="step slide" data-x="0" data-y="3000">
<h2>The challenge…
@ -54,9 +64,9 @@
git clone git://gonito.net/retroc
<div class="step slide" data-x="1000" data-y="3000">
<p>Core assumption
<p>Core motivation
<ul>
<li>the temporal classifier will be used to date historical texts with no publication date given in metadata
<li>temporal classifier will be used to date historical texts with no publication date given in metadata
<h2>Assumptions
<ul>
@ -95,7 +105,7 @@
<td align="right">11.5K
<td align="right">14.2K
<p style="padding-top: 20px">RetroC2
<p style="padding-top: 30px">RetroC2
<ul>
<li>train set contains more information (source is given for each entry),<br/>but this is <b>not</b> present in the test set
<li>timestamps given as years with possible fractions<br/> (if publication day/month is known)
@ -119,6 +129,8 @@
<li>small NN (6 units)
<li>weights inversely proportional to (the root of) year frequency in the train set
<p style="padding-top: 30px">The result to beat:&nbsp;&nbsp;<span style="font-size:64px">RMSE=24.8 years</span>
<div class="step slide" style="height: 800px;" data-x="2000" data-y="4000">
<h2>RetroC2