diff --git a/dev-0/out.tsv b/dev-0/out.tsv index d079673..bad7409 100644 --- a/dev-0/out.tsv +++ b/dev-0/out.tsv @@ -2399,7 +2399,7 @@ 1891.8254454938751 1994.973087857785 1895.7513842115543 -1818.468449786985 +1819.7853557275253 1916.4225469982782 1925.8444768767752 1803.6945227432166 @@ -4770,7 +4770,7 @@ 1877.9846992892226 1927.009228274752 1956.0342603495803 -1888.728666465564 +1888.0946097935803 1902.5565053201137 1919.9584774918735 1943.741259739466 @@ -6336,7 +6336,8 @@ 1974.1502369615996 1831.5375015711845 1838.1608467765225 -1926.4517809943604 +1915.3427108465514 +1945.8932757740788 1851.5741435105465 1920.8586218289263 1828.99219378231 @@ -11780,7 +11781,7 @@ 1987.6178513090335 1976.1096335883008 1897.3822154643528 -1804.1910303799948 +1803.4968757212146 1793.8259724220343 1856.8856127048498 1935.8772372820815 @@ -11868,7 +11869,7 @@ 1906.6083785205117 1945.6961233591253 1877.8654541239662 -1862.5549501707837 +1863.0871196534576 1912.8091241588024 1878.5815561770557 1880.3702786837491 @@ -13607,7 +13608,8 @@ 1882.0478883198302 1834.685523281624 1844.0560097378213 -1859.6210808559347 +1872.743150588325 +1893.1425762717217 1832.5381472381803 1908.4484935360613 1955.4009255116378 @@ -14150,7 +14152,7 @@ 1980.8143436550736 1907.5393156416155 1922.1638826574053 -1844.7215182713494 +1844.4184956010529 1890.065231312476 1863.7765604110195 1980.6354081854365 @@ -17368,7 +17370,7 @@ 1846.2550936280184 1979.23830132232 1800.1994574405205 -1832.372260013428 +1832.3808282396355 1935.3331588825008 2007.7035519353667 1909.090541851314 @@ -19795,7 +19797,7 @@ 1898.696569031822 1825.9802939645404 1905.4852331424952 -1807.2101670002144 +1807.0538386309568 1977.6639932896976 1943.9415272899018 1928.926654259205 @@ -19996,5 +19998,3 @@ 2004.0621537194256 1924.6022305381223 1967.4833548100212 -1999.0 -1999.0 diff --git a/dev-1/out.tsv b/dev-1/out.tsv index d46053f..c2f49c1 100644 --- a/dev-1/out.tsv +++ b/dev-1/out.tsv @@ -5092,7 +5092,8 @@ 1909.56242503168 1953.8328520988782 1921.8106923392882 -1979.3749344320097 +1923.8885604935026 +2015.8703429697698 1976.9427187889949 1930.5965204821428 1917.0939862972396 @@ -6910,7 +6911,7 @@ 1961.1725384202596 1963.9217067157326 1893.4105498020222 -1898.2814309257847 +1899.2417818146473 1918.4752716026355 1917.6311354762902 1975.6025888121983 @@ -10779,7 +10780,7 @@ 1994.0961323923023 1938.0237957835966 1990.889018140576 -1888.085728596831 +1887.388047497512 1946.5265433699708 1929.3545648330125 1987.6455689218913 @@ -10891,7 +10892,7 @@ 1955.8104187740462 1926.633915138585 1935.1304506914537 -1942.248290403189 +1940.7148364219047 1893.7314353356096 1910.9624947958764 1958.6362159089115 @@ -11560,4 +11561,3 @@ 1935.8485031558441 1940.562491157199 1958.4093899338734 -1999.0 \ No newline at end of file diff --git a/run.ipynb b/run.ipynb index 486fbdb..511eb4b 100644 --- a/run.ipynb +++ b/run.ipynb @@ -504,29 +504,34 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 157, "metadata": {}, "outputs": [], "source": [ - "testXPath = \"./dev-1/in.tsv\"\n", - "testYPath = \"./dev-1/out.tsv\"\n", + "testXPath = \"./test-A/in.tsv\"\n", + "testYPath = \"./test-A/out.tsv\"\n", "\n", - "testX = pd.read_csv(testXPath, sep='\\t', nrows=nrows, header=None)\n", + "# testX = pd.read_csv(testXPath, sep='\\t', nrows=nrows, header=None, skip_blank_lines=False)\n", "# testY = pd.read_csv(testYPath, sep='\\t', nrows=nrows, header=None)\n", + "reviews = []\n", + "with open(testXPath, 'r', encoding=\"utf8\") as openfile:\n", + " for line in openfile:\n", + " reviews.append(line)\n", + " testX = pd.DataFrame(reviews)\n", "testXtfidfVector = vectorizer.transform(testX[0])\n" ] }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 158, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "11562\n", - "11562\n" + "14220\n", + "14220\n" ] } ], @@ -538,14 +543,14 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 159, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "11562\n" + "14220\n" ] } ], diff --git a/test-A/out.tsv b/test-A/out.tsv index ee694fa..67acd41 100644 --- a/test-A/out.tsv +++ b/test-A/out.tsv @@ -206,7 +206,7 @@ 1910.269907491521 1988.9016465203608 1872.3206779400325 -1872.866105906589 +1871.876258621021 1868.1361140797396 1935.5402895297493 1907.5442950788085 @@ -961,7 +961,7 @@ 1911.978549223843 1934.9192698619865 2013.3192694175561 -1909.715199874406 +1910.4291713668608 1957.179565988373 1971.0504703681663 2000.595229979751 @@ -1458,7 +1458,7 @@ 1903.1783917758141 1904.1313760354244 1897.4844568570802 -1897.9485726722803 +1899.0967382020651 1930.404429576726 1882.5053743232945 1920.0283716116683 @@ -1944,7 +1944,7 @@ 1955.0631304427375 1931.9425886889717 1925.5842582504251 -1893.3593433923945 +1893.5914351450795 1835.5017742683756 1922.5070233258218 1901.9521531654223 @@ -3448,7 +3448,8 @@ 1981.3948298971816 1950.9010374740571 1970.0023726367149 -1969.9987622640303 +1954.420557110378 +1970.891743103846 1923.2064408497374 1893.2345014590535 1954.7696173042737 @@ -5241,7 +5242,7 @@ 1978.228175885359 1903.1104713527263 1998.1846287501376 -1852.5503648213512 +1852.7087407508104 1906.571359305605 1916.7043428211625 1933.8425059929073 @@ -6326,7 +6327,7 @@ 1900.1304844266074 1961.809316865927 1904.458851015373 -1914.3185871224223 +1911.9112181775058 1966.2085064344847 1949.9880698367597 1833.617092397901 @@ -8450,7 +8451,7 @@ 1940.4785618713847 1933.4202727244033 1913.2727194544962 -1939.6872570033222 +1939.8872547798828 1905.9918163328934 1939.3018559789575 1938.7494877668814 @@ -10178,7 +10179,7 @@ 1934.930332546927 1905.631198335156 1968.838577126508 -1906.9780186196351 +1908.0074065694862 1914.1797648588122 1977.055073972757 1972.2950606109712 @@ -11287,7 +11288,7 @@ 1931.7839823275406 1856.130787467182 1967.186168494344 -1904.958080903425 +1902.3830435931652 1925.7029500805036 1964.9327312548162 1904.5121779427361 @@ -13417,7 +13418,7 @@ 1991.6331877838395 1908.055685337203 1987.8174233479353 -1864.500047672751 +1866.8568255755872 1952.1602111681595 1950.9861520180957 1906.0082808001252 @@ -14217,4 +14218,3 @@ 1936.0490932436148 1990.1215448613398 1913.0711081496029 -1999.0 \ No newline at end of file