fix reading files

This commit is contained in:
s444417 2022-05-17 20:57:47 +02:00
parent bea6c0cada
commit 097ebb7b0f
4 changed files with 42 additions and 37 deletions

View File

@ -2399,7 +2399,7 @@
1891.8254454938751
1994.973087857785
1895.7513842115543
1818.468449786985
1819.7853557275253
1916.4225469982782
1925.8444768767752
1803.6945227432166
@ -4770,7 +4770,7 @@
1877.9846992892226
1927.009228274752
1956.0342603495803
1888.728666465564
1888.0946097935803
1902.5565053201137
1919.9584774918735
1943.741259739466
@ -6336,7 +6336,8 @@
1974.1502369615996
1831.5375015711845
1838.1608467765225
1926.4517809943604
1915.3427108465514
1945.8932757740788
1851.5741435105465
1920.8586218289263
1828.99219378231
@ -11780,7 +11781,7 @@
1987.6178513090335
1976.1096335883008
1897.3822154643528
1804.1910303799948
1803.4968757212146
1793.8259724220343
1856.8856127048498
1935.8772372820815
@ -11868,7 +11869,7 @@
1906.6083785205117
1945.6961233591253
1877.8654541239662
1862.5549501707837
1863.0871196534576
1912.8091241588024
1878.5815561770557
1880.3702786837491
@ -13607,7 +13608,8 @@
1882.0478883198302
1834.685523281624
1844.0560097378213
1859.6210808559347
1872.743150588325
1893.1425762717217
1832.5381472381803
1908.4484935360613
1955.4009255116378
@ -14150,7 +14152,7 @@
1980.8143436550736
1907.5393156416155
1922.1638826574053
1844.7215182713494
1844.4184956010529
1890.065231312476
1863.7765604110195
1980.6354081854365
@ -17368,7 +17370,7 @@
1846.2550936280184
1979.23830132232
1800.1994574405205
1832.372260013428
1832.3808282396355
1935.3331588825008
2007.7035519353667
1909.090541851314
@ -19795,7 +19797,7 @@
1898.696569031822
1825.9802939645404
1905.4852331424952
1807.2101670002144
1807.0538386309568
1977.6639932896976
1943.9415272899018
1928.926654259205
@ -19996,5 +19998,3 @@
2004.0621537194256
1924.6022305381223
1967.4833548100212
1999.0
1999.0

1 1825.0344950703334
2399 1891.8254454938751
2400 1994.973087857785
2401 1895.7513842115543
2402 1818.468449786985 1819.7853557275253
2403 1916.4225469982782
2404 1925.8444768767752
2405 1803.6945227432166
4770 1877.9846992892226
4771 1927.009228274752
4772 1956.0342603495803
4773 1888.728666465564 1888.0946097935803
4774 1902.5565053201137
4775 1919.9584774918735
4776 1943.741259739466
6336 1974.1502369615996
6337 1831.5375015711845
6338 1838.1608467765225
6339 1926.4517809943604 1915.3427108465514
6340 1945.8932757740788
6341 1851.5741435105465
6342 1920.8586218289263
6343 1828.99219378231
11781 1987.6178513090335
11782 1976.1096335883008
11783 1897.3822154643528
11784 1804.1910303799948 1803.4968757212146
11785 1793.8259724220343
11786 1856.8856127048498
11787 1935.8772372820815
11869 1906.6083785205117
11870 1945.6961233591253
11871 1877.8654541239662
11872 1862.5549501707837 1863.0871196534576
11873 1912.8091241588024
11874 1878.5815561770557
11875 1880.3702786837491
13608 1882.0478883198302
13609 1834.685523281624
13610 1844.0560097378213
13611 1859.6210808559347 1872.743150588325
13612 1893.1425762717217
13613 1832.5381472381803
13614 1908.4484935360613
13615 1955.4009255116378
14152 1980.8143436550736
14153 1907.5393156416155
14154 1922.1638826574053
14155 1844.7215182713494 1844.4184956010529
14156 1890.065231312476
14157 1863.7765604110195
14158 1980.6354081854365
17370 1846.2550936280184
17371 1979.23830132232
17372 1800.1994574405205
17373 1832.372260013428 1832.3808282396355
17374 1935.3331588825008
17375 2007.7035519353667
17376 1909.090541851314
19797 1898.696569031822
19798 1825.9802939645404
19799 1905.4852331424952
19800 1807.2101670002144 1807.0538386309568
19801 1977.6639932896976
19802 1943.9415272899018
19803 1928.926654259205
19998 2004.0621537194256
19999 1924.6022305381223
20000 1967.4833548100212
1999.0
1999.0

View File

@ -5092,7 +5092,8 @@
1909.56242503168
1953.8328520988782
1921.8106923392882
1979.3749344320097
1923.8885604935026
2015.8703429697698
1976.9427187889949
1930.5965204821428
1917.0939862972396
@ -6910,7 +6911,7 @@
1961.1725384202596
1963.9217067157326
1893.4105498020222
1898.2814309257847
1899.2417818146473
1918.4752716026355
1917.6311354762902
1975.6025888121983
@ -10779,7 +10780,7 @@
1994.0961323923023
1938.0237957835966
1990.889018140576
1888.085728596831
1887.388047497512
1946.5265433699708
1929.3545648330125
1987.6455689218913
@ -10891,7 +10892,7 @@
1955.8104187740462
1926.633915138585
1935.1304506914537
1942.248290403189
1940.7148364219047
1893.7314353356096
1910.9624947958764
1958.6362159089115
@ -11560,4 +11561,3 @@
1935.8485031558441
1940.562491157199
1958.4093899338734
1999.0
1 1967.9383941311928
5092 1909.56242503168
5093 1953.8328520988782
5094 1921.8106923392882
5095 1979.3749344320097 1923.8885604935026
5096 2015.8703429697698
5097 1976.9427187889949
5098 1930.5965204821428
5099 1917.0939862972396
6911 1961.1725384202596
6912 1963.9217067157326
6913 1893.4105498020222
6914 1898.2814309257847 1899.2417818146473
6915 1918.4752716026355
6916 1917.6311354762902
6917 1975.6025888121983
10780 1994.0961323923023
10781 1938.0237957835966
10782 1990.889018140576
10783 1888.085728596831 1887.388047497512
10784 1946.5265433699708
10785 1929.3545648330125
10786 1987.6455689218913
10892 1955.8104187740462
10893 1926.633915138585
10894 1935.1304506914537
10895 1942.248290403189 1940.7148364219047
10896 1893.7314353356096
10897 1910.9624947958764
10898 1958.6362159089115
11561 1935.8485031558441
11562 1940.562491157199
11563 1958.4093899338734
1999.0

View File

@ -504,29 +504,34 @@
},
{
"cell_type": "code",
"execution_count": 127,
"execution_count": 157,
"metadata": {},
"outputs": [],
"source": [
"testXPath = \"./dev-1/in.tsv\"\n",
"testYPath = \"./dev-1/out.tsv\"\n",
"testXPath = \"./test-A/in.tsv\"\n",
"testYPath = \"./test-A/out.tsv\"\n",
"\n",
"testX = pd.read_csv(testXPath, sep='\\t', nrows=nrows, header=None)\n",
"# testX = pd.read_csv(testXPath, sep='\\t', nrows=nrows, header=None, skip_blank_lines=False)\n",
"# testY = pd.read_csv(testYPath, sep='\\t', nrows=nrows, header=None)\n",
"reviews = []\n",
"with open(testXPath, 'r', encoding=\"utf8\") as openfile:\n",
" for line in openfile:\n",
" reviews.append(line)\n",
" testX = pd.DataFrame(reviews)\n",
"testXtfidfVector = vectorizer.transform(testX[0])\n"
]
},
{
"cell_type": "code",
"execution_count": 128,
"execution_count": 158,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"11562\n",
"11562\n"
"14220\n",
"14220\n"
]
}
],
@ -538,14 +543,14 @@
},
{
"cell_type": "code",
"execution_count": 117,
"execution_count": 159,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"11562\n"
"14220\n"
]
}
],

View File

@ -206,7 +206,7 @@
1910.269907491521
1988.9016465203608
1872.3206779400325
1872.866105906589
1871.876258621021
1868.1361140797396
1935.5402895297493
1907.5442950788085
@ -961,7 +961,7 @@
1911.978549223843
1934.9192698619865
2013.3192694175561
1909.715199874406
1910.4291713668608
1957.179565988373
1971.0504703681663
2000.595229979751
@ -1458,7 +1458,7 @@
1903.1783917758141
1904.1313760354244
1897.4844568570802
1897.9485726722803
1899.0967382020651
1930.404429576726
1882.5053743232945
1920.0283716116683
@ -1944,7 +1944,7 @@
1955.0631304427375
1931.9425886889717
1925.5842582504251
1893.3593433923945
1893.5914351450795
1835.5017742683756
1922.5070233258218
1901.9521531654223
@ -3448,7 +3448,8 @@
1981.3948298971816
1950.9010374740571
1970.0023726367149
1969.9987622640303
1954.420557110378
1970.891743103846
1923.2064408497374
1893.2345014590535
1954.7696173042737
@ -5241,7 +5242,7 @@
1978.228175885359
1903.1104713527263
1998.1846287501376
1852.5503648213512
1852.7087407508104
1906.571359305605
1916.7043428211625
1933.8425059929073
@ -6326,7 +6327,7 @@
1900.1304844266074
1961.809316865927
1904.458851015373
1914.3185871224223
1911.9112181775058
1966.2085064344847
1949.9880698367597
1833.617092397901
@ -8450,7 +8451,7 @@
1940.4785618713847
1933.4202727244033
1913.2727194544962
1939.6872570033222
1939.8872547798828
1905.9918163328934
1939.3018559789575
1938.7494877668814
@ -10178,7 +10179,7 @@
1934.930332546927
1905.631198335156
1968.838577126508
1906.9780186196351
1908.0074065694862
1914.1797648588122
1977.055073972757
1972.2950606109712
@ -11287,7 +11288,7 @@
1931.7839823275406
1856.130787467182
1967.186168494344
1904.958080903425
1902.3830435931652
1925.7029500805036
1964.9327312548162
1904.5121779427361
@ -13417,7 +13418,7 @@
1991.6331877838395
1908.055685337203
1987.8174233479353
1864.500047672751
1866.8568255755872
1952.1602111681595
1950.9861520180957
1906.0082808001252
@ -14217,4 +14218,3 @@
1936.0490932436148
1990.1215448613398
1913.0711081496029
1999.0
1 1984.7906451252588
206 1910.269907491521
207 1988.9016465203608
208 1872.3206779400325
209 1872.866105906589 1871.876258621021
210 1868.1361140797396
211 1935.5402895297493
212 1907.5442950788085
961 1911.978549223843
962 1934.9192698619865
963 2013.3192694175561
964 1909.715199874406 1910.4291713668608
965 1957.179565988373
966 1971.0504703681663
967 2000.595229979751
1458 1903.1783917758141
1459 1904.1313760354244
1460 1897.4844568570802
1461 1897.9485726722803 1899.0967382020651
1462 1930.404429576726
1463 1882.5053743232945
1464 1920.0283716116683
1944 1955.0631304427375
1945 1931.9425886889717
1946 1925.5842582504251
1947 1893.3593433923945 1893.5914351450795
1948 1835.5017742683756
1949 1922.5070233258218
1950 1901.9521531654223
3448 1981.3948298971816
3449 1950.9010374740571
3450 1970.0023726367149
3451 1969.9987622640303 1954.420557110378
3452 1970.891743103846
3453 1923.2064408497374
3454 1893.2345014590535
3455 1954.7696173042737
5242 1978.228175885359
5243 1903.1104713527263
5244 1998.1846287501376
5245 1852.5503648213512 1852.7087407508104
5246 1906.571359305605
5247 1916.7043428211625
5248 1933.8425059929073
6327 1900.1304844266074
6328 1961.809316865927
6329 1904.458851015373
6330 1914.3185871224223 1911.9112181775058
6331 1966.2085064344847
6332 1949.9880698367597
6333 1833.617092397901
8451 1940.4785618713847
8452 1933.4202727244033
8453 1913.2727194544962
8454 1939.6872570033222 1939.8872547798828
8455 1905.9918163328934
8456 1939.3018559789575
8457 1938.7494877668814
10179 1934.930332546927
10180 1905.631198335156
10181 1968.838577126508
10182 1906.9780186196351 1908.0074065694862
10183 1914.1797648588122
10184 1977.055073972757
10185 1972.2950606109712
11288 1931.7839823275406
11289 1856.130787467182
11290 1967.186168494344
11291 1904.958080903425 1902.3830435931652
11292 1925.7029500805036
11293 1964.9327312548162
11294 1904.5121779427361
13418 1991.6331877838395
13419 1908.055685337203
13420 1987.8174233479353
13421 1864.500047672751 1866.8568255755872
13422 1952.1602111681595
13423 1950.9861520180957
13424 1906.0082808001252
14218 1936.0490932436148
14219 1990.1215448613398
14220 1913.0711081496029
1999.0