forked from tdwojak/Python2019
add labs05
This commit is contained in:
parent
e6ab48cc18
commit
5b75c30384
BIN
labs05/220px-KnnClassification.svg.png
Normal file
BIN
labs05/220px-KnnClassification.svg.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 6.1 KiB |
111070
labs05/311.csv
Normal file
111070
labs05/311.csv
Normal file
File diff suppressed because it is too large
Load Diff
177
labs05/gapminder.csv
Normal file
177
labs05/gapminder.csv
Normal file
@ -0,0 +1,177 @@
|
||||
,female_BMI,male_BMI,gdp,population,under5mortality,life_expectancy,fertility
|
||||
Afghanistan,21.07402,20.62058,1311.0,26528741.0,110.4,52.8,6.2
|
||||
Albania,25.65726,26.44657,8644.0,2968026.0,17.9,76.8,1.76
|
||||
Algeria,26.368409999999997,24.5962,12314.0,34811059.0,29.5,75.5,2.73
|
||||
Angola,23.48431,22.25083,7103.0,19842251.0,192.0,56.7,6.43
|
||||
Antigua and Barbuda,27.50545,25.76602,25736.0,85350.0,10.9,75.5,2.16
|
||||
Argentina,27.46523,27.5017,14646.0,40381860.0,15.4,75.4,2.24
|
||||
Armenia,27.1342,25.355420000000002,7383.0,2975029.0,20.0,72.3,1.4
|
||||
Australia,26.87777,27.56373,41312.0,21370348.0,5.2,81.6,1.96
|
||||
Austria,25.09414,26.467409999999997,43952.0,8331465.0,4.6,80.4,1.41
|
||||
Azerbaijan,27.50879,25.65117,14365.0,8868713.0,43.3,69.2,1.99
|
||||
Bahamas,29.13948,27.24594,24373.0,348587.0,14.5,72.2,1.89
|
||||
Bahrain,28.790940000000003,27.83721,42507.0,1115777.0,9.4,77.6,2.23
|
||||
Bangladesh,20.54531,20.39742,2265.0,148252473.0,55.9,68.3,2.38
|
||||
Barbados,29.221690000000002,26.384390000000003,16075.0,277315.0,15.4,75.3,1.83
|
||||
Belarus,26.641859999999998,26.16443,14488.0,9526453.0,7.2,70.0,1.42
|
||||
Belgium,25.1446,26.75915,41641.0,10779155.0,4.7,79.6,1.82
|
||||
Belize,29.81663,27.02255,8293.0,306165.0,20.1,70.7,2.91
|
||||
Benin,23.74026,22.41835,1646.0,8973525.0,116.3,59.7,5.27
|
||||
Bhutan,22.88243,22.8218,5663.0,694990.0,48.1,70.7,2.51
|
||||
Bolivia,26.8633,24.43335,5066.0,9599916.0,52.0,71.2,3.48
|
||||
Bosnia and Herzegovina,26.35874,26.611629999999998,9316.0,3839749.0,8.1,77.5,1.22
|
||||
Botswana,26.09156,22.129839999999998,13858.0,1967866.0,63.8,53.2,2.86
|
||||
Brazil,25.99113,25.78623,13906.0,194769696.0,18.6,73.2,1.9
|
||||
Brunei,22.892310000000002,24.18179,72351.0,380786.0,9.0,76.9,2.1
|
||||
Bulgaria,25.51574,26.542859999999997,15368.0,7513646.0,13.7,73.2,1.43
|
||||
Burkina Faso,21.63031,21.27157,1358.0,14709011.0,130.4,58.0,6.04
|
||||
Burundi,21.27927,21.50291,723.0,8821795.0,108.6,59.1,6.48
|
||||
Cambodia,21.69608,20.80496,2442.0,13933660.0,51.5,66.1,3.05
|
||||
Cameroon,24.9527,23.681729999999998,2571.0,19570418.0,113.8,56.6,5.17
|
||||
Canada,26.698290000000004,27.4521,41468.0,33363256.0,5.8,80.8,1.68
|
||||
Cape Verde,24.96136,23.515220000000003,6031.0,483824.0,28.4,70.4,2.57
|
||||
Chad,21.95424,21.485689999999998,1753.0,11139740.0,168.0,54.3,6.81
|
||||
Chile,27.92807,27.015420000000002,18698.0,16645940.0,8.9,78.5,1.89
|
||||
China,22.91041,22.92176,7880.0,1326690636.0,18.5,73.4,1.53
|
||||
Colombia,26.22529,24.94041,10489.0,44901660.0,19.7,76.2,2.43
|
||||
Comoros,22.444329999999997,22.06131,1440.0,665414.0,91.2,67.1,5.05
|
||||
"Congo, Dem. Rep.",21.6677,19.86692,607.0,61809278.0,124.5,57.5,6.45
|
||||
"Congo, Rep.",23.10824,21.87134,5022.0,3832771.0,72.6,58.8,5.1
|
||||
Costa Rica,27.03497,26.47897,12219.0,4429506.0,10.3,79.8,1.91
|
||||
Cote d'Ivoire,23.82088,22.56469,2854.0,19261647.0,116.9,55.4,4.91
|
||||
Croatia,25.17882,26.596290000000003,21873.0,4344151.0,5.9,76.2,1.43
|
||||
Cuba,26.576140000000002,25.06867,17765.0,11290239.0,6.3,77.6,1.5
|
||||
Cyprus,25.92587,27.41899,35828.0,1077010.0,4.2,80.0,1.49
|
||||
Denmark,25.106270000000002,26.13287,45017.0,5495302.0,4.3,78.9,1.89
|
||||
Djibouti,24.38177,23.38403,2502.0,809639.0,81.0,61.8,3.76
|
||||
Ecuador,27.062690000000003,25.58841,9244.0,14447600.0,26.8,74.7,2.73
|
||||
Egypt,30.099970000000003,26.732429999999997,9974.0,78976122.0,31.4,70.2,2.95
|
||||
El Salvador,27.84092,26.36751,7450.0,6004199.0,21.6,73.7,2.32
|
||||
Equatorial Guinea,24.528370000000002,23.7664,40143.0,686223.0,118.4,57.5,5.31
|
||||
Eritrea,21.082320000000003,20.885089999999998,1088.0,4500638.0,60.4,60.1,5.16
|
||||
Estonia,25.185979999999997,26.264459999999996,24743.0,1339941.0,5.5,74.2,1.62
|
||||
Ethiopia,20.71463,20.247,931.0,83079608.0,86.9,60.0,5.19
|
||||
Fiji,29.339409999999997,26.53078,7129.0,843206.0,24.0,64.9,2.74
|
||||
Finland,25.58418,26.733390000000004,42122.0,5314170.0,3.3,79.6,1.85
|
||||
France,24.82949,25.853289999999998,37505.0,62309529.0,4.3,81.1,1.97
|
||||
Gabon,25.95121,24.0762,15800.0,1473741.0,68.0,61.7,4.28
|
||||
Gambia,24.82101,21.65029,1566.0,1586749.0,87.4,65.7,5.8
|
||||
Georgia,26.45014,25.54942,5900.0,4343290.0,19.3,71.8,1.79
|
||||
Germany,25.73903,27.165090000000003,41199.0,80665906.0,4.4,80.0,1.37
|
||||
Ghana,24.33014,22.842470000000002,2907.0,23115919.0,79.9,62.0,4.19
|
||||
Greece,24.92026,26.33786,32197.0,11161755.0,4.9,80.2,1.46
|
||||
Grenada,27.31948,25.179879999999997,12116.0,103934.0,13.5,70.8,2.28
|
||||
Guatemala,26.84324,25.29947,6960.0,14106687.0,36.9,71.2,4.12
|
||||
Guinea,22.45206,22.52449,1230.0,10427356.0,121.0,57.1,5.34
|
||||
Guinea-Bissau,22.92809,21.64338,1326.0,1561293.0,127.6,53.6,5.25
|
||||
Guyana,26.470190000000002,23.68465,5208.0,748096.0,41.9,65.0,2.74
|
||||
Haiti,23.27785,23.66302,1600.0,9705130.0,83.3,61.0,3.5
|
||||
Honduras,26.73191,25.10872,4391.0,7259470.0,26.5,71.8,3.27
|
||||
"Hong Kong, China",23.71046,25.057470000000002,46635.0,6910384.0,3.06,82.49,1.04
|
||||
Hungary,25.97839,27.115679999999998,23334.0,10050699.0,7.2,73.9,1.33
|
||||
Iceland,26.02599,27.206870000000002,42294.0,310033.0,2.7,82.4,2.12
|
||||
India,21.31478,20.95956,3901.0,1197070109.0,65.6,64.7,2.64
|
||||
Indonesia,22.986929999999997,21.85576,7856.0,235360765.0,36.2,69.4,2.48
|
||||
Iran,27.236079999999998,25.310029999999998,15955.0,72530693.0,21.4,73.1,1.88
|
||||
Iraq,28.411170000000002,26.71017,11616.0,29163327.0,38.3,66.6,4.34
|
||||
Ireland,26.62176,27.65325,47713.0,4480145.0,4.5,80.1,2.0
|
||||
Israel,27.301920000000003,27.13151,28562.0,7093808.0,4.9,80.6,2.92
|
||||
Italy,24.79289,26.4802,37475.0,59319234.0,4.1,81.5,1.39
|
||||
Jamaica,27.22601,24.00421,8951.0,2717344.0,18.9,75.1,2.39
|
||||
Japan,21.87088,23.50004,34800.0,127317900.0,3.4,82.5,1.34
|
||||
Jordan,29.218009999999996,27.47362,10897.0,6010035.0,22.1,76.9,3.59
|
||||
Kazakhstan,26.65065,26.290779999999998,18797.0,15915966.0,25.9,67.1,2.51
|
||||
Kenya,23.06181,21.592579999999998,2358.0,38244442.0,71.0,60.8,4.76
|
||||
Kiribati,31.30769,29.2384,1803.0,98437.0,64.5,61.5,3.13
|
||||
Kuwait,31.161859999999997,29.172109999999996,91966.0,2705290.0,11.3,77.3,2.68
|
||||
Latvia,25.615129999999997,26.45693,20977.0,2144215.0,10.5,72.4,1.5
|
||||
Lebanon,27.70471,27.20117,14158.0,4109389.0,11.3,77.8,1.57
|
||||
Lesotho,26.780520000000003,21.90157,2041.0,1972194.0,114.2,44.5,3.34
|
||||
Liberia,23.21679,21.89537,588.0,3672782.0,100.9,59.9,5.19
|
||||
Libya,29.19874,26.54164,29853.0,6123022.0,18.8,75.6,2.64
|
||||
Lithuania,26.01424,26.86102,23223.0,3219802.0,8.2,72.1,1.42
|
||||
Luxembourg,26.09326,27.434040000000003,95001.0,485079.0,2.8,81.0,1.63
|
||||
"Macao, China",24.895039999999998,25.713820000000002,80191.0,507274.0,6.72,79.32,0.94
|
||||
"Macedonia, FYR",25.37646,26.34473,10872.0,2055266.0,11.8,74.5,1.47
|
||||
Madagascar,20.73501,21.403470000000002,1528.0,19926798.0,66.7,62.2,4.79
|
||||
Malawi,22.91455,22.034679999999998,674.0,13904671.0,101.1,52.4,5.78
|
||||
Malaysia,25.448320000000002,24.73069,19968.0,27197419.0,8.0,74.5,2.05
|
||||
Maldives,26.4132,23.219910000000002,12029.0,321026.0,16.0,78.5,2.38
|
||||
Mali,23.07655,21.78881,1602.0,14223403.0,148.3,58.5,6.82
|
||||
Malta,27.04993,27.683609999999998,27872.0,406392.0,6.6,80.7,1.38
|
||||
Mauritania,26.26476,22.62295,3356.0,3414552.0,103.0,67.9,4.94
|
||||
Mauritius,26.09824,25.15669,14615.0,1238013.0,15.8,72.9,1.58
|
||||
Mexico,28.737509999999997,27.42468,15826.0,114972821.0,17.9,75.4,2.35
|
||||
"Micronesia, Fed. Sts.",31.28402,28.10315,3197.0,104472.0,43.1,68.0,3.59
|
||||
Moldova,27.05617,24.2369,3890.0,4111168.0,17.6,70.4,1.49
|
||||
Mongolia,25.71375,24.88385,7563.0,2629666.0,34.8,64.8,2.37
|
||||
Montenegro,25.70186,26.55412,14183.0,619740.0,8.1,76.0,1.72
|
||||
Morocco,26.223090000000003,25.63182,6091.0,31350544.0,35.8,73.3,2.44
|
||||
Mozambique,23.317339999999998,21.93536,864.0,22994867.0,114.4,54.0,5.54
|
||||
Myanmar,22.47733,21.44932,2891.0,51030006.0,87.2,59.4,2.05
|
||||
Namibia,25.14988,22.65008,8169.0,2115703.0,62.2,59.1,3.36
|
||||
Nepal,20.72814,20.76344,1866.0,26325183.0,50.7,68.4,2.9
|
||||
Netherlands,25.47269,26.01541,47388.0,16519862.0,4.8,80.3,1.77
|
||||
New Zealand,27.36642,27.768929999999997,32122.0,4285380.0,6.4,80.3,2.12
|
||||
Nicaragua,27.57259,25.77291,4060.0,5594524.0,28.1,77.0,2.72
|
||||
Niger,21.95958,21.21958,843.0,15085130.0,141.3,58.0,7.59
|
||||
Nigeria,23.674020000000002,23.03322,4684.0,151115683.0,140.9,59.2,6.02
|
||||
Norway,25.73772,26.934240000000003,65216.0,4771633.0,3.6,80.8,1.96
|
||||
Oman,26.66535,26.241090000000003,47799.0,2652281.0,11.9,76.2,2.89
|
||||
Pakistan,23.44986,22.299139999999998,4187.0,163096985.0,95.5,64.1,3.58
|
||||
Panama,27.67758,26.26959,14033.0,3498679.0,21.0,77.3,2.61
|
||||
Papua New Guinea,25.77189,25.015060000000002,1982.0,6540267.0,69.7,58.6,4.07
|
||||
Paraguay,25.90523,25.54223,6684.0,6047131.0,25.7,74.0,3.06
|
||||
Peru,25.98511,24.770410000000002,9249.0,28642048.0,23.2,78.2,2.58
|
||||
Philippines,23.4671,22.872629999999997,5332.0,90297115.0,33.4,69.8,3.26
|
||||
Poland,25.918870000000002,26.6738,19996.0,38525752.0,6.7,75.4,1.33
|
||||
Portugal,26.183020000000003,26.68445,27747.0,10577458.0,4.1,79.4,1.36
|
||||
Puerto Rico,30.2212,28.378040000000002,35855.0,3728126.0,8.78,77.0,1.69
|
||||
Qatar,28.912509999999997,28.13138,126076.0,1388962.0,9.5,77.9,2.2
|
||||
Romania,25.22425,25.41069,18032.0,20741669.0,16.1,73.2,1.34
|
||||
Russia,27.21272,26.01131,22506.0,143123163.0,13.5,67.9,1.49
|
||||
Rwanda,22.07156,22.55453,1173.0,9750314.0,78.3,64.1,5.06
|
||||
Samoa,33.659079999999996,30.42475,5731.0,183440.0,18.8,72.3,4.43
|
||||
Sao Tome and Principe,24.88216,23.51233,2673.0,163595.0,61.0,66.0,4.41
|
||||
Saudi Arabia,29.598779999999998,27.884320000000002,44189.0,26742842.0,18.1,78.3,2.97
|
||||
Senegal,24.30968,21.927429999999998,2162.0,12229703.0,75.8,63.5,5.11
|
||||
Serbia,25.669970000000003,26.51495,12522.0,9109535.0,8.0,74.3,1.41
|
||||
Seychelles,27.973740000000003,25.56236,20065.0,91634.0,14.2,72.9,2.28
|
||||
Sierra Leone,23.93364,22.53139,1289.0,5521838.0,179.1,53.6,5.13
|
||||
Singapore,22.86642,23.83996,65991.0,4849641.0,2.8,80.6,1.28
|
||||
Slovak Republic,26.323729999999998,26.92717,24670.0,5396710.0,8.8,74.9,1.31
|
||||
Slovenia,26.582140000000003,27.43983,30816.0,2030599.0,3.7,78.7,1.43
|
||||
Solomon Islands,28.8762,27.159879999999998,1835.0,503410.0,33.1,62.3,4.36
|
||||
Somalia,22.66607,21.969170000000002,615.0,9132589.0,168.5,52.6,7.06
|
||||
South Africa,29.4803,26.85538,12263.0,50348811.0,66.1,53.4,2.54
|
||||
Spain,26.30554,27.49975,34676.0,45817016.0,5.0,81.1,1.42
|
||||
Sri Lanka,23.11717,21.96671,6907.0,19949553.0,11.7,74.0,2.32
|
||||
Sudan,23.16132,22.40484,3246.0,34470138.0,84.7,65.5,4.79
|
||||
Suriname,27.749859999999998,25.49887,13470.0,506657.0,26.4,70.2,2.41
|
||||
Swaziland,28.448859999999996,23.16969,5887.0,1153750.0,112.2,45.1,3.7
|
||||
Sweden,25.1466,26.37629,43421.0,9226333.0,3.2,81.1,1.92
|
||||
Switzerland,24.07242,26.20195,55020.0,7646542.0,4.7,82.0,1.47
|
||||
Syria,28.87418,26.919690000000003,6246.0,20097057.0,16.5,76.1,3.17
|
||||
Tajikistan,23.84799,23.77966,2001.0,7254072.0,56.2,69.6,3.7
|
||||
Tanzania,23.0843,22.47792,2030.0,42844744.0,72.4,60.4,5.54
|
||||
Thailand,24.38577,23.008029999999998,12216.0,66453255.0,15.6,73.9,1.48
|
||||
Timor-Leste,21.50694,20.59082,1486.0,1030915.0,70.2,69.9,6.48
|
||||
Togo,22.73858,21.87875,1219.0,6052937.0,96.4,57.5,4.88
|
||||
Tonga,34.25969,30.99563,4748.0,102816.0,17.0,70.3,4.01
|
||||
Trinidad and Tobago,28.27587,26.396690000000003,30875.0,1315372.0,24.9,71.7,1.8
|
||||
Tunisia,27.93706,25.15699,9938.0,10408091.0,19.4,76.8,2.04
|
||||
Turkey,28.247490000000003,26.703709999999997,16454.0,70344357.0,22.2,77.8,2.15
|
||||
Turkmenistan,24.66154,25.24796,8877.0,4917541.0,63.9,67.2,2.48
|
||||
Uganda,22.48126,22.35833,1437.0,31014427.0,89.3,56.0,6.34
|
||||
Ukraine,26.23317,25.42379,8762.0,46028476.0,12.9,67.8,1.38
|
||||
United Arab Emirates,29.614009999999997,28.053590000000003,73029.0,6900142.0,9.1,75.6,1.95
|
||||
United Kingdom,26.944490000000002,27.392490000000002,37739.0,61689620.0,5.6,79.7,1.87
|
||||
United States,28.343590000000003,28.456979999999998,50384.0,304473143.0,7.7,78.3,2.07
|
||||
Uruguay,26.593040000000002,26.39123,15317.0,3350832.0,13.0,76.0,2.11
|
||||
Uzbekistan,25.43432,25.32054,3733.0,26952719.0,49.2,69.6,2.46
|
||||
Vanuatu,28.458759999999998,26.78926,2944.0,225335.0,28.2,63.4,3.61
|
||||
Venezuela,28.134079999999997,27.445,17911.0,28116716.0,17.1,74.2,2.53
|
||||
Vietnam,21.065,20.9163,4085.0,86589342.0,26.2,74.1,1.86
|
||||
West Bank and Gaza,29.026429999999998,26.5775,3564.0,3854667.0,24.7,74.1,4.38
|
||||
Zambia,23.05436,20.68321,3039.0,13114579.0,94.9,51.1,5.88
|
||||
Zimbabwe,24.645220000000002,22.0266,1286.0,13495462.0,98.3,47.3,3.85
|
|
151
labs05/iris.data
Normal file
151
labs05/iris.data
Normal file
@ -0,0 +1,151 @@
|
||||
sepal_length,sepal_width,petal_length,petal_width,class
|
||||
5.1,3.5,1.4,0.2,Iris-setosa
|
||||
4.9,3.0,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.3,0.2,Iris-setosa
|
||||
4.6,3.1,1.5,0.2,Iris-setosa
|
||||
5.0,3.6,1.4,0.2,Iris-setosa
|
||||
5.4,3.9,1.7,0.4,Iris-setosa
|
||||
4.6,3.4,1.4,0.3,Iris-setosa
|
||||
5.0,3.4,1.5,0.2,Iris-setosa
|
||||
4.4,2.9,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.4,3.7,1.5,0.2,Iris-setosa
|
||||
4.8,3.4,1.6,0.2,Iris-setosa
|
||||
4.8,3.0,1.4,0.1,Iris-setosa
|
||||
4.3,3.0,1.1,0.1,Iris-setosa
|
||||
5.8,4.0,1.2,0.2,Iris-setosa
|
||||
5.7,4.4,1.5,0.4,Iris-setosa
|
||||
5.4,3.9,1.3,0.4,Iris-setosa
|
||||
5.1,3.5,1.4,0.3,Iris-setosa
|
||||
5.7,3.8,1.7,0.3,Iris-setosa
|
||||
5.1,3.8,1.5,0.3,Iris-setosa
|
||||
5.4,3.4,1.7,0.2,Iris-setosa
|
||||
5.1,3.7,1.5,0.4,Iris-setosa
|
||||
4.6,3.6,1.0,0.2,Iris-setosa
|
||||
5.1,3.3,1.7,0.5,Iris-setosa
|
||||
4.8,3.4,1.9,0.2,Iris-setosa
|
||||
5.0,3.0,1.6,0.2,Iris-setosa
|
||||
5.0,3.4,1.6,0.4,Iris-setosa
|
||||
5.2,3.5,1.5,0.2,Iris-setosa
|
||||
5.2,3.4,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.6,0.2,Iris-setosa
|
||||
4.8,3.1,1.6,0.2,Iris-setosa
|
||||
5.4,3.4,1.5,0.4,Iris-setosa
|
||||
5.2,4.1,1.5,0.1,Iris-setosa
|
||||
5.5,4.2,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.0,3.2,1.2,0.2,Iris-setosa
|
||||
5.5,3.5,1.3,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
4.4,3.0,1.3,0.2,Iris-setosa
|
||||
5.1,3.4,1.5,0.2,Iris-setosa
|
||||
5.0,3.5,1.3,0.3,Iris-setosa
|
||||
4.5,2.3,1.3,0.3,Iris-setosa
|
||||
4.4,3.2,1.3,0.2,Iris-setosa
|
||||
5.0,3.5,1.6,0.6,Iris-setosa
|
||||
5.1,3.8,1.9,0.4,Iris-setosa
|
||||
4.8,3.0,1.4,0.3,Iris-setosa
|
||||
5.1,3.8,1.6,0.2,Iris-setosa
|
||||
4.6,3.2,1.4,0.2,Iris-setosa
|
||||
5.3,3.7,1.5,0.2,Iris-setosa
|
||||
5.0,3.3,1.4,0.2,Iris-setosa
|
||||
7.0,3.2,4.7,1.4,Iris-versicolor
|
||||
6.4,3.2,4.5,1.5,Iris-versicolor
|
||||
6.9,3.1,4.9,1.5,Iris-versicolor
|
||||
5.5,2.3,4.0,1.3,Iris-versicolor
|
||||
6.5,2.8,4.6,1.5,Iris-versicolor
|
||||
5.7,2.8,4.5,1.3,Iris-versicolor
|
||||
6.3,3.3,4.7,1.6,Iris-versicolor
|
||||
4.9,2.4,3.3,1.0,Iris-versicolor
|
||||
6.6,2.9,4.6,1.3,Iris-versicolor
|
||||
5.2,2.7,3.9,1.4,Iris-versicolor
|
||||
5.0,2.0,3.5,1.0,Iris-versicolor
|
||||
5.9,3.0,4.2,1.5,Iris-versicolor
|
||||
6.0,2.2,4.0,1.0,Iris-versicolor
|
||||
6.1,2.9,4.7,1.4,Iris-versicolor
|
||||
5.6,2.9,3.6,1.3,Iris-versicolor
|
||||
6.7,3.1,4.4,1.4,Iris-versicolor
|
||||
5.6,3.0,4.5,1.5,Iris-versicolor
|
||||
5.8,2.7,4.1,1.0,Iris-versicolor
|
||||
6.2,2.2,4.5,1.5,Iris-versicolor
|
||||
5.6,2.5,3.9,1.1,Iris-versicolor
|
||||
5.9,3.2,4.8,1.8,Iris-versicolor
|
||||
6.1,2.8,4.0,1.3,Iris-versicolor
|
||||
6.3,2.5,4.9,1.5,Iris-versicolor
|
||||
6.1,2.8,4.7,1.2,Iris-versicolor
|
||||
6.4,2.9,4.3,1.3,Iris-versicolor
|
||||
6.6,3.0,4.4,1.4,Iris-versicolor
|
||||
6.8,2.8,4.8,1.4,Iris-versicolor
|
||||
6.7,3.0,5.0,1.7,Iris-versicolor
|
||||
6.0,2.9,4.5,1.5,Iris-versicolor
|
||||
5.7,2.6,3.5,1.0,Iris-versicolor
|
||||
5.5,2.4,3.8,1.1,Iris-versicolor
|
||||
5.5,2.4,3.7,1.0,Iris-versicolor
|
||||
5.8,2.7,3.9,1.2,Iris-versicolor
|
||||
6.0,2.7,5.1,1.6,Iris-versicolor
|
||||
5.4,3.0,4.5,1.5,Iris-versicolor
|
||||
6.0,3.4,4.5,1.6,Iris-versicolor
|
||||
6.7,3.1,4.7,1.5,Iris-versicolor
|
||||
6.3,2.3,4.4,1.3,Iris-versicolor
|
||||
5.6,3.0,4.1,1.3,Iris-versicolor
|
||||
5.5,2.5,4.0,1.3,Iris-versicolor
|
||||
5.5,2.6,4.4,1.2,Iris-versicolor
|
||||
6.1,3.0,4.6,1.4,Iris-versicolor
|
||||
5.8,2.6,4.0,1.2,Iris-versicolor
|
||||
5.0,2.3,3.3,1.0,Iris-versicolor
|
||||
5.6,2.7,4.2,1.3,Iris-versicolor
|
||||
5.7,3.0,4.2,1.2,Iris-versicolor
|
||||
5.7,2.9,4.2,1.3,Iris-versicolor
|
||||
6.2,2.9,4.3,1.3,Iris-versicolor
|
||||
5.1,2.5,3.0,1.1,Iris-versicolor
|
||||
5.7,2.8,4.1,1.3,Iris-versicolor
|
||||
6.3,3.3,6.0,2.5,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
7.1,3.0,5.9,2.1,Iris-virginica
|
||||
6.3,2.9,5.6,1.8,Iris-virginica
|
||||
6.5,3.0,5.8,2.2,Iris-virginica
|
||||
7.6,3.0,6.6,2.1,Iris-virginica
|
||||
4.9,2.5,4.5,1.7,Iris-virginica
|
||||
7.3,2.9,6.3,1.8,Iris-virginica
|
||||
6.7,2.5,5.8,1.8,Iris-virginica
|
||||
7.2,3.6,6.1,2.5,Iris-virginica
|
||||
6.5,3.2,5.1,2.0,Iris-virginica
|
||||
6.4,2.7,5.3,1.9,Iris-virginica
|
||||
6.8,3.0,5.5,2.1,Iris-virginica
|
||||
5.7,2.5,5.0,2.0,Iris-virginica
|
||||
5.8,2.8,5.1,2.4,Iris-virginica
|
||||
6.4,3.2,5.3,2.3,Iris-virginica
|
||||
6.5,3.0,5.5,1.8,Iris-virginica
|
||||
7.7,3.8,6.7,2.2,Iris-virginica
|
||||
7.7,2.6,6.9,2.3,Iris-virginica
|
||||
6.0,2.2,5.0,1.5,Iris-virginica
|
||||
6.9,3.2,5.7,2.3,Iris-virginica
|
||||
5.6,2.8,4.9,2.0,Iris-virginica
|
||||
7.7,2.8,6.7,2.0,Iris-virginica
|
||||
6.3,2.7,4.9,1.8,Iris-virginica
|
||||
6.7,3.3,5.7,2.1,Iris-virginica
|
||||
7.2,3.2,6.0,1.8,Iris-virginica
|
||||
6.2,2.8,4.8,1.8,Iris-virginica
|
||||
6.1,3.0,4.9,1.8,Iris-virginica
|
||||
6.4,2.8,5.6,2.1,Iris-virginica
|
||||
7.2,3.0,5.8,1.6,Iris-virginica
|
||||
7.4,2.8,6.1,1.9,Iris-virginica
|
||||
7.9,3.8,6.4,2.0,Iris-virginica
|
||||
6.4,2.8,5.6,2.2,Iris-virginica
|
||||
6.3,2.8,5.1,1.5,Iris-virginica
|
||||
6.1,2.6,5.6,1.4,Iris-virginica
|
||||
7.7,3.0,6.1,2.3,Iris-virginica
|
||||
6.3,3.4,5.6,2.4,Iris-virginica
|
||||
6.4,3.1,5.5,1.8,Iris-virginica
|
||||
6.0,3.0,4.8,1.8,Iris-virginica
|
||||
6.9,3.1,5.4,2.1,Iris-virginica
|
||||
6.7,3.1,5.6,2.4,Iris-virginica
|
||||
6.9,3.1,5.1,2.3,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
6.8,3.2,5.9,2.3,Iris-virginica
|
||||
6.7,3.3,5.7,2.5,Iris-virginica
|
||||
6.7,3.0,5.2,2.3,Iris-virginica
|
||||
6.3,2.5,5.0,1.9,Iris-virginica
|
||||
6.5,3.0,5.2,2.0,Iris-virginica
|
||||
6.2,3.4,5.4,2.3,Iris-virginica
|
||||
5.9,3.0,5.1,1.8,Iris-virginica
|
5001
labs05/mieszkania.csv
Normal file
5001
labs05/mieszkania.csv
Normal file
File diff suppressed because it is too large
Load Diff
1200
labs05/points.csv
Normal file
1200
labs05/points.csv
Normal file
File diff suppressed because it is too large
Load Diff
210
labs05/seeds-width-vs-length.csv
Normal file
210
labs05/seeds-width-vs-length.csv
Normal file
@ -0,0 +1,210 @@
|
||||
3.312,5.763
|
||||
3.333,5.554
|
||||
3.337,5.291
|
||||
3.379,5.324
|
||||
3.562,5.658
|
||||
3.312,5.386
|
||||
3.259,5.563
|
||||
3.302,5.42
|
||||
3.465,6.053
|
||||
3.505,5.884
|
||||
3.242,5.714
|
||||
3.201,5.438
|
||||
3.199,5.439
|
||||
3.156,5.479
|
||||
3.114,5.482
|
||||
3.333,5.351
|
||||
3.383,5.119
|
||||
3.514,5.527
|
||||
3.466,5.205
|
||||
3.049,5.226
|
||||
3.129,5.658
|
||||
3.168,5.52
|
||||
3.507,5.618
|
||||
2.936,5.099
|
||||
3.245,5.789
|
||||
3.421,5.833
|
||||
3.026,5.395
|
||||
2.956,5.395
|
||||
3.221,5.541
|
||||
3.065,5.516
|
||||
2.975,5.454
|
||||
3.371,5.757
|
||||
3.186,5.717
|
||||
3.15,5.585
|
||||
3.328,5.712
|
||||
3.485,5.709
|
||||
3.464,5.826
|
||||
3.683,5.832
|
||||
3.288,5.656
|
||||
3.298,5.397
|
||||
3.156,5.348
|
||||
3.158,5.351
|
||||
3.201,5.138
|
||||
3.396,5.877
|
||||
3.462,5.579
|
||||
3.155,5.376
|
||||
3.393,5.701
|
||||
3.377,5.57
|
||||
3.291,5.545
|
||||
3.258,5.678
|
||||
3.272,5.585
|
||||
3.434,5.674
|
||||
3.113,5.715
|
||||
3.199,5.504
|
||||
3.113,5.741
|
||||
3.212,5.702
|
||||
3.377,5.388
|
||||
3.412,5.384
|
||||
3.419,5.662
|
||||
3.032,5.159
|
||||
2.85,5.008
|
||||
2.879,4.902
|
||||
3.042,5.076
|
||||
3.07,5.395
|
||||
3.026,5.262
|
||||
3.119,5.139
|
||||
3.19,5.63
|
||||
3.158,5.609
|
||||
3.153,5.569
|
||||
2.882,5.412
|
||||
3.561,6.191
|
||||
3.484,5.998
|
||||
3.594,5.978
|
||||
3.93,6.154
|
||||
3.486,6.017
|
||||
3.438,5.927
|
||||
3.403,6.064
|
||||
3.814,6.579
|
||||
3.639,6.445
|
||||
3.566,5.85
|
||||
3.467,5.875
|
||||
3.857,6.006
|
||||
3.864,6.285
|
||||
3.772,6.384
|
||||
3.801,6.366
|
||||
3.651,6.173
|
||||
3.764,6.084
|
||||
3.67,6.549
|
||||
4.033,6.573
|
||||
4.032,6.45
|
||||
3.785,6.581
|
||||
3.796,6.172
|
||||
3.693,6.272
|
||||
3.86,6.037
|
||||
3.485,6.666
|
||||
3.463,6.139
|
||||
3.81,6.341
|
||||
3.552,6.449
|
||||
3.512,6.271
|
||||
3.684,6.219
|
||||
3.525,5.718
|
||||
3.694,5.89
|
||||
3.892,6.113
|
||||
3.681,6.369
|
||||
3.755,6.248
|
||||
3.786,6.037
|
||||
3.806,6.152
|
||||
3.573,6.033
|
||||
3.763,6.675
|
||||
3.674,6.153
|
||||
3.769,6.107
|
||||
3.791,6.303
|
||||
3.902,6.183
|
||||
3.737,6.259
|
||||
3.991,6.563
|
||||
3.719,6.416
|
||||
3.897,6.051
|
||||
3.815,6.245
|
||||
3.769,6.227
|
||||
3.857,6.493
|
||||
3.962,6.315
|
||||
3.563,6.059
|
||||
3.387,5.762
|
||||
3.771,5.98
|
||||
3.582,5.363
|
||||
3.869,6.111
|
||||
3.594,6.285
|
||||
3.687,5.979
|
||||
3.773,6.513
|
||||
3.69,5.791
|
||||
3.755,5.979
|
||||
3.825,6.144
|
||||
3.268,5.884
|
||||
3.395,5.845
|
||||
3.408,5.776
|
||||
3.465,5.477
|
||||
3.574,6.145
|
||||
3.231,5.92
|
||||
3.286,5.832
|
||||
3.472,5.872
|
||||
2.994,5.472
|
||||
3.073,5.541
|
||||
3.074,5.389
|
||||
2.967,5.224
|
||||
2.777,5.314
|
||||
2.687,5.279
|
||||
2.719,5.176
|
||||
2.967,5.267
|
||||
2.911,5.386
|
||||
2.648,5.317
|
||||
2.84,5.263
|
||||
2.776,5.405
|
||||
2.833,5.408
|
||||
2.693,5.22
|
||||
2.755,5.175
|
||||
2.675,5.25
|
||||
2.849,5.053
|
||||
2.745,5.394
|
||||
2.678,5.444
|
||||
2.695,5.304
|
||||
2.879,5.451
|
||||
2.81,5.35
|
||||
2.847,5.267
|
||||
2.968,5.333
|
||||
2.794,5.011
|
||||
2.941,5.105
|
||||
2.897,5.319
|
||||
2.837,5.417
|
||||
2.668,5.176
|
||||
2.715,5.09
|
||||
2.701,5.325
|
||||
2.845,5.167
|
||||
2.763,5.088
|
||||
2.763,5.136
|
||||
2.641,5.278
|
||||
2.821,4.981
|
||||
2.71,5.186
|
||||
2.642,5.145
|
||||
2.758,5.18
|
||||
2.893,5.357
|
||||
2.775,5.09
|
||||
3.017,5.236
|
||||
2.909,5.24
|
||||
2.85,5.108
|
||||
3.026,5.495
|
||||
2.683,5.363
|
||||
2.716,5.413
|
||||
2.675,5.088
|
||||
2.821,5.089
|
||||
2.787,4.899
|
||||
2.717,5.046
|
||||
2.804,5.091
|
||||
2.953,5.132
|
||||
2.63,5.18
|
||||
2.975,5.236
|
||||
3.126,5.16
|
||||
3.054,5.224
|
||||
3.128,5.32
|
||||
2.911,5.41
|
||||
3.155,5.073
|
||||
2.989,5.219
|
||||
3.135,4.984
|
||||
2.81,5.009
|
||||
3.091,5.183
|
||||
2.96,5.204
|
||||
2.981,5.137
|
||||
2.795,5.14
|
||||
3.232,5.236
|
||||
2.836,5.175
|
||||
2.974,5.243
|
|
606
labs05/sklearn cz. 1.ipynb
Normal file
606
labs05/sklearn cz. 1.ipynb
Normal file
File diff suppressed because one or more lines are too long
394
labs05/sklearn cz. 2.ipynb
Normal file
394
labs05/sklearn cz. 2.ipynb
Normal file
@ -0,0 +1,394 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Klasyfikacja w Pythonie"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**zad. 1** Które z poniższych problemów jest problemem regresji, a które klasyfikacji?\n",
|
||||
" 1. Sprawdzenie, czy wiadomość jest spamem.\n",
|
||||
" 1. Przewidzenie oceny (od 1 do 5 gwiazdek) na podstawie komentarza.\n",
|
||||
" 1. OCR cyfr: rozpoznanie cyfry z obrazka.\n",
|
||||
" \n",
|
||||
" Jeżeli problem jest klasyfikacyjny, to jakie mamy klasy?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Miary dla klasyfikacji"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Istnieje wiele miar (metryk), na podstawie których możemy ocenić jakość modelu. Podobnie jak w przypadku regresji liniowej potrzebne są dwie listy: lista poprawnych klas i lista predykcji z modelu. Najpopularniejszą z metryk jest trafność, którą definiuje się w następujący sposób:\n",
|
||||
" $$ACC = \\frac{k}{N}$$ \n",
|
||||
" \n",
|
||||
" gdzie: \n",
|
||||
" * $k$ to liczba poprawnie zaklasyfikowanych przypadków,\n",
|
||||
" * $N$ to liczebność zbioru testującego."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**zadanie** Napisz funkcję, która jako parametry przyjmie dwie listy (lista poprawnych klas i wyjście z klasyfikatora) i zwróci trafność."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def accuracy_measure(true, predicted):\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"true_label = [1, 1, 1, 0, 0]\n",
|
||||
"predicted = [0, 1, 0, 1, 0]\n",
|
||||
"print(\"ACC:\", accuracy_measure(true_label, predicted))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Klasyfikator $k$ najbliższych sąsiadów *(ang. k-nearest neighbors, KNN)*"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Klasyfikator [KNN](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm), który został wprowadzony na ostatnim wykładzie, jest bardzo intuicyjny. Pomysł, który stoi za tym klasyfikatorem, jest bardzo prosty: Jeżeli mamy nowy obiekt do zaklasyfikowania, to szukamy wśród danych trenujących $k$ najbardziej podobnych do niego przykładów i na ich podstawie decydujemy (np. biorąc większość) do jakiej klasy powinien należeć dany obiekt."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Przykład 1** Mamy za zadanie przydzielenie obiektów do dwóch klas: trójkątów lub kwadratów. Rozpatrywany obiekt jest zaznaczony zielonym kółkiem. Przyjmując $k=3$, mamy wśród sąsiadów 2 trójkąty i 1 kwadrat. Stąd obiekt powinien zostać zaklasyfikowany jako trójkąt. Jak zmienia się sytuacja, gdy przyjmiemy $k=5$?\n",
|
||||
"\n",
|
||||
"![Przykład 1](./220px-KnnClassification.svg.png)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Herbal Iris"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"*Herbal Iris* jest klasycznym zbiorem danych w uczeniu maszynowym, który powstał w 1936 roku. Zawiera on informacje na temat 150 egzemplarzy roślin, które należą do jednej z 3 odmian."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**zad. 2** Wczytaj do zmiennej ``data`` zbiór *Herbal Iris*, który znajduje się w pliku ``iris.data``. Jest to plik csv."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**zad. 3** Odpowiedz na poniższe pytania:\n",
|
||||
" 1. Które atrybuty są wejściowe, a w której kolumnie znajduje się klasa wyjściowa?\n",
|
||||
" 1. Ile jest różnych klas? Wypisz je na ekran.\n",
|
||||
" 1. Jaka jest średnia wartość w kolumnie ``sepal_length``? Jak zachowuje się średnia, jeżeli policzymy ją dla każdej z klas osobno?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Wytrenujmy klasyfikator *KNN*, ale najpierw przygotujmy dane. Funkcja ``train_test_split`` dzieli zadany zbiór danych na dwie części. My wykorzystamy ją do podziału na zbiór treningowy (67%) i testowy (33%), służy do tego parametr ``test_size``."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 95,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"X = data.loc[:, 'sepal_length':'petal_width']\n",
|
||||
"Y = data['class']\n",
|
||||
"\n",
|
||||
"(train_X, test_X, train_Y, test_Y) = train_test_split(X, Y, test_size=0.33, random_state=42)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Trenowanie klasyfikatora wygląda bardzo podobnie do treningu modelu regresji liniowej:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 96,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
|
||||
" metric_params=None, n_jobs=1, n_neighbors=3, p=2,\n",
|
||||
" weights='uniform')"
|
||||
]
|
||||
},
|
||||
"execution_count": 96,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||||
"\n",
|
||||
"model = KNeighborsClassifier(n_neighbors=3)\n",
|
||||
"model.fit(train_X, train_Y)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Mając wytrenowany model możemy wykorzystać go do predykcji na zbiorze testowym."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 97,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Zaklasyfikowane: Iris-versicolor, Orginalne: Iris-versicolor\n",
|
||||
"Zaklasyfikowane: Iris-setosa, Orginalne: Iris-setosa\n",
|
||||
"Zaklasyfikowane: Iris-virginica, Orginalne: Iris-virginica\n",
|
||||
"Zaklasyfikowane: Iris-versicolor, Orginalne: Iris-versicolor\n",
|
||||
"Zaklasyfikowane: Iris-versicolor, Orginalne: Iris-versicolor\n",
|
||||
"Zaklasyfikowane: Iris-setosa, Orginalne: Iris-setosa\n",
|
||||
"Zaklasyfikowane: Iris-versicolor, Orginalne: Iris-versicolor\n",
|
||||
"Zaklasyfikowane: Iris-virginica, Orginalne: Iris-virginica\n",
|
||||
"Zaklasyfikowane: Iris-versicolor, Orginalne: Iris-versicolor\n",
|
||||
"Zaklasyfikowane: Iris-versicolor, Orginalne: Iris-versicolor\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"predicted = model.predict(test_X)\n",
|
||||
"\n",
|
||||
"for i in range(10):\n",
|
||||
" print(\"Zaklasyfikowane: {}, Orginalne: {}\".format(predicted[i], test_Y.reset_index()['class'][i]))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Możemy obliczyć *accuracy*:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 98,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.98\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"\n",
|
||||
"print(accuracy_score(test_Y, predicted))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**zad. 4** Wytrenuj nowy model ``model_2`` zmieniając liczbę sąsiadów na 20. Czy zmieniły się wyniki?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**zad. 5** Wytrenuj model z $k=1$. Przeprowadź walidację na zbiorze trenującym zamiast na zbiorze testowym. Jakie wyniki otrzymałeś? Czy jest to wyjątek? Dlaczego tak się dzieje?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Walidacja krzyżowa"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Zbiór *herbal Iris* jest bardzo małym zbiorem. Wydzielenie z niego zbioru testowego jest obciążone dużą wariancją wyników, tj. w zależności od sposobu wyboru zbioru testowego wyniki mogą się bardzo różnić. Żeby temu zaradzić, stosuje się algorytm [walidacji krzyżowej](https://en.wikipedia.org/wiki/Cross-validation_(statistics)). Algorytm wygląda następująco:\n",
|
||||
" 1. Podziel zbiór danych na $n$ części (losowo).\n",
|
||||
" 1. Dla każdego i od 1 do $n$ wykonaj:\n",
|
||||
" 1. Weź $i$-tą część jako zbiór testowy, pozostałe dane jako zbiór trenujący.\n",
|
||||
" 1. Wytrenuj model na zbiorze trenującym.\n",
|
||||
" 1. Uruchom model na danych testowych i zapisz wyniki.\n",
|
||||
" 1. Ostateczne wyniki to średnia z $n$ wyników częściowych. \n",
|
||||
" \n",
|
||||
" W Pythonie służy do tego funkcja ``cross_val_score``, która przyjmuje jako parametry (kolejno) model, zbiór X, zbiór Y. Możemy ustawić parametr ``cv``, który określa na ile części mamy podzielić zbiór danych oraz parametr ``scoring`` określający miarę.\n",
|
||||
" \n",
|
||||
" W poniższym przykładzie dzielimy zbiór danych na 10 części (10-krotna walidacja krzyżowa) i jako miarę ustawiamy celność (ang. accuracy)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import cross_val_score\n",
|
||||
"\n",
|
||||
"knn = KNeighborsClassifier(n_neighbors=k)\n",
|
||||
"scores = cross_val_score(knn, X, Y, cv=10, scoring='accuracy')\n",
|
||||
"print(\"Wynik walidacji krzyżowej:\", scores.mean())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**zad. 6** Klasyfikator $k$ najbliższych sąsiadów posiada jeden parametr: $k$, który określa liczbę sąsiadów podczas klasyfikacji. Jak widzieliśmy, wybór $k$ może mieć duże znaczenie dla jakości klasyfikatora. Wykonaj:\n",
|
||||
" 1. Stwórz listę ``neighbors`` wszystkich liczb nieparzystych od 1 do 50.\n",
|
||||
" 1. Dla każdego elementu ``i`` z listy ``neighbors`` zbuduj klasyfikator *KNN* o liczbie sąsiadów równej ``i``. Następnie przeprowadź walidację krzyżową (parametry takie same jak powyżej) i zapisz wyniki do tablicy ``cv_scores``.\n",
|
||||
" 1. Znajdź ``k``, dla którego klasyfikator osiąga najwyższy wynik."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Wykres przedstawiający procent błędów w zależności od liczby sąsiadów."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"# changing to misclassification error\n",
|
||||
"MSE = [1 - x for x in cv_scores]\n",
|
||||
"\n",
|
||||
"# plot misclassification error vs k\n",
|
||||
"plt.plot(neighbors, MSE)\n",
|
||||
"plt.xlabel('Liczba sąsiadów')\n",
|
||||
"plt.ylabel('Procent błędów')\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Przejdź teraz do arkusza z zadaniem domowym, gdzie zastosujemy klasyfikator *kNN* na zbiorze danych z pierwszych zajęć."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
571
labs05/sklearn cz. 3.ipynb
Normal file
571
labs05/sklearn cz. 3.ipynb
Normal file
File diff suppressed because one or more lines are too long
251
labs05/zad_01.ipynb
Normal file
251
labs05/zad_01.ipynb
Normal file
@ -0,0 +1,251 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"1. Zaimportuj bibliotekę pandas jako pd."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"2. Wczytaj zbiór danych `311.csv` do zmiennej data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"3. Wyświetl 5 pierwszych wierszy z data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"4. Wyświetl nazwy kolumn."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"5. Wyświetl ile nasz zbiór danych ma kolumn i wierszy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"6. Wyświetl kolumnę 'City' z powyższego zbioru danych."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"7. Wyświetl, jakie wartości przyjmuje kolumna 'City'."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"8. Wyświetl tabelę rozstawną kolumny City."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"9. Wyświetl tylko pierwsze 4 wiersze z wcześniejszego polecenia."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"10. Wyświetl, w ilu przypadkach kolumna City zawiera NaN."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"11. Wyświetl data.info()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"12. Wyświetl tylko kolumny Borough i Agency i tylko 5 ostatnich linii."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"13. Wyświetl tylko te dane, dla których wartość z kolumny Agency jest równa\n",
|
||||
"NYPD. Zlicz ile jest takich przykładów.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"14. Wyświetl wartość minimalną i maksymalną z kolumny Longitude."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"15. Dodaj kolumnę diff, która powstanie przez sumowanie kolumn Longitude i Latitude."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"16. Wyświetl tabelę rozstawną dla kolumny 'Descriptor', dla której Agency jest\n",
|
||||
"równe NYPD."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
175
labs05/zad_02.ipynb
Normal file
175
labs05/zad_02.ipynb
Normal file
@ -0,0 +1,175 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"1. Załaduj bibliotekę `pandas`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"2. Wczytaj dane z pliku *mieszkania.csv* do zmiennej i wyświetl 5 pierwszych wierszy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"3. Znajdź informacje ilu pokojowe mieszkania są najpopularniejsze i ile ich jest."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"4. Znajdź 10 najtańszych mieszkań."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"5. Napisz funkcję ``find_borough(desc)``, która przyjmuje 1 argument typu *string* i zwróci jedną z dzielnic zdefiniowaną w liście ``dzielnice``. Funkcja ma zwrócić pierwszą (względem kolejności) nazwę dzielnicy, która jest zawarta w ``desc``. Jeżeli żadna nazwa nie została odnaleziona, zwróć napis *Inne*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def find_borough(desc):\n",
|
||||
" dzielnice = ['Stare Miasto',\n",
|
||||
" 'Wilda',\n",
|
||||
" 'Jeżyce',\n",
|
||||
" 'Rataje',\n",
|
||||
" 'Piątkowo',\n",
|
||||
" 'Winogrady',\n",
|
||||
" 'Miłostowo',\n",
|
||||
" 'Dębiec']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"6. Dodaj kolumnę ``Borough``, która będzie zawierać informacje o dzielnicach i powstanie z kolumny ``Localization``. Wykorzystaj do tego funkcję ``find_borough``."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"7. Wyświetl histogram przedstawiający liczbę ogłoszeń mieszkań z podziałem na dzielnice."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"8. Znajdź średnią cenę mieszkania n-pokojowego."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"9. Znajdź dzielnice, które zawierają oferty mieszkań na 13 piętrze."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"10. Znajdź wszystkie ogłoszenia mieszkań, które znajdują się na Winogradach, mają 3 pokoje i są położone na 1 piętrze."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Loading…
Reference in New Issue
Block a user