{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "d5d40ae6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: kaggle in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (1.5.13)\n", "Requirement already satisfied: tqdm in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from kaggle) (4.62.3)\n", "Requirement already satisfied: certifi in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from kaggle) (2021.10.8)\n", "Requirement already satisfied: requests in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from kaggle) (2.26.0)\n", "Requirement already satisfied: six>=1.10 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from kaggle) (1.16.0)\n", "Requirement already satisfied: urllib3 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from kaggle) (1.26.7)\n", "Requirement already satisfied: python-slugify in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from kaggle) (5.0.2)\n", "Requirement already satisfied: python-dateutil in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from kaggle) (2.8.2)\n", "Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n", "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from requests->kaggle) (3.2)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from requests->kaggle) (2.0.4)\n", "Requirement already satisfied: colorama in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from tqdm->kaggle) (0.4.4)\n", "Requirement already satisfied: pandas in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (1.3.4)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: pytz>=2017.3 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from 
pandas) (2021.3)\n", "Requirement already satisfied: numpy>=1.17.3 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from pandas) (1.20.3)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)\n" ] } ], "source": [ "#Instalacja potrzebnych bibliotek\n", "!pip install --user kaggle\n", "!pip install --user pandas" ] }, { "cell_type": "code", "execution_count": 4, "id": "85f88972", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading 2019-european-parliament-election-in-poland-data.zip to D:\\Studia\\IUM\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", " 0%| | 0.00/12.5M [00:00=1.17.3 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from pandas) (1.20.3)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: pytz>=2017.3 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from pandas) (2021.3)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)\n", "Requirement already satisfied: seaborn in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (0.11.2)\n", "Requirement already satisfied: numpy>=1.15 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from seaborn) (1.20.3)\n", "Requirement already satisfied: scipy>=1.0 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from seaborn) (1.7.1)\n", "Requirement already satisfied: matplotlib>=2.2 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from seaborn) (3.4.3)\n", "Requirement already satisfied: pandas>=0.23 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from seaborn) (1.3.4)\n", "Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", "Requirement 
already satisfied: kiwisolver>=1.0.1 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.1)\n", "Requirement already satisfied: pillow>=6.2.0 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.4.0)\n", "Requirement already satisfied: pyparsing>=2.2.1 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from matplotlib>=2.2->seaborn) (3.0.4)\n", "Requirement already satisfied: cycler>=0.10 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", "Requirement already satisfied: six in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from cycler>=0.10->matplotlib>=2.2->seaborn) (1.16.0)\n", "Requirement already satisfied: pytz>=2017.3 in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (from pandas>=0.23->seaborn) (2021.3)\n" ] } ], "source": [ "#Instalacja bibliotek\n", "!pip install --user pandas\n", "!pip install --user seaborn" ] }, { "cell_type": "code", "execution_count": 9, "id": "a25536a9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Kod terytorialny gminyNr obwodu głosowaniaTyp obszaruTyp obwoduSiedziba Obwodowej Komisji WyborczejGminaPowiatWojewództwoKomisja otrzymała kart do głosowaniaLiczba wyborców uprawnionych do głosowania...2 KNUTH Maciej3 JANOWICZ Kamila Marta4 WIŚNIEWSKI Radosław Andrzej5 PAWEŁCZAK Barnaba Bogusz6 STRUK Anna Marta7 WARDA Małgorzata Maja8 MAKRENEK Helena Cecylia9 DOMŻALSKA Elżbieta Maria10 GRAJKOWSKI PiotrRazem.6
02201011wieśstałyGminna Biblioteka i Ośrodek Kultury w Borzytuc...gm. Borzytuchombytowskipomorskie11991583...0100000004
12201012wieśstałySala wiejska, Dąbrówka 22, 77-100 Bytówgm. Borzytuchombytowskipomorskie349463...2000000002
22201013wieśstałySzkoła Podstawowa, Niedarzyno 21b, 77-141 Borz...gm. Borzytuchombytowskipomorskie319424...0000000002
32201021miastostałyZespół Szkół Ogólnokształcących, ul. Gdańska 5...gm. Bytówbytowskipomorskie11801550...0000000011
42201022miastostałyZespół Szkół Ponadgimnazjalnych, ul. Gen. Wład...gm. Bytówbytowskipomorskie10621392...0001000025
..................................................................
139122640117miastostałySpółdzielnia Mieszkaniowa im. J. I. Kraszewske...m. SopotSopotpomorskie12191615...10030100110
139222640118miastostałyZespół Szkół Handlowych, ul. Wejherowska 1, 81...m. SopotSopotpomorskie719954...1210110209
139322640119miastostałyMiejska Biblioteka Publiczna Filia Nr 8, ul. M...m. SopotSopotpomorskie10911417...0000100005
139422640120miastostałySzkoła Podstawowa z Oddziałami Integracyjnymi ...m. SopotSopotpomorskie10781437...11102000011
139522640121miastostałyPrzedszkole z Oddziałami Integracyjnymi Nr 12,...m. SopotSopotpomorskie10911438...0002000107
\n", "

1396 rows × 117 columns

\n", "
" ], "text/plain": [ " Kod terytorialny gminy Nr obwodu głosowania Typ obszaru Typ obwodu \\\n", "0 220101 1 wieś stały \n", "1 220101 2 wieś stały \n", "2 220101 3 wieś stały \n", "3 220102 1 miasto stały \n", "4 220102 2 miasto stały \n", "... ... ... ... ... \n", "1391 226401 17 miasto stały \n", "1392 226401 18 miasto stały \n", "1393 226401 19 miasto stały \n", "1394 226401 20 miasto stały \n", "1395 226401 21 miasto stały \n", "\n", " Siedziba Obwodowej Komisji Wyborczej Gmina \\\n", "0 Gminna Biblioteka i Ośrodek Kultury w Borzytuc... gm. Borzytuchom \n", "1 Sala wiejska, Dąbrówka 22, 77-100 Bytów gm. Borzytuchom \n", "2 Szkoła Podstawowa, Niedarzyno 21b, 77-141 Borz... gm. Borzytuchom \n", "3 Zespół Szkół Ogólnokształcących, ul. Gdańska 5... gm. Bytów \n", "4 Zespół Szkół Ponadgimnazjalnych, ul. Gen. Wład... gm. Bytów \n", "... ... ... \n", "1391 Spółdzielnia Mieszkaniowa im. J. I. Kraszewske... m. Sopot \n", "1392 Zespół Szkół Handlowych, ul. Wejherowska 1, 81... m. Sopot \n", "1393 Miejska Biblioteka Publiczna Filia Nr 8, ul. M... m. Sopot \n", "1394 Szkoła Podstawowa z Oddziałami Integracyjnymi ... m. Sopot \n", "1395 Przedszkole z Oddziałami Integracyjnymi Nr 12,... m. Sopot \n", "\n", " Powiat Województwo Komisja otrzymała kart do głosowania \\\n", "0 bytowski pomorskie 1199 \n", "1 bytowski pomorskie 349 \n", "2 bytowski pomorskie 319 \n", "3 bytowski pomorskie 1180 \n", "4 bytowski pomorskie 1062 \n", "... ... ... ... \n", "1391 Sopot pomorskie 1219 \n", "1392 Sopot pomorskie 719 \n", "1393 Sopot pomorskie 1091 \n", "1394 Sopot pomorskie 1078 \n", "1395 Sopot pomorskie 1091 \n", "\n", " Liczba wyborców uprawnionych do głosowania ... 2 KNUTH Maciej \\\n", "0 1583 ... 0 \n", "1 463 ... 2 \n", "2 424 ... 0 \n", "3 1550 ... 0 \n", "4 1392 ... 0 \n", "... ... ... ... \n", "1391 1615 ... 1 \n", "1392 954 ... 1 \n", "1393 1417 ... 0 \n", "1394 1437 ... 1 \n", "1395 1438 ... 
0 \n", "\n", " 3 JANOWICZ Kamila Marta 4 WIŚNIEWSKI Radosław Andrzej \\\n", "0 1 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "... ... ... \n", "1391 0 0 \n", "1392 2 1 \n", "1393 0 0 \n", "1394 1 1 \n", "1395 0 0 \n", "\n", " 5 PAWEŁCZAK Barnaba Bogusz 6 STRUK Anna Marta 7 WARDA Małgorzata Maja \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 1 0 0 \n", "... ... ... ... \n", "1391 3 0 1 \n", "1392 0 1 1 \n", "1393 0 1 0 \n", "1394 0 2 0 \n", "1395 2 0 0 \n", "\n", " 8 MAKRENEK Helena Cecylia 9 DOMŻALSKA Elżbieta Maria \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "... ... ... \n", "1391 0 0 \n", "1392 0 2 \n", "1393 0 0 \n", "1394 0 0 \n", "1395 0 1 \n", "\n", " 10 GRAJKOWSKI Piotr Razem.6 \n", "0 0 4 \n", "1 0 2 \n", "2 0 2 \n", "3 1 1 \n", "4 2 5 \n", "... ... ... \n", "1391 1 10 \n", "1392 0 9 \n", "1393 0 5 \n", "1394 0 11 \n", "1395 0 7 \n", "\n", "[1396 rows x 117 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Trial load of a single data sheet (1 of 13) to inspect its structure.\n",
 "import pandas as pd\n",
 "# Forward slash in the path: a backslash inside a normal string literal forms an\n",
 "# invalid escape sequence and only resolves as a path separator on Windows.\n",
 "data_district_1 = pd.read_csv('2019-european-parliament-election-in-poland-data/wyniki_gl_na_kand_po_obwodach_sheet_1.csv')\n",
 "data_district_1\n" ] }, { "cell_type": "code", "execution_count": 10, "id": "3f9bd200", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Kod terytorialny gminyNr obwodu głosowaniaTyp obszaruTyp obwoduSiedziba Obwodowej Komisji WyborczejGminaPowiatWojewództwoKomisja otrzymała kart do głosowaniaLiczba wyborców uprawnionych do głosowania...2 KNUTH Maciej3 JANOWICZ Kamila Marta4 WIŚNIEWSKI Radosław Andrzej5 PAWEŁCZAK Barnaba Bogusz6 STRUK Anna Marta7 WARDA Małgorzata Maja8 MAKRENEK Helena Cecylia9 DOMŻALSKA Elżbieta Maria10 GRAJKOWSKI PiotrRazem.6
count1396.0000001396.0000001396139613961396139613961396.0000001396.000000...1396.0000001396.0000001396.0000001396.0000001396.0000001396.0000001396.0000001396.0000001396.0000001396.000000
uniqueNaNNaN281267123201NaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
topNaNNaNmiastostałySzkoła Podstawowa Nr 2 z Oddziałami Sportowymi...m. GdańskGdańskpomorskieNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
freqNaNNaN759131451981981396NaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
mean222342.94412625.264327NaNNaNNaNNaNNaNNaN963.1504301264.388252...0.5343840.5730660.3101720.2084530.5759310.4097420.1289400.2134670.5064476.035100
std2399.81674441.512458NaNNaNNaNNaNNaNNaN475.617459640.724087...0.9986911.1791340.6648880.4880950.9332380.7922690.3996330.5054450.8239005.392367
min220101.0000001.000000NaNNaNNaNNaNNaNNaN30.00000018.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%220604.0000003.000000NaNNaNNaNNaNNaNNaN593.750000776.250000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000002.000000
50%221302.0000008.000000NaNNaNNaNNaNNaNNaN970.0000001267.500000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000005.000000
75%226101.00000019.000000NaNNaNNaNNaNNaNNaN1295.0000001707.250000...1.0000001.0000000.0000000.0000001.0000001.0000000.0000000.0000001.0000009.000000
max226401.000000198.000000NaNNaNNaNNaNNaNNaN2452.0000003273.000000...11.00000018.0000007.0000003.0000007.0000006.0000004.0000004.0000005.00000035.000000
\n", "

11 rows × 117 columns

\n", "
" ], "text/plain": [ " Kod terytorialny gminy Nr obwodu głosowania Typ obszaru Typ obwodu \\\n", "count 1396.000000 1396.000000 1396 1396 \n", "unique NaN NaN 2 8 \n", "top NaN NaN miasto stały \n", "freq NaN NaN 759 1314 \n", "mean 222342.944126 25.264327 NaN NaN \n", "std 2399.816744 41.512458 NaN NaN \n", "min 220101.000000 1.000000 NaN NaN \n", "25% 220604.000000 3.000000 NaN NaN \n", "50% 221302.000000 8.000000 NaN NaN \n", "75% 226101.000000 19.000000 NaN NaN \n", "max 226401.000000 198.000000 NaN NaN \n", "\n", " Siedziba Obwodowej Komisji Wyborczej Gmina Powiat \\\n", "count 1396 1396 1396 \n", "unique 1267 123 20 \n", "top Szkoła Podstawowa Nr 2 z Oddziałami Sportowymi... m. Gdańsk Gdańsk \n", "freq 5 198 198 \n", "mean NaN NaN NaN \n", "std NaN NaN NaN \n", "min NaN NaN NaN \n", "25% NaN NaN NaN \n", "50% NaN NaN NaN \n", "75% NaN NaN NaN \n", "max NaN NaN NaN \n", "\n", " Województwo Komisja otrzymała kart do głosowania \\\n", "count 1396 1396.000000 \n", "unique 1 NaN \n", "top pomorskie NaN \n", "freq 1396 NaN \n", "mean NaN 963.150430 \n", "std NaN 475.617459 \n", "min NaN 30.000000 \n", "25% NaN 593.750000 \n", "50% NaN 970.000000 \n", "75% NaN 1295.000000 \n", "max NaN 2452.000000 \n", "\n", " Liczba wyborców uprawnionych do głosowania ... 2 KNUTH Maciej \\\n", "count 1396.000000 ... 1396.000000 \n", "unique NaN ... NaN \n", "top NaN ... NaN \n", "freq NaN ... NaN \n", "mean 1264.388252 ... 0.534384 \n", "std 640.724087 ... 0.998691 \n", "min 18.000000 ... 0.000000 \n", "25% 776.250000 ... 0.000000 \n", "50% 1267.500000 ... 0.000000 \n", "75% 1707.250000 ... 1.000000 \n", "max 3273.000000 ... 
11.000000 \n", "\n", " 3 JANOWICZ Kamila Marta 4 WIŚNIEWSKI Radosław Andrzej \\\n", "count 1396.000000 1396.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 0.573066 0.310172 \n", "std 1.179134 0.664888 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 1.000000 0.000000 \n", "max 18.000000 7.000000 \n", "\n", " 5 PAWEŁCZAK Barnaba Bogusz 6 STRUK Anna Marta \\\n", "count 1396.000000 1396.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 0.208453 0.575931 \n", "std 0.488095 0.933238 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 0.000000 1.000000 \n", "max 3.000000 7.000000 \n", "\n", " 7 WARDA Małgorzata Maja 8 MAKRENEK Helena Cecylia \\\n", "count 1396.000000 1396.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 0.409742 0.128940 \n", "std 0.792269 0.399633 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 1.000000 0.000000 \n", "max 6.000000 4.000000 \n", "\n", " 9 DOMŻALSKA Elżbieta Maria 10 GRAJKOWSKI Piotr Razem.6 \n", "count 1396.000000 1396.000000 1396.000000 \n", "unique NaN NaN NaN \n", "top NaN NaN NaN \n", "freq NaN NaN NaN \n", "mean 0.213467 0.506447 6.035100 \n", "std 0.505445 0.823900 5.392367 \n", "min 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.000000 2.000000 \n", "50% 0.000000 0.000000 5.000000 \n", "75% 0.000000 1.000000 9.000000 \n", "max 4.000000 5.000000 35.000000 \n", "\n", "[11 rows x 117 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Opis danych z arkusza testowego\n", "data_district_1.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 35, "id": "074096ed", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Kod terytorialny gminy Nr obwodu głosowania Typ obszaru \\\n", "0 220101 1 wieś \n", "1 220101 2 wieś \n", 
"2 220101 3 wieś \n", "3 220102 1 miasto \n", "4 220102 2 miasto \n", "... ... ... ... \n", "1836 326301 23 miasto \n", "1837 326301 24 miasto \n", "1838 326301 25 miasto \n", "1839 326301 26 miasto \n", "1840 326301 27 miasto \n", "\n", " Typ obwodu Siedziba Obwodowej Komisji Wyborczej \\\n", "0 stały Gminna Biblioteka i Ośrodek Kultury w Borzytuc... \n", "1 stały Sala wiejska, Dąbrówka 22, 77-100 Bytów \n", "2 stały Szkoła Podstawowa, Niedarzyno 21b, 77-141 Borz... \n", "3 stały Zespół Szkół Ogólnokształcących, ul. Gdańska 5... \n", "4 stały Zespół Szkół Ponadgimnazjalnych, ul. Gen. Wład... \n", "... ... ... \n", "1836 stały Miejski Dom Kultury Filia Nr 2, Świnoujście, W... \n", "1837 stały Miejski Dom Kultury Filia Nr 3, Świnoujście, K... \n", "1838 stały Miejski Dom Kultury Filia Nr 1, Świnoujście, P... \n", "1839 zakład leczniczy Szpital Miejski im. Jana Garduły, ul. Mieszka ... \n", "1840 zakład leczniczy Samodzielny Publiczny Zakład Opieki Zdrowotnej... \n", "\n", " Gmina Powiat Województwo \\\n", "0 gm. Borzytuchom bytowski pomorskie \n", "1 gm. Borzytuchom bytowski pomorskie \n", "2 gm. Borzytuchom bytowski pomorskie \n", "3 gm. Bytów bytowski pomorskie \n", "4 gm. Bytów bytowski pomorskie \n", "... ... ... ... \n", "1836 m. Świnoujście Świnoujście zachodniopomorskie \n", "1837 m. Świnoujście Świnoujście zachodniopomorskie \n", "1838 m. Świnoujście Świnoujście zachodniopomorskie \n", "1839 m. Świnoujście Świnoujście zachodniopomorskie \n", "1840 m. Świnoujście Świnoujście zachodniopomorskie \n", "\n", " Komisja otrzymała kart do głosowania \\\n", "0 1199 \n", "1 349 \n", "2 319 \n", "3 1180 \n", "4 1062 \n", "... ... \n", "1836 776 \n", "1837 451 \n", "1838 750 \n", "1839 70 \n", "1840 32 \n", "\n", " Liczba wyborców uprawnionych do głosowania ... Konfederacja Wiosna \\\n", "0 1583 ... 20 56 \n", "1 463 ... 3 10 \n", "2 424 ... 5 12 \n", "3 1550 ... 17 55 \n", "4 1392 ... 33 38 \n", "... ... ... ... ... \n", "1836 1043 ... 14 34 \n", "1837 583 ... 
4 19 \n", "1838 981 ... 12 31 \n", "1839 69 ... 1 2 \n", "1840 45 ... 0 0 \n", "\n", " Koalicja Europejska Prawo i Sprawiedliwość Lewica Razem Kukiz15 \\\n", "0 193 247 8 12 \n", "1 73 61 2 2 \n", "2 64 62 0 8 \n", "3 327 352 4 22 \n", "4 291 215 0 11 \n", "... ... ... ... ... \n", "1836 183 130 3 15 \n", "1837 90 87 2 4 \n", "1838 223 195 6 9 \n", "1839 17 6 1 0 \n", "1840 6 6 0 1 \n", "\n", " Polska Fair Play POLEXIT Jedność Narodu obwod \n", "0 4 0 0 1 \n", "1 2 0 0 1 \n", "2 2 0 0 1 \n", "3 1 0 0 1 \n", "4 5 0 0 1 \n", "... ... ... ... ... \n", "1836 0 0 0 13 \n", "1837 0 0 0 13 \n", "1838 0 0 0 13 \n", "1839 0 0 0 13 \n", "1840 0 0 0 13 \n", "\n", "[27285 rows x 45 columns]\n" ] } ], "source": [
 "# Load and preprocess the complete dataset: one CSV sheet per electoral\n",
 "# constituency (13 in total), combined into a single frame `big_dataset`.\n",
 "\n",
 "DATA_DIR = '2019-european-parliament-election-in-poland-data'\n",
 "# Number of leading columns kept from every sheet; the columns after them hold\n",
 "# the detailed per-candidate vote counts, which are not needed here.\n",
 "N_COMMON_COLS = 35\n",
 "\n",
 "# Committees looked up unconditionally in every sheet: short name -> source column.\n",
 "NATIONWIDE_LISTS = {\n",
 "    'Konfederacja': 'Lista nr 1 - KWW KONFEDERACJA KORWIN BRAUN LIROY NARODOWCY',\n",
 "    'Wiosna': 'Lista nr 2 - KW WIOSNA ROBERTA BIEDRONIA',\n",
 "    'Koalicja Europejska': 'Lista nr 3 - KKW KOALICJA EUROPEJSKA PO PSL SLD .N ZIELONI',\n",
 "    'Prawo i Sprawiedliwość': 'Lista nr 4 - KW PRAWO I SPRAWIEDLIWOŚĆ',\n",
 "    'Lewica Razem': 'Lista nr 5 - KKW LEWICA RAZEM - RAZEM, UNIA PRACY, RSS',\n",
 "    'Kukiz15': 'Lista nr 6 - KWW KUKIZ\\'15',\n",
 "}\n",
 "# Committees registered only in some constituencies; their column is filled\n",
 "# with 0 for sheets that do not contain it.\n",
 "REGIONAL_LISTS = {\n",
 "    'Polska Fair Play': 'Lista nr 7 - KWW POLSKA FAIR PLAY BEZPARTYJNI GWIAZDOWSKI',\n",
 "    'POLEXIT': 'Lista nr 9 - KKW POLEXIT - KOALICJA',\n",
 "    'Jedność Narodu': 'Lista nr 10 - KW JEDNOŚĆ NARODU',\n",
 "}\n",
 "\n",
 "district_frames = []\n",
 "for i in range(1, 14):\n",
 "    # Forward slash: a backslash before 'w' is an invalid escape sequence in a\n",
 "    # normal string literal and the path would only resolve on Windows.\n",
 "    filename = DATA_DIR + '/wyniki_gl_na_kand_po_obwodach_sheet_' + str(i) + '.csv'\n",
 "    data_district = pd.read_csv(filename)\n",
 "    # Keep only the common leading columns (drops the per-candidate details).\n",
 "    df = data_district.iloc[:, :N_COMMON_COLS].copy()\n",
 "    # Add each committee's total votes per polling station under a short name.\n",
 "    for short_name, source_col in NATIONWIDE_LISTS.items():\n",
 "        df[short_name] = data_district[source_col]\n",
 "    for short_name, source_col in REGIONAL_LISTS.items():\n",
 "        df[short_name] = data_district[source_col] if source_col in data_district.columns else 0\n",
 "    # Number of the constituency (source sheet) this polling station belongs to.\n",
 "    df['obwod'] = i\n",
 "    district_frames.append(df)\n",
 "\n",
 "# Concatenate once instead of growing the frame inside the loop; the original\n",
 "# per-sheet row index is kept, exactly as with repeated concatenation.\n",
 "big_dataset = pd.concat(district_frames)\n",
 "\n",
 "print(big_dataset)\n" ] }, { "cell_type": "code", "execution_count": 36, "id": "189c1499", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Kod terytorialny gminyNr obwodu głosowaniaTyp obszaruTyp obwoduSiedziba Obwodowej Komisji WyborczejGminaPowiatWojewództwoKomisja otrzymała kart do głosowaniaLiczba wyborców uprawnionych do głosowania...KonfederacjaWiosnaKoalicja EuropejskaPrawo i SprawiedliwośćLewica RazemKukiz15Polska Fair PlayPOLEXITJedność Naroduobwod
count27285.00000027285.00000027285272852728527285272852728527285.00000027285.000000...27285.00000027285.00000027285.00000027285.00000027285.00000027285.00000027285.00000027285.00000027285.00000027285.000000
uniqueNaNNaN51124676241537216NaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
topNaNNaNwieśstałySzkoła Podstawowa Nr 89, os. Piastów 34a, 31-6...m. KrakówWarszawamazowieckieNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
freqNaNNaN136532542674507873689NaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
mean166300.61345138.968921NaNNaNNaNNaNNaNNaN848.4025661103.861169...22.76664830.308778192.411032226.9664656.18453418.4557082.7125890.2895360.0810347.594502
std89754.821954113.809837NaNNaNNaNNaNNaNNaN445.941482587.857259...19.42699832.947553180.300466132.4849006.52625414.3811755.1983460.9437850.4519843.636784
min20101.0000001.000000NaNNaNNaNNaNNaNNaN10.0000005.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
25%101004.0000003.000000NaNNaNNaNNaNNaNNaN500.000000654.000000...9.0000007.00000052.000000132.0000002.0000008.0000000.0000000.0000000.0000005.000000
50%146513.0000007.000000NaNNaNNaNNaNNaNNaN811.0000001068.000000...19.00000020.000000138.000000214.0000004.00000016.0000000.0000000.0000000.0000008.000000
75%241710.00000018.000000NaNNaNNaNNaNNaNNaN1170.0000001527.000000...33.00000043.000000285.000000306.0000009.00000026.0000003.0000000.0000000.00000011.000000
max326301.0000001150.000000NaNNaNNaNNaNNaNNaN4200.0000003771.000000...496.000000744.0000001427.0000001366.000000138.000000191.00000061.00000021.0000009.00000013.000000
\n", "

11 rows × 45 columns

\n", "
" ], "text/plain": [ " Kod terytorialny gminy Nr obwodu głosowania Typ obszaru Typ obwodu \\\n", "count 27285.000000 27285.000000 27285 27285 \n", "unique NaN NaN 5 11 \n", "top NaN NaN wieś stały \n", "freq NaN NaN 13653 25426 \n", "mean 166300.613451 38.968921 NaN NaN \n", "std 89754.821954 113.809837 NaN NaN \n", "min 20101.000000 1.000000 NaN NaN \n", "25% 101004.000000 3.000000 NaN NaN \n", "50% 146513.000000 7.000000 NaN NaN \n", "75% 241710.000000 18.000000 NaN NaN \n", "max 326301.000000 1150.000000 NaN NaN \n", "\n", " Siedziba Obwodowej Komisji Wyborczej Gmina \\\n", "count 27285 27285 \n", "unique 24676 2415 \n", "top Szkoła Podstawowa Nr 89, os. Piastów 34a, 31-6... m. Kraków \n", "freq 7 450 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " Powiat Województwo Komisja otrzymała kart do głosowania \\\n", "count 27285 27285 27285.000000 \n", "unique 372 16 NaN \n", "top Warszawa mazowieckie NaN \n", "freq 787 3689 NaN \n", "mean NaN NaN 848.402566 \n", "std NaN NaN 445.941482 \n", "min NaN NaN 10.000000 \n", "25% NaN NaN 500.000000 \n", "50% NaN NaN 811.000000 \n", "75% NaN NaN 1170.000000 \n", "max NaN NaN 4200.000000 \n", "\n", " Liczba wyborców uprawnionych do głosowania ... Konfederacja \\\n", "count 27285.000000 ... 27285.000000 \n", "unique NaN ... NaN \n", "top NaN ... NaN \n", "freq NaN ... NaN \n", "mean 1103.861169 ... 22.766648 \n", "std 587.857259 ... 19.426998 \n", "min 5.000000 ... 0.000000 \n", "25% 654.000000 ... 9.000000 \n", "50% 1068.000000 ... 19.000000 \n", "75% 1527.000000 ... 33.000000 \n", "max 3771.000000 ... 
496.000000 \n", "\n", " Wiosna Koalicja Europejska Prawo i Sprawiedliwość \\\n", "count 27285.000000 27285.000000 27285.000000 \n", "unique NaN NaN NaN \n", "top NaN NaN NaN \n", "freq NaN NaN NaN \n", "mean 30.308778 192.411032 226.966465 \n", "std 32.947553 180.300466 132.484900 \n", "min 0.000000 0.000000 0.000000 \n", "25% 7.000000 52.000000 132.000000 \n", "50% 20.000000 138.000000 214.000000 \n", "75% 43.000000 285.000000 306.000000 \n", "max 744.000000 1427.000000 1366.000000 \n", "\n", " Lewica Razem Kukiz15 Polska Fair Play POLEXIT \\\n", "count 27285.000000 27285.000000 27285.000000 27285.000000 \n", "unique NaN NaN NaN NaN \n", "top NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN \n", "mean 6.184534 18.455708 2.712589 0.289536 \n", "std 6.526254 14.381175 5.198346 0.943785 \n", "min 0.000000 0.000000 0.000000 0.000000 \n", "25% 2.000000 8.000000 0.000000 0.000000 \n", "50% 4.000000 16.000000 0.000000 0.000000 \n", "75% 9.000000 26.000000 3.000000 0.000000 \n", "max 138.000000 191.000000 61.000000 21.000000 \n", "\n", " Jedność Narodu obwod \n", "count 27285.000000 27285.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 0.081034 7.594502 \n", "std 0.451984 3.636784 \n", "min 0.000000 1.000000 \n", "25% 0.000000 5.000000 \n", "50% 0.000000 8.000000 \n", "75% 0.000000 11.000000 \n", "max 9.000000 13.000000 \n", "\n", "[11 rows x 45 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Opis głównego zbioru\n", "big_dataset.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 38, "id": "77338a72", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Upewnienie się, że w danych nie ma wartości NaN\n", "big_dataset.isnull().values.any()" ] }, { "cell_type": "code", "execution_count": 65, "id": "e93ca922", "metadata": {}, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "621188\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAEECAYAAAABJn7JAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAXnklEQVR4nO3de5QcZZ3G8e9DEi7hloUMLAsJg+EuSoBZ5LKw3A4bAXF1QWUBBcGIuwiIqKh7OFHPIrqKqwdBIyKggHJXooawSEyAJDKB3AmiJEIUzEBA7iQkv/3jfTtTmXQyPcn05CV5Puf0merq6upfVVc99fZb1T2KCMzMrFwbresCzMxs9RzUZmaFc1CbmRXOQW1mVjgHtZlZ4RzUZmaFa1pQS7pG0kJJsxqc/gOS5kiaLenGZtVlZvZWo2ZdRy3pcOBl4PqI2KebaXcDbgaOiojnJW0XEQubUpiZ2VtM01rUETEBWFQdJ2mYpLGSpkqaKGnP/NDHgO9GxPP5uQ5pM7Osr/uoRwOfjIgDgIuAK/P43YHdJT0gabKkEX1cl5lZsfr31QtJ2gI4BLhFUm30JpU6dgOOAHYCJkraJyJe6Kv6zMxK1WdBTWq9vxARw+s8tgCYHBFLgHmSHiMF90N9WJ+ZWZH6rOsjIl4khfDJAEr2zQ/fCRyZxw8mdYU80Ve1mZmVrJmX590ETAL2kLRA0lnAqcBZkqYDs4H35snvBp6TNAe4D/hMRDzXrNrMzN5KmnZ5npmZ9Q5/M9HMrHBNOZk4ePDgaG1tbcaszczWS1OnTn02IlrqPdaUoG5tbaW9vb0ZszYzWy9J+tOqHnPXh5lZ4RzUZmaFc1CbmRXOQW1mVjgHtZlZ4RzUZmaFc1CbmRXOQW1mVjgHtZlZ4fry96jNitR68S/XdQkNmX/Z8eu6BFtH3KI2Myucg9rMrHANBbWkQZJulTRX0qOSDm52YWZmljTaR/1tYGxEnCRpY2BgE2syM7OKboNa0lbA4cAZABGxGFjc3LLMzKymka6PtwEdwI8kPSLpakmbd51I0khJ7ZLaOzo6er1QM7MNVSNB3R/YH7gqIvYDXgEu7jpRRIyOiLaIaGtpqftPCszMbA00EtQLgAURMSXfv5UU3GZm1ge6DeqIeAZ4StIeedTRwJymVmVmZss1etXHJ4Eb8hUfTwBnNq8kMzOraiioI2Ia0NbcUszMrB5/M9HMrHAOajOzwjmozcwK56A2Myucg9rMrHAOajOzwjmozcwK56A2Myucg9rMrHAOajOzwjmozcwK56A2Myucg9rMrHAOajOzwjmozcwK56A2Myucg9rMrHAOajOzwjmozcwK1+g/tzVbrvXiX67rEhoy/7Lj13UJZr3CLWozs8I5qM3MCuegNjMrnIPazKxwDZ1MlDQfeAlYCrwZEW3NLMrMzDr15KqPIyPi2aZVYmZmdbnrw8yscI0GdQDjJE2VNLKZBZmZ2Yoa7fo4NCL+Imk74B5JcyNiQnWCHOAjAYYOHdrLZZqZbbgaalFHxF/y34XAHcCBdaYZHRFtEdHW0tLSu1WamW3Aug1qSZtL2rI2DBwLzGp2YWZmljTS9bE9cIek2vQ3RsTYplZlZmbLdRvUEfEEsG8f1GJmZnX48jwzs8I5qM3MCuegNjMrnIPazKxwDmozs8I5qM3MCuegNjMrnIPazKxwDmozs8I5qM3MCuegNjMrnIPazKxwDmozs8I5qM3MCuegNjMrnIPazKxwDmozs8I5qM3MCuegNjMrnIPazKxwDmozs8I5qM3MCuegNjMrnIPazKxwDmozs8I1HNSS+
kl6RNKYZhZkZmYr6kmL+nzg0WYVYmZm9TUU1JJ2Ao4Hrm5uOWZm1lWjLer/BT4LLFvVBJJGSmqX1N7R0dEbtZmZGQ0EtaQTgIURMXV100XE6Ihoi4i2lpaWXivQzGxD10iL+lDgREnzgZ8CR0n6SVOrMjOz5boN6oj4fETsFBGtwIeA30TEaU2vzMzMAF9HbWZWvP49mTgixgPjm1KJmZnV5Ra1mVnhHNRmZoVzUJuZFc5BbWZWOAe1mVnhHNRmZoVzUJuZFc5BbWZWOAe1mVnhHNRmZoVzUJuZFc5BbWZWOAe1mVnhHNRmZoVzUJuZFc5BbWZWOAe1mVnhHNRmZoVzUJuZFc5BbWZWOAe1mVnhHNRmZoVzUJuZFc5BbWZWuG6DWtKmkn4nabqk2ZK+1BeFmZlZ0r+Bad4AjoqIlyUNAO6X9OuImNzk2szMjAaCOiICeDnfHZBv0cyizMysU0N91JL6SZoGLATuiYgpdaYZKaldUntHR0cvl2lmtuFqKKgjYmlEDAd2Ag6UtE+daUZHRFtEtLW0tPRymWZmG64eXfURES8A44ERzSjGzMxW1shVHy2SBuXhzYBjgLlNrsvMzLJGrvrYAbhOUj9SsN8cEWOaW5aZmdU0ctXHDGC/PqjFzMzq8DcTzcwK56A2Myucg9rMrHAOajOzwjmozcwK56A2Myucg9rMrHAOajOzwjmozcwK56A2Myucg9rMrHAOajOzwjmozcwK56A2Myucg9rMrHAOajOzwjmozcwK56A2Myucg9rMrHAOajOzwjmozcwK56A2Myucg9rMrHAOajOzwjmozcwK121QSxoi6T5Jj0qaLen8vijMzMyS/g1M8ybw6Yh4WNKWwFRJ90TEnCbXZmZmNNCijoinI+LhPPwS8CiwY7MLMzOzpEd91JJagf2AKXUeGympXVJ7R0dHL5VnZmYNB7WkLYDbgAsi4sWuj0fE6Ihoi4i2lpaW3qzRzGyD1lBQSxpACukbIuL25pZkZmZVjVz1IeCHwKMRcXnzSzIzs6pGWtSHAqcDR0malm/HNbkuMzPLur08LyLuB9QHtZiZWR3+ZqKZWeEc1GZmhXNQm5kVzkFtZlY4B7WZWeEc1GZmhXNQm5kVzkFtZlY4B7WZWeEc1GZmhXNQm5kVzkFtZlY4B7WZWeEc1GZmhXNQm5kVzkFtZlY4B7WZWeEc1GZmhXNQm5kVzkFtZlY4B7WZWeEc1GZmhXNQm5kVzkFtZla4boNa0jWSFkqa1RcFmZnZihppUV8LjGhyHWZmtgrdBnVETAAW9UEtZmZWR6/1UUsaKaldUntHR0dvzdbMbIPXa0EdEaMjoi0i2lpaWnprtmZmGzxf9WFmVjgHtZlZ4Rq5PO8mYBKwh6QFks5qfllmZlbTv7sJIuKUvijEzMzq6zaobe20XvzLdV1CQ+Zfdvy6LsHMVsF91GZmhXNQm5kVzkFtZlY4B7WZWeEc1GZmhXNQm5kVrrjL83w5m5nZityiNjMrnIPazKxwDmozs8IV10dtZmvP53rWL25Rm5kVzkFtZlY4B7WZWeEc1GZmhXNQm5kVzkFtZlY4B7WZWeEc1GZmhXNQm5kVzkFtZlY4f4XczIq3oX8l3i1qM7PCOajNzArXUFBLGiHpMUl/kHRxs4syM7NO3Qa1pH7Ad4F3A3sDp0jau9mFmZlZ0kiL+kDgDxHxREQsBn4KvLe5ZZmZWY0iYvUTSCcBIyLi7Hz/dOBdEXFul+lGAiPz3T2Ax3q/3DU2GHh2XRfRi9a35YH1b5nWt+WB9W+ZSluenSOipd4DjVyepzrjVkr3iBgNjO5hYX1CUntEtK3rOnrL+rY8sP4t0/q2PLD+LdNbaXka6fpYAAyp3N8J+EtzyjEzs64aCeqHgN0k7SJpY+BDwC+aW5aZmdV02/UREW9KOhe4G+gHXBMRs5teWe8qsktmLaxvywPr3zKtb8sD698yvWWWp9uTiWZmtm75m4lmZoVzUJuZFa5Pg
lrSy5Xh4yQ9LmnoGsxnT0nTJD0iaViDzzlD0hU9fa0e1vVlScc0MN23JF1QuX+3pKsr978p6ZI1/Zp+L67n+ZIG5+EHu5m2oWWvTD9K0p/z+1i7Deppjb2hu9olXZu/R/CW1dNtQtJ4SW2V+++X9D1Jv6q+T5I+KKm1cv+/JT1Vfb08/gxJHZX3+uyu06zl8p3YGz9rkZf7MUnTJT0kaXgvlNd7IqLpN+Dl/Pdo4I/AsDWcz8XAl3r4nDOAK3owff8mroeTgZvz8EbAVGBS5fFJpC8Trev1PB8Y3KR1MApYBkwDZgG3AAN78Px+vVTHl4Fj6ow/CJiS63uh9n71xQ24Gti7B9MfAYzJw8u3c+Ac4MNrsk0A44G2PDwY+DmwcZdpPgx8us5626H2epXxK+1/Xacp4dZluc8E7lnXNVVvfdb1Iekw4AfA8RHxxzzuQkmz8u2CPK5V0qOSfiBptqRxkjaTdBxwAXC2pPvytKdJ+l0+Un8//y4Jks6U9HtJvwUOrdTQIum2fMR8SNKhefwoSaMljQOuzzVMlPRwvh1SmcdnJc3MR97L8rjlLa/cIn4oL9NoSdUvDD0A1Ob1dlJQvSTp7yRtAuwF7Fv7BCBpZ0n3SpqR/w7N40/O858uaUIedwawqaQpwFjgvsp6fkDSa5JelzS2six3Spqa13PtW6Vd37dqi2xtlr1qSUQMj4h9gMXAOap88pHUT9IYSUfUasit3ynAwavZbuZKui6vr1slDcyPHSDpt3lZ75a0Q0RcApxWqf0ySXNIO+zciBhOCqkH8+Nfycu6kaSrJLXn9falyvrpt4rlbUhEnB0Rc9ZmHnk+34uI6yt1rbDv5XU1q/L4RZJGVechaSPgm8DMiFis/ClL0jnAhcDpkubV9sWImBwRT69pzZKGSRqb36OJSp+e+0l6QskgScskHZ6nnyhp1y7bzfaS7sjb5/TaftvIdt7FJGDH/Nwt8r73cN7235vHn6POTwnL14OkYyVNytPfImmLPH6+pEvzY+2S9s/b4h/zOl29PjpaLQEWAe+sjDsAmAlsDmwBzAb2A1qBN4HhebqbgdPy8Cjgojy8F3AXMCDfv5J0pN8BeBJoATYmhWOtpXEj8E95eCjwaGW+U4HN8v2BwKZ5eDegPQ+/m7TjDsz3t8l/rwVOqo7Lwz8G3tNlXczPr/1xUsvnK8BxpAPKBFZsGd0FfCQPfxS4Mw/PBHbMw4Ois+USwPNAG/An0heVDgDm5PW8FfAK8IEu9W9GOmhsW6lxcFRaP72x7JV1XWtRTyP91MCVwGWkL1fdmOsdA9yf35cAvp+f/zmgIy/PZ0hBvx9weJ7uUFLr8TngaeBHpB2vJT//g8A1ue77gJOAbXIdyutv1+qyAV8H2vMy/YbUMv0Y6XLVacDvanXn592Z654NjMzjPgBcnofPB57Iw8OA++u06o7NdT9M+tSxRR4/Apib1813qN+iHgVcBGyX18mivBxB2vZagTdI2/nOwOPAX4F782selOf/V2A6abucn+f3DdL2NwP4Aytv3/Va1E/n6W8lbZMrtajza++Wh98F/CYPjyU1ak4gfafji8AmwLw6y/0z4II83A/YenXbeZfXr677C4BL83B/YKs8PDgvsyrPGwBMBN6TH58AbF7ZVi+p7FOfyMPfyutjS1JOLewuQ/vqP7wsIe3kZ5E2UoB/Au6IiFcAJN0OHEb6Ms28iJiWp5tK2rC6OpoUQg/lhttmwELSmzw+IjryfH8G7J6fcwywd6Wht5WkLfPwLyLitTw8ALhCqZ9qaZfn/ygiXgWIiEV16jpS0mdJO8E2pJ31rsrjtVb1IcDlpCP3IcDf8jqqOhh4fx7+MSkwavO4VtLNwO2V6Zfmx04nhd3OeR3NI21Mtff7ONIB8DxJ78vjhpAOSs/VWabeWvaaJRExXFJ/4DbSjr8zsD3wxYiYJ2kM8LWIuEvSm8ChkrYlvTeLI+IVSe8i/VbDCcCrwIuk7eVx4FN53
W1LCvJ78vvejxQc1W/Xvgi8Tup6GEvapsbnei4BJpMOHO8jhdh/kH5R8lPAPwCbAh+MiHl5fh+NiEWSNsvzuo20A38mP34Y8JykHUn7wcTqylE6P/BfpK6ZVyR9DrhQ0tdJLeOjSIHxszrrdrmIWCgpSAeS/yQdbA4jhcbSiHg1t0bbSQeqJ0nb2Pfzsu8fEX9W6pueRmoI7ZLX53eAlyKi3vtbdRdwU0S8kVuO13WdILc6DwFuqeybm+S/E0kH4V2Ar5IOkL8lhXZXR+UaiYilpH0KGt/Ob5C0OWkb2b9WHnBpbskvI+2v2wPP5Me/TTqo3CXpBNIvjD6Ql2Nj0oGvpvZFwZmkA+9LpE/Ur0saFBEv1KkJ6LurPpaRWhT/KOkLedyqPhZDOtrXLKX+F3MEXBfpI/TwiNgjIkblx1Z1cfhGwMGV5+yYVxaklmbNp0itiX1JrdONK6+5ygvPJW1Kah2eFBHvIO1Um3aZ7EHSRvkO0tF9MimQDyGF7OoEQEScQ9qRhwDTcoBBZT2TNuz+pKA6GDg6It5J2sH7K3UrHJPXx77AI3VqXWHxemHZawZImkYKiCeBH5Le54WVsNsU+DdJ0/Nr13awl4GN8wF2CGkdDsvL/DrpB8HmkVrnQdo5Xqm85++IiGOrxUTEm6RfibyN1KiYC4wD3kZq8RxAagj8HPh74BOkA+QXSO/fvErdkIJhen5sCKml+AywRaXuG0kBdBhdgpp0MKjt8NOAj5AOZHvm13o8UtPsJ6tYv1VLge+RtrfH8mvuTzqwQdo2Hs3DPwa2Jm2jb5K6AWufHAD+Oc/rtFzP57t78Yh4LiJq+/MPSOuyq42AFyrv0fCI2Cs/NpG0jg4EfgUMIvXNT+jutQF6uJ2fStpvbiQdiGvjWoADInWH/bX2fKXuxp2BWveXSH3btWXYOyLOqsy/th6WsWLGLaObLx/2WR91bomdAJwq6SzSiv5XSQPzUex9rLzBrs69wEmStgOQtI2knUkngo6QtK2kAaQTeDXjgOW/+qdVn9ndGng6IpaRWqe1DXUc8FF19n1u0+V5tQ3g2dxKqHfFwAOk9bAoIpbmlukg0g4zqcu0D5K+sg9pg7k/v+6wiJgSqZ/1WSq/xVJZzzuSWs6zSSGzWNIupI+Rv8/L+HxuVe1JCofV6Y1lr1lS2Zg/Gennc58BNlPqAx6S18cB+e9rdO5gE0jvx0hSF8TbSC34/UkfzffNr3FKXl9PAZtIOjjXPUDS26vF5Hq3johfkT727hERV5G+jbslcAXw76TWfK37aHEePpDKTtdNMEwinah6jM4AOpiVD9Cr2+FXebBchaW5xvmklvAxpNAeUDnAV//RX5AOnJeTGihDSa3pjXJdu5K6VU7L+8dqSdqhcvdEOg8KnS8Y8SIwT9LJ+TmSVHsfp5AaMcsi4vVcy8epnxX3kg6itfMcW9HD7TwilpAaQQdJ2is/f2FELJF0JCmYkXRAnfUwmfTJb9c8zUBJu6/0ImugT6+jzqE0gs7W4LWkj2VTgKsj4pEezGtOns84STOAe4AdIp3QGEXaKf6P1MdXcx7QpnSyaQ6pj7ieK4GPSJpM6vZ4Jb/mWFILrT23dC5asaR4gdRqmEnqp6z38WwmqS9rcpdxf4uIrj+5eB5wZl6+0+nsNvqffGJjFim4pndZN4tI6/R0UitzJumj3gxScD9J+ojfP8/7K13qWUkvLXvNAK14eV5rft6r+e83gCdIH61fJQXEQfmFHiYF6FdJLbzvkoJnMSkEDsvTDgGuIgX2lcDXcit3Gp0ndCEF05bAmLwuHiZ9ooIUxMuA6/P4c0l9/3PyfD9MatFXrS4YJuT1NoEU4EcCb0TE37rMY1U7/FxgF3VemnrKKtZv1VJSC3guad/bgdQoupS0jVRbd6fS2V3wc1J//K6kxkB/Up/+50jdWvflbfDqXOPXJS0ABkpaoM6Tk+cpncSbTtqez6hMU7tdmF/7rDzdbPJv3ufW+FN0b
p8TSe/XzDrLej6p+20mqQvs7fRwO8+v+RrpROpFwA2kzGjPNc7Nk51bWQ/TJF2du1vPAG7KrzeZ9Clo7XXXie1bQydL7wKOXNd1vFWWnfonk44gnxjL9zcBfk06uNxCOtlzRH5sGClgd8/3x5FOGs7K948mBeFM0onDTeq83rWkrpcju4z/KekTx7T8+L/k8aNIvw1xL6kP/GO9VPd3Ks8dT+cJraNIB7sZ+XZiHl89mXgZqzmZWJnvk3Se1PwCMKPyWCspkGfkZRuax9+e198sUj+sSGF9OelANR04d11vfxvKzb/1sZYkXUP6ODQi0semDUZJy55b5WMiXfLXyPQ9qj23EF+OiG+sTZ1ma8JBbdYAB7WtSw5qM7PC+UeZzMwK56A2Myucg9rMrHAOajOzwjmozcwK9/+Pw71Kcn1vjgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#Prosta wizualizacja ogólnopolskich wyników głównych komitetów\n", "import matplotlib.pyplot as plt\n", "\n", "cols = ['Konfederacja', 'Wiosna','Koalicja Europejska', 'Prawo i Sprawiedliwość', 'Kukiz15', 'Lewica Razem']\n", "plt.bar(cols,big_dataset[cols].sum())" ] }, { "cell_type": "code", "execution_count": 41, "id": "00a7eccc", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: sklearn in c:\\users\\ggap9\\anaconda31\\lib\\site-packages (0.0.post1)\n" ] } ], "source": [ "!pip install --user sklearn" ] }, { "cell_type": "code", "execution_count": 43, "id": "ae16e335", "metadata": {}, "outputs": [], "source": [ "import sklearn" ] }, { "cell_type": "code", "execution_count": 50, "id": "c231960b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10 3106\n", "12 2572\n", "11 2446\n", "7 1968\n", "3 1860\n", "5 1847\n", "8 1691\n", "13 1634\n", "9 1623\n", "6 1574\n", "2 1498\n", "4 1493\n", "1 1273\n", "Name: obwod, dtype: int64" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Wyodrębnienie z danych zbioru testowego\n", "from sklearn.model_selection import train_test_split\n", "data_train, data_test = sklearn.model_selection.train_test_split(big_dataset, test_size=2700, random_state=1)\n", "data_train['obwod'].value_counts()" ] }, { "cell_type": "code", "execution_count": 51, "id": "5a17d64f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11 311\n", "10 298\n", "12 256\n", "7 224\n", "3 214\n", "8 211\n", "13 207\n", "9 187\n", "5 185\n", "4 164\n", "6 161\n", "2 159\n", "1 123\n", "Name: obwod, dtype: int64" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Prezentacja ilości komisji z poszczególnych okręgów wyborczych w zbiorze testowym\n", "data_test[\"obwod\"].value_counts()" ] }, { 
"cell_type": "code", "execution_count": 52, "id": "c3ad7a38", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10 347\n", "12 281\n", "11 262\n", "7 220\n", "5 218\n", "9 195\n", "3 194\n", "2 193\n", "4 173\n", "8 167\n", "6 166\n", "13 146\n", "1 138\n", "Name: obwod, dtype: int64" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Wyodrębnienie zbioru walidacyjnego z pozostałego zbioru treningowego\n", "data_train, data_val = sklearn.model_selection.train_test_split(data_train, test_size=2700, random_state=1)\n", "data_val['obwod'].value_counts()" ] }, { "cell_type": "code", "execution_count": 53, "id": "da767057", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10 2759\n", "12 2291\n", "11 2184\n", "7 1748\n", "3 1666\n", "5 1629\n", "8 1524\n", "13 1488\n", "9 1428\n", "6 1408\n", "4 1320\n", "2 1305\n", "1 1135\n", "Name: obwod, dtype: int64" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_train['obwod'].value_counts()" ] }, { "cell_type": "code", "execution_count": 54, "id": "d72548ef", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Kod terytorialny gminyNr obwodu głosowaniaTyp obszaruTyp obwoduSiedziba Obwodowej Komisji WyborczejGminaPowiatWojewództwoKomisja otrzymała kart do głosowaniaLiczba wyborców uprawnionych do głosowania...KonfederacjaWiosnaKoalicja EuropejskaPrawo i SprawiedliwośćLewica RazemKukiz15Polska Fair PlayPOLEXITJedność Naroduobwod
count21885.00000021885.00000021885218852188521885218852188521885.00000021885.000000...21885.00000021885.00000021885.00000021885.00000021885.0000021885.00000021885.00000021885.00000021885.00000021885.000000
uniqueNaNNaN51120141241037216NaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
topNaNNaNwieśstałySzkoła Podstawowa Nr 89, os. Piastów 34a, 31-6...m. KrakówWarszawamazowieckieNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
freqNaNNaN109982042763706292949NaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
mean166256.48110638.850171NaNNaNNaNNaNNaNNaN847.2960021102.516655...22.74882330.289331192.186201226.6012346.1812218.3830932.6955910.2977380.0813347.600914
std89750.888879113.301763NaNNaNNaNNaNNaNNaN445.264456586.052675...19.60105032.984209179.846822131.9008466.5505214.2526655.1644760.9659230.4548343.639637
min20101.0000001.000000NaNNaNNaNNaNNaNNaN10.0000005.000000...0.0000000.0000000.0000000.0000000.000000.0000000.0000000.0000000.0000001.000000
25%101009.0000003.000000NaNNaNNaNNaNNaNNaN500.000000653.000000...9.0000007.00000052.000000132.0000002.000008.0000000.0000000.0000000.0000005.000000
50%146512.0000007.000000NaNNaNNaNNaNNaNNaN811.0000001069.000000...19.00000020.000000138.000000214.0000004.0000015.0000000.0000000.0000000.0000008.000000
75%241707.00000018.000000NaNNaNNaNNaNNaNNaN1165.0000001523.000000...33.00000043.000000286.000000305.0000009.0000026.0000003.0000000.0000000.00000011.000000
max326301.0000001150.000000NaNNaNNaNNaNNaNNaN4200.0000003550.000000...496.000000744.0000001427.0000001366.000000138.00000175.00000061.00000021.0000009.00000013.000000
\n", "

11 rows × 45 columns

\n", "
" ], "text/plain": [ " Kod terytorialny gminy Nr obwodu głosowania Typ obszaru Typ obwodu \\\n", "count 21885.000000 21885.000000 21885 21885 \n", "unique NaN NaN 5 11 \n", "top NaN NaN wieś stały \n", "freq NaN NaN 10998 20427 \n", "mean 166256.481106 38.850171 NaN NaN \n", "std 89750.888879 113.301763 NaN NaN \n", "min 20101.000000 1.000000 NaN NaN \n", "25% 101009.000000 3.000000 NaN NaN \n", "50% 146512.000000 7.000000 NaN NaN \n", "75% 241707.000000 18.000000 NaN NaN \n", "max 326301.000000 1150.000000 NaN NaN \n", "\n", " Siedziba Obwodowej Komisji Wyborczej Gmina \\\n", "count 21885 21885 \n", "unique 20141 2410 \n", "top Szkoła Podstawowa Nr 89, os. Piastów 34a, 31-6... m. Kraków \n", "freq 6 370 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " Powiat Województwo Komisja otrzymała kart do głosowania \\\n", "count 21885 21885 21885.000000 \n", "unique 372 16 NaN \n", "top Warszawa mazowieckie NaN \n", "freq 629 2949 NaN \n", "mean NaN NaN 847.296002 \n", "std NaN NaN 445.264456 \n", "min NaN NaN 10.000000 \n", "25% NaN NaN 500.000000 \n", "50% NaN NaN 811.000000 \n", "75% NaN NaN 1165.000000 \n", "max NaN NaN 4200.000000 \n", "\n", " Liczba wyborców uprawnionych do głosowania ... Konfederacja \\\n", "count 21885.000000 ... 21885.000000 \n", "unique NaN ... NaN \n", "top NaN ... NaN \n", "freq NaN ... NaN \n", "mean 1102.516655 ... 22.748823 \n", "std 586.052675 ... 19.601050 \n", "min 5.000000 ... 0.000000 \n", "25% 653.000000 ... 9.000000 \n", "50% 1069.000000 ... 19.000000 \n", "75% 1523.000000 ... 33.000000 \n", "max 3550.000000 ... 
496.000000 \n", "\n", " Wiosna Koalicja Europejska Prawo i Sprawiedliwość \\\n", "count 21885.000000 21885.000000 21885.000000 \n", "unique NaN NaN NaN \n", "top NaN NaN NaN \n", "freq NaN NaN NaN \n", "mean 30.289331 192.186201 226.601234 \n", "std 32.984209 179.846822 131.900846 \n", "min 0.000000 0.000000 0.000000 \n", "25% 7.000000 52.000000 132.000000 \n", "50% 20.000000 138.000000 214.000000 \n", "75% 43.000000 286.000000 305.000000 \n", "max 744.000000 1427.000000 1366.000000 \n", "\n", " Lewica Razem Kukiz15 Polska Fair Play POLEXIT \\\n", "count 21885.00000 21885.000000 21885.000000 21885.000000 \n", "unique NaN NaN NaN NaN \n", "top NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN \n", "mean 6.18122 18.383093 2.695591 0.297738 \n", "std 6.55052 14.252665 5.164476 0.965923 \n", "min 0.00000 0.000000 0.000000 0.000000 \n", "25% 2.00000 8.000000 0.000000 0.000000 \n", "50% 4.00000 15.000000 0.000000 0.000000 \n", "75% 9.00000 26.000000 3.000000 0.000000 \n", "max 138.00000 175.000000 61.000000 21.000000 \n", "\n", " Jedność Narodu obwod \n", "count 21885.000000 21885.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 0.081334 7.600914 \n", "std 0.454834 3.639637 \n", "min 0.000000 1.000000 \n", "25% 0.000000 5.000000 \n", "50% 0.000000 8.000000 \n", "75% 0.000000 11.000000 \n", "max 9.000000 13.000000 \n", "\n", "[11 rows x 45 columns]" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Opis finalnego zbioru treningowego\n", "data_train.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 55, "id": "5755bc2e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Kod terytorialny gminyNr obwodu głosowaniaTyp obszaruTyp obwoduSiedziba Obwodowej Komisji WyborczejGminaPowiatWojewództwoKomisja otrzymała kart do głosowaniaLiczba wyborców uprawnionych do głosowania...KonfederacjaWiosnaKoalicja EuropejskaPrawo i SprawiedliwośćLewica RazemKukiz15Polska Fair PlayPOLEXITJedność Naroduobwod
count2700.0000002700.0000002700270027002700270027002700.0000002700.000000...2700.0000002700.000002700.0000002700.0000002700.0000002700.0000002700.0000002700.0000002700.0000002700.000000
uniqueNaNNaN492663134036416NaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
topNaNNaNwieśstałySzkoła Podstawowa Nr 2 z Oddziałami Sportowymi...m. KrakówWarszawamazowieckieNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
freqNaNNaN1358249734383391NaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
mean163242.94444441.382222NaNNaNNaNNaNNaNNaN852.1640741106.854444...22.94037030.24037192.878889230.4692596.09000018.7703702.8037040.2696300.0907417.458148
std88874.631900120.214646NaNNaNNaNNaNNaNNaN447.375340593.650865...19.14689832.97802183.694451137.5815956.32634614.3730495.3585060.8499310.4772593.625088
min20101.0000001.000000NaNNaNNaNNaNNaNNaN20.00000010.000000...0.0000000.000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
25%100704.7500003.000000NaNNaNNaNNaNNaNNaN500.000000653.000000...9.0000007.0000050.000000130.0000002.0000008.0000000.0000000.0000000.0000004.000000
50%146506.0000007.000000NaNNaNNaNNaNNaNNaN805.5000001059.000000...19.00000019.00000133.000000216.0000004.00000016.0000000.0000000.0000000.0000008.000000
75%241130.50000019.000000NaNNaNNaNNaNNaNNaN1188.5000001545.000000...33.00000042.00000285.000000313.0000008.00000027.0000004.0000000.0000000.00000011.000000
max326301.0000001142.000000NaNNaNNaNNaNNaNNaN2619.0000003771.000000...186.000000310.000001159.0000001087.00000072.000000123.00000060.0000007.0000007.00000013.000000
\n", "

11 rows × 45 columns

\n", "
" ], "text/plain": [ " Kod terytorialny gminy Nr obwodu głosowania Typ obszaru Typ obwodu \\\n", "count 2700.000000 2700.000000 2700 2700 \n", "unique NaN NaN 4 9 \n", "top NaN NaN wieś stały \n", "freq NaN NaN 1358 2497 \n", "mean 163242.944444 41.382222 NaN NaN \n", "std 88874.631900 120.214646 NaN NaN \n", "min 20101.000000 1.000000 NaN NaN \n", "25% 100704.750000 3.000000 NaN NaN \n", "50% 146506.000000 7.000000 NaN NaN \n", "75% 241130.500000 19.000000 NaN NaN \n", "max 326301.000000 1142.000000 NaN NaN \n", "\n", " Siedziba Obwodowej Komisji Wyborczej Gmina \\\n", "count 2700 2700 \n", "unique 2663 1340 \n", "top Szkoła Podstawowa Nr 2 z Oddziałami Sportowymi... m. Kraków \n", "freq 3 43 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " Powiat Województwo Komisja otrzymała kart do głosowania \\\n", "count 2700 2700 2700.000000 \n", "unique 364 16 NaN \n", "top Warszawa mazowieckie NaN \n", "freq 83 391 NaN \n", "mean NaN NaN 852.164074 \n", "std NaN NaN 447.375340 \n", "min NaN NaN 20.000000 \n", "25% NaN NaN 500.000000 \n", "50% NaN NaN 805.500000 \n", "75% NaN NaN 1188.500000 \n", "max NaN NaN 2619.000000 \n", "\n", " Liczba wyborców uprawnionych do głosowania ... Konfederacja \\\n", "count 2700.000000 ... 2700.000000 \n", "unique NaN ... NaN \n", "top NaN ... NaN \n", "freq NaN ... NaN \n", "mean 1106.854444 ... 22.940370 \n", "std 593.650865 ... 19.146898 \n", "min 10.000000 ... 0.000000 \n", "25% 653.000000 ... 9.000000 \n", "50% 1059.000000 ... 19.000000 \n", "75% 1545.000000 ... 33.000000 \n", "max 3771.000000 ... 
186.000000 \n", "\n", " Wiosna Koalicja Europejska Prawo i Sprawiedliwość Lewica Razem \\\n", "count 2700.00000 2700.000000 2700.000000 2700.000000 \n", "unique NaN NaN NaN NaN \n", "top NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN \n", "mean 30.24037 192.878889 230.469259 6.090000 \n", "std 32.97802 183.694451 137.581595 6.326346 \n", "min 0.00000 0.000000 0.000000 0.000000 \n", "25% 7.00000 50.000000 130.000000 2.000000 \n", "50% 19.00000 133.000000 216.000000 4.000000 \n", "75% 42.00000 285.000000 313.000000 8.000000 \n", "max 310.00000 1159.000000 1087.000000 72.000000 \n", "\n", " Kukiz15 Polska Fair Play POLEXIT Jedność Narodu \\\n", "count 2700.000000 2700.000000 2700.000000 2700.000000 \n", "unique NaN NaN NaN NaN \n", "top NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN \n", "mean 18.770370 2.803704 0.269630 0.090741 \n", "std 14.373049 5.358506 0.849931 0.477259 \n", "min 0.000000 0.000000 0.000000 0.000000 \n", "25% 8.000000 0.000000 0.000000 0.000000 \n", "50% 16.000000 0.000000 0.000000 0.000000 \n", "75% 27.000000 4.000000 0.000000 0.000000 \n", "max 123.000000 60.000000 7.000000 7.000000 \n", "\n", " obwod \n", "count 2700.000000 \n", "unique NaN \n", "top NaN \n", "freq NaN \n", "mean 7.458148 \n", "std 3.625088 \n", "min 1.000000 \n", "25% 4.000000 \n", "50% 8.000000 \n", "75% 11.000000 \n", "max 13.000000 \n", "\n", "[11 rows x 45 columns]" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Opis zbioru walidacyjnego\n", "data_val.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 56, "id": "53f83757", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Kod terytorialny gminyNr obwodu głosowaniaTyp obszaruTyp obwoduSiedziba Obwodowej Komisji WyborczejGminaPowiatWojewództwoKomisja otrzymała kart do głosowaniaLiczba wyborców uprawnionych do głosowania...KonfederacjaWiosnaKoalicja EuropejskaPrawo i SprawiedliwośćLewica RazemKukiz15Polska Fair PlayPOLEXITJedność Naroduobwod
count2700.0000002700.0000002700270027002700270027002700.0000002700.000000...2700.0000002700.0000002700.0000002700.0000002700.0000002700.0000002700.0000002700.0000002700.0000002700.000000
uniqueNaNNaN492655133336616NaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
topNaNNaNmiastostałySzkoła Podstawowa Nr 36, ul. Siemiradzkiego 9,...m. KrakówWarszawamazowieckieNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
freqNaNNaN1373250233775349NaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
mean169715.99963037.518148NaNNaNNaNNaNNaNNaN853.6103701111.765926...22.73740730.534815193.765556226.4240746.30592618.7296302.7592590.2429630.0688897.678889
std90574.898136111.314638NaNNaNNaNNaNNaNNaN450.079208596.722257...18.25871932.629471180.607844132.0120746.52658115.3890955.3089620.8448820.3996973.623060
min20101.0000001.000000NaNNaNNaNNaNNaNNaN13.00000013.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
25%100978.5000003.000000NaNNaNNaNNaNNaNNaN503.750000662.750000...9.0000007.00000053.000000131.0000002.0000008.0000000.0000000.0000000.0000005.000000
50%160302.5000007.000000NaNNaNNaNNaNNaNNaN820.0000001072.500000...19.00000021.000000143.000000215.0000005.00000016.0000000.0000000.0000000.0000008.000000
75%246601.00000019.000000NaNNaNNaNNaNNaNNaN1180.0000001541.250000...33.00000043.000000281.000000304.0000009.00000026.0000004.0000000.0000000.00000011.000000
max326301.0000001145.000000NaNNaNNaNNaNNaNNaN2776.0000003208.000000...276.000000318.0000001289.000000996.00000056.000000191.00000060.0000008.0000005.00000013.000000
\n", "

11 rows × 45 columns

\n", "
" ], "text/plain": [ " Kod terytorialny gminy Nr obwodu głosowania Typ obszaru Typ obwodu \\\n", "count 2700.000000 2700.000000 2700 2700 \n", "unique NaN NaN 4 9 \n", "top NaN NaN miasto stały \n", "freq NaN NaN 1373 2502 \n", "mean 169715.999630 37.518148 NaN NaN \n", "std 90574.898136 111.314638 NaN NaN \n", "min 20101.000000 1.000000 NaN NaN \n", "25% 100978.500000 3.000000 NaN NaN \n", "50% 160302.500000 7.000000 NaN NaN \n", "75% 246601.000000 19.000000 NaN NaN \n", "max 326301.000000 1145.000000 NaN NaN \n", "\n", " Siedziba Obwodowej Komisji Wyborczej Gmina \\\n", "count 2700 2700 \n", "unique 2655 1333 \n", "top Szkoła Podstawowa Nr 36, ul. Siemiradzkiego 9,... m. Kraków \n", "freq 3 37 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " Powiat Województwo Komisja otrzymała kart do głosowania \\\n", "count 2700 2700 2700.000000 \n", "unique 366 16 NaN \n", "top Warszawa mazowieckie NaN \n", "freq 75 349 NaN \n", "mean NaN NaN 853.610370 \n", "std NaN NaN 450.079208 \n", "min NaN NaN 13.000000 \n", "25% NaN NaN 503.750000 \n", "50% NaN NaN 820.000000 \n", "75% NaN NaN 1180.000000 \n", "max NaN NaN 2776.000000 \n", "\n", " Liczba wyborców uprawnionych do głosowania ... Konfederacja \\\n", "count 2700.000000 ... 2700.000000 \n", "unique NaN ... NaN \n", "top NaN ... NaN \n", "freq NaN ... NaN \n", "mean 1111.765926 ... 22.737407 \n", "std 596.722257 ... 18.258719 \n", "min 13.000000 ... 0.000000 \n", "25% 662.750000 ... 9.000000 \n", "50% 1072.500000 ... 19.000000 \n", "75% 1541.250000 ... 33.000000 \n", "max 3208.000000 ... 
276.000000 \n", "\n", " Wiosna Koalicja Europejska Prawo i Sprawiedliwość \\\n", "count 2700.000000 2700.000000 2700.000000 \n", "unique NaN NaN NaN \n", "top NaN NaN NaN \n", "freq NaN NaN NaN \n", "mean 30.534815 193.765556 226.424074 \n", "std 32.629471 180.607844 132.012074 \n", "min 0.000000 0.000000 0.000000 \n", "25% 7.000000 53.000000 131.000000 \n", "50% 21.000000 143.000000 215.000000 \n", "75% 43.000000 281.000000 304.000000 \n", "max 318.000000 1289.000000 996.000000 \n", "\n", " Lewica Razem Kukiz15 Polska Fair Play POLEXIT \\\n", "count 2700.000000 2700.000000 2700.000000 2700.000000 \n", "unique NaN NaN NaN NaN \n", "top NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN \n", "mean 6.305926 18.729630 2.759259 0.242963 \n", "std 6.526581 15.389095 5.308962 0.844882 \n", "min 0.000000 0.000000 0.000000 0.000000 \n", "25% 2.000000 8.000000 0.000000 0.000000 \n", "50% 5.000000 16.000000 0.000000 0.000000 \n", "75% 9.000000 26.000000 4.000000 0.000000 \n", "max 56.000000 191.000000 60.000000 8.000000 \n", "\n", " Jedność Narodu obwod \n", "count 2700.000000 2700.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 0.068889 7.678889 \n", "std 0.399697 3.623060 \n", "min 0.000000 1.000000 \n", "25% 0.000000 5.000000 \n", "50% 0.000000 8.000000 \n", "75% 0.000000 11.000000 \n", "max 5.000000 13.000000 \n", "\n", "[11 rows x 45 columns]" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Opis zbioru testowego\n", "data_test.describe(include='all')" ] }, { "cell_type": "code", "execution_count": null, "id": "6a0d0c8f", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" 
} }, "nbformat": 4, "nbformat_minor": 5 }