{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "blocked-battle",
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install kaggle\n",
    "# !pip install pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "id": "civic-martin",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading covid-world-vaccination-progress.zip to E:\\Na studia\\Magisterka\\Inżynieria uczenia maszynowego\\IUM_434804\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      " 0%| | 0.00/160k [00:00, ?B/s]\n",
      "100%|##########| 160k/160k [00:00<00:00, 1.20MB/s]\n",
      "100%|##########| 160k/160k [00:00<00:00, 1.19MB/s]\n"
     ]
    }
   ],
   "source": [
    "# !kaggle datasets download -d gpreda/covid-world-vaccination-progress"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "id": "minus-belly",
   "metadata": {},
   "outputs": [],
   "source": [
    "import zipfile\n",
    "\n",
    "# Unpack the downloaded Kaggle archive into the notebook's working directory.\n",
    "with zipfile.ZipFile('covid-world-vaccination-progress.zip', 'r') as zip_ref:\n",
    "    zip_ref.extractall(\".\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "norman-british",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Fixed seed so the shuffle (and therefore the split) is identical on every\n",
    "# Restart Kernel -> Run All.\n",
    "SEED = 42\n",
    "\n",
    "df = pd.read_csv('country_vaccinations.csv')\n",
    "\n",
    "# Split the data into train/validate/test (6:2:2) on a shuffled copy of the frame.\n",
    "# Plain iloc slicing replaces np.split, which routes a DataFrame through\n",
    "# DataFrame.swapaxes (deprecated in recent pandas releases).\n",
    "shuffled = df.sample(frac=1, random_state=SEED)\n",
    "train_end = int(.6 * len(df))\n",
    "validate_end = int(.8 * len(df))\n",
    "train = shuffled.iloc[:train_end]\n",
    "validate = shuffled.iloc[train_end:validate_end]\n",
    "test = shuffled.iloc[validate_end:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "id": "twenty-wednesday",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Whole set size       7337\n",
      "Train set size:      4402\n",
      "Validate set size:   1467\n",
      "Test set size:       1468\n"
     ]
    }
   ],
   "source": [
    "# Print the number of rows (samples) in each data frame.\n",
    "# len() is used instead of .size because DataFrame.size is rows * columns,\n",
    "# which hides the actual 6:2:2 sample counts.\n",
    "print(\"Whole set size\".ljust(20), len(df))\n",
    "print(\"Train set size: \".ljust(20), len(train))\n",
    "print(\"Validate set size: \".ljust(20), len(validate))\n",
    "print(\"Test set size: \".ljust(20), len(test))"
   ]
  },
  {
"cell_type": "code", "execution_count": 100, "id": "sustained-active", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | country | \n", "iso_code | \n", "date | \n", "total_vaccinations | \n", "people_vaccinated | \n", "people_fully_vaccinated | \n", "daily_vaccinations_raw | \n", "daily_vaccinations | \n", "total_vaccinations_per_hundred | \n", "people_vaccinated_per_hundred | \n", "people_fully_vaccinated_per_hundred | \n", "daily_vaccinations_per_million | \n", "vaccines | \n", "source_name | \n", "source_website | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "7337 | \n", "7337 | \n", "7337 | \n", "4.552000e+03 | \n", "4.053000e+03 | \n", "2.749000e+03 | \n", "3.830000e+03 | \n", "7.150000e+03 | \n", "4552.000000 | \n", "4053.000000 | \n", "2749.000000 | \n", "7150.000000 | \n", "7337 | \n", "7337 | \n", "7337 | \n", "
unique | \n", "150 | \n", "150 | \n", "97 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "25 | \n", "91 | \n", "145 | \n", "
top | \n", "Canada | \n", "GBR | \n", "2021-03-09 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Moderna, Oxford/AstraZeneca, Pfizer/BioNTech | \n", "Ministry of Health | \n", "https://coronavirus.data.gov.uk/details/health... | \n", "
freq | \n", "96 | \n", "96 | \n", "129 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1798 | \n", "2329 | \n", "480 | \n", "
mean | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2.361121e+06 | \n", "1.918598e+06 | \n", "7.999520e+05 | \n", "8.744129e+04 | \n", "5.825144e+04 | \n", "9.398541 | \n", "7.237774 | \n", "3.361342 | \n", "2675.625594 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
std | \n", "NaN | \n", "NaN | \n", "NaN | \n", "8.421579e+06 | \n", "6.249484e+06 | \n", "3.230805e+06 | \n", "2.693155e+05 | \n", "1.992295e+05 | \n", "16.995766 | \n", "11.614673 | \n", "7.262965 | \n", "4229.243670 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
min | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000e+00 | \n", "0.000000e+00 | \n", "1.000000e+00 | \n", "0.000000e+00 | \n", "1.000000e+00 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
25% | \n", "NaN | \n", "NaN | \n", "NaN | \n", "3.741475e+04 | \n", "3.457400e+04 | \n", "1.799500e+04 | \n", "2.732000e+03 | \n", "9.882500e+02 | \n", "0.717500 | \n", "0.720000 | \n", "0.370000 | \n", "355.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
50% | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2.536690e+05 | \n", "2.334230e+05 | \n", "9.966600e+04 | \n", "1.365700e+04 | \n", "5.952500e+03 | \n", "3.465000 | \n", "3.050000 | \n", "1.360000 | \n", "1247.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
75% | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.195748e+06 | \n", "9.467810e+05 | \n", "4.625030e+05 | \n", "5.718200e+04 | \n", "2.680500e+04 | \n", "10.080000 | \n", "7.890000 | \n", "3.000000 | \n", "3026.750000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
max | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.183138e+08 | \n", "7.723006e+07 | \n", "4.193463e+07 | \n", "4.575496e+06 | \n", "2.541597e+06 | \n", "151.860000 | \n", "88.790000 | \n", "63.070000 | \n", "54264.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
\n", " | country | \n", "iso_code | \n", "date | \n", "total_vaccinations | \n", "people_vaccinated | \n", "people_fully_vaccinated | \n", "daily_vaccinations_raw | \n", "daily_vaccinations | \n", "total_vaccinations_per_hundred | \n", "people_vaccinated_per_hundred | \n", "people_fully_vaccinated_per_hundred | \n", "daily_vaccinations_per_million | \n", "vaccines | \n", "source_name | \n", "source_website | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Afghanistan | \n", "AFG | \n", "2021-02-22 | \n", "0.0 | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "Oxford/AstraZeneca | \n", "Government of Afghanistan | \n", "http://www.xinhuanet.com/english/asiapacific/2... | \n", "
1 | \n", "Afghanistan | \n", "AFG | \n", "2021-02-23 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000537 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000645 | \n", "Oxford/AstraZeneca | \n", "Government of Afghanistan | \n", "http://www.xinhuanet.com/english/asiapacific/2... | \n", "
2 | \n", "Afghanistan | \n", "AFG | \n", "2021-02-24 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000537 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000645 | \n", "Oxford/AstraZeneca | \n", "Government of Afghanistan | \n", "http://www.xinhuanet.com/english/asiapacific/2... | \n", "
3 | \n", "Afghanistan | \n", "AFG | \n", "2021-02-25 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000537 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000645 | \n", "Oxford/AstraZeneca | \n", "Government of Afghanistan | \n", "http://www.xinhuanet.com/english/asiapacific/2... | \n", "
4 | \n", "Afghanistan | \n", "AFG | \n", "2021-02-26 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000537 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000645 | \n", "Oxford/AstraZeneca | \n", "Government of Afghanistan | \n", "http://www.xinhuanet.com/english/asiapacific/2... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
7332 | \n", "Zimbabwe | \n", "ZWE | \n", "2021-03-15 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Sinopharm/Beijing | \n", "Ministry of Health | \n", "https://twitter.com/MoHCCZim/status/1373023610... | \n", "
7333 | \n", "Zimbabwe | \n", "ZWE | \n", "2021-03-16 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Sinopharm/Beijing | \n", "Ministry of Health | \n", "https://twitter.com/MoHCCZim/status/1373023610... | \n", "
7334 | \n", "Zimbabwe | \n", "ZWE | \n", "2021-03-17 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Sinopharm/Beijing | \n", "Ministry of Health | \n", "https://twitter.com/MoHCCZim/status/1373023610... | \n", "
7335 | \n", "Zimbabwe | \n", "ZWE | \n", "2021-03-18 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Sinopharm/Beijing | \n", "Ministry of Health | \n", "https://twitter.com/MoHCCZim/status/1373023610... | \n", "
7336 | \n", "Zimbabwe | \n", "ZWE | \n", "2021-03-19 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Sinopharm/Beijing | \n", "Ministry of Health | \n", "https://twitter.com/MoHCCZim/status/1373023610... | \n", "
7337 rows × 15 columns
\n", "\n", " | country | \n", "iso_code | \n", "date | \n", "total_vaccinations | \n", "people_vaccinated | \n", "people_fully_vaccinated | \n", "daily_vaccinations_raw | \n", "daily_vaccinations | \n", "total_vaccinations_per_hundred | \n", "people_vaccinated_per_hundred | \n", "people_fully_vaccinated_per_hundred | \n", "daily_vaccinations_per_million | \n", "vaccines | \n", "source_name | \n", "source_website | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
46 | \n", "Albania | \n", "ALB | \n", "2021-02-02 | \n", "0.000027 | \n", "0.000033 | \n", "0.000015 | \n", "0.000295 | \n", "0.000100 | \n", "0.000751 | \n", "0.000906 | \n", "0.000344 | \n", "0.001622 | \n", "Pfizer/BioNTech | \n", "Ministry of Health | \n", "https://shendetesia.gov.al/covid19-ministria-e... | \n", "
234 | \n", "Antigua and Barbuda | \n", "ATG | \n", "2021-03-13 | \n", "0.002351 | \n", "0.003385 | \n", "0.000409 | \n", "0.003888 | \n", "0.004605 | \n", "0.004030 | \n", "0.006229 | \n", "0.000688 | \n", "0.004773 | \n", "Oxford/AstraZeneca | \n", "Ministry of Health | \n", "https://www.facebook.com/investingforwellness/... | \n", "
235 | \n", "Antigua and Barbuda | \n", "ATG | \n", "2021-03-14 | \n", "0.002474 | \n", "0.003454 | \n", "0.000629 | \n", "0.003033 | \n", "0.004431 | \n", "0.004235 | \n", "0.006342 | \n", "0.001033 | \n", "0.004589 | \n", "Oxford/AstraZeneca | \n", "Ministry of Health | \n", "https://www.facebook.com/investingforwellness/... | \n", "
236 | \n", "Antigua and Barbuda | \n", "ATG | \n", "2021-03-15 | \n", "0.002548 | \n", "0.003514 | \n", "0.000730 | \n", "0.001849 | \n", "0.004376 | \n", "0.004371 | \n", "0.006455 | \n", "0.001033 | \n", "0.004533 | \n", "Oxford/AstraZeneca | \n", "Ministry of Health | \n", "https://www.facebook.com/investingforwellness/... | \n", "
237 | \n", "Argentina | \n", "ARG | \n", "2020-12-29 | \n", "0.002583 | \n", "0.003530 | \n", "0.000800 | \n", "0.000865 | \n", "0.004069 | \n", "0.004440 | \n", "0.006569 | \n", "0.001205 | \n", "0.004220 | \n", "Oxford/AstraZeneca, Sinopharm/Beijing, Sputnik V | \n", "Ministry of Health | \n", "http://datos.salud.gob.ar/dataset/vacunas-cont... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
6965 | \n", "United Arab Emirates | \n", "ARE | \n", "2021-03-10 | \n", "0.011805 | \n", "0.014719 | \n", "0.006252 | \n", "0.008788 | \n", "0.010273 | \n", "0.289051 | \n", "0.389468 | \n", "0.136465 | \n", "0.152606 | \n", "Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm... | \n", "National Emergency Crisis and Disaster Managem... | \n", "http://covid19.ncema.gov.ae/en | \n", "
6966 | \n", "United Arab Emirates | \n", "ARE | \n", "2021-03-11 | \n", "0.012128 | \n", "0.015115 | \n", "0.006437 | \n", "0.007986 | \n", "0.011229 | \n", "0.296974 | \n", "0.400000 | \n", "0.140423 | \n", "0.166814 | \n", "Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm... | \n", "National Emergency Crisis and Disaster Managem... | \n", "http://covid19.ncema.gov.ae/en | \n", "
6967 | \n", "United Arab Emirates | \n", "ARE | \n", "2021-03-12 | \n", "0.012272 | \n", "0.015243 | \n", "0.006608 | \n", "0.003560 | \n", "0.011531 | \n", "0.300526 | \n", "0.403398 | \n", "0.144209 | \n", "0.171292 | \n", "Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm... | \n", "National Emergency Crisis and Disaster Managem... | \n", "http://covid19.ncema.gov.ae/en | \n", "
6968 | \n", "United Arab Emirates | \n", "ARE | \n", "2021-03-13 | \n", "0.012499 | \n", "0.015473 | \n", "0.006826 | \n", "0.005609 | \n", "0.011996 | \n", "0.306058 | \n", "0.409400 | \n", "0.149028 | \n", "0.178221 | \n", "Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm... | \n", "National Emergency Crisis and Disaster Managem... | \n", "http://covid19.ncema.gov.ae/en | \n", "
6969 | \n", "United Arab Emirates | \n", "ARE | \n", "2021-03-14 | \n", "0.012796 | \n", "0.015709 | \n", "0.007232 | \n", "0.007341 | \n", "0.012412 | \n", "0.313367 | \n", "0.415629 | \n", "0.157804 | \n", "0.184395 | \n", "Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm... | \n", "National Emergency Crisis and Disaster Managem... | \n", "http://covid19.ncema.gov.ae/en | \n", "
2367 rows × 15 columns
\n", "