ium_434784/UM1.ipynb

846 lines
265 KiB
Plaintext
Raw Normal View History

2021-03-21 20:28:34 +01:00
{
2021-03-21 20:43:50 +01:00
"cells": [
{
"cell_type": "code",
2021-03-22 00:03:37 +01:00
"execution_count": 27,
"id": "decreased-eight",
2021-03-21 20:43:50 +01:00
"metadata": {},
2021-03-21 20:44:09 +01:00
"outputs": [
{
2021-03-22 00:03:37 +01:00
"name": "stdout",
2021-03-21 20:44:09 +01:00
"output_type": "stream",
"text": [
2021-03-22 00:03:37 +01:00
"Requirement already satisfied: kaggle in /home/maciej/.local/lib/python3.8/site-packages (1.5.12)\n",
"Requirement already satisfied: certifi in /usr/lib/python3/dist-packages (from kaggle) (2019.11.28)\n",
"Requirement already satisfied: six>=1.10 in /usr/lib/python3/dist-packages (from kaggle) (1.14.0)\n",
"Requirement already satisfied: requests in /usr/lib/python3/dist-packages (from kaggle) (2.22.0)\n",
"Requirement already satisfied: tqdm in /home/maciej/.local/lib/python3.8/site-packages (from kaggle) (4.59.0)\n",
"Requirement already satisfied: urllib3 in /usr/lib/python3/dist-packages (from kaggle) (1.25.8)\n",
"Requirement already satisfied: python-dateutil in /usr/lib/python3/dist-packages (from kaggle) (2.7.3)\n",
"Requirement already satisfied: python-slugify in /home/maciej/.local/lib/python3.8/site-packages (from kaggle) (4.0.1)\n",
"Requirement already satisfied: text-unidecode>=1.3 in /home/maciej/.local/lib/python3.8/site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: pandas in /home/maciej/.local/lib/python3.8/site-packages (1.2.3)\n",
"Requirement already satisfied: numpy>=1.16.5 in /home/maciej/.local/lib/python3.8/site-packages (from pandas) (1.20.1)\n",
"Requirement already satisfied: pytz>=2017.3 in /usr/lib/python3/dist-packages (from pandas) (2019.3)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/lib/python3/dist-packages (from pandas) (2.7.3)\n",
"Requirement already satisfied: sklearn in /home/maciej/.local/lib/python3.8/site-packages (0.0)\n",
"Requirement already satisfied: scikit-learn in /home/maciej/.local/lib/python3.8/site-packages (from sklearn) (0.24.1)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /home/maciej/.local/lib/python3.8/site-packages (from scikit-learn->sklearn) (2.1.0)\n",
"Requirement already satisfied: joblib>=0.11 in /home/maciej/.local/lib/python3.8/site-packages (from scikit-learn->sklearn) (1.0.1)\n",
"Requirement already satisfied: numpy>=1.13.3 in /home/maciej/.local/lib/python3.8/site-packages (from scikit-learn->sklearn) (1.20.1)\n",
"Requirement already satisfied: scipy>=0.19.1 in /home/maciej/.local/lib/python3.8/site-packages (from scikit-learn->sklearn) (1.6.1)\n",
"Collecting matplotlib\n",
" Downloading matplotlib-3.3.4-cp38-cp38-manylinux1_x86_64.whl (11.6 MB)\n",
"\u001b[K |████████████████████████████████| 11.6 MB 39 kB/s eta 0:00:01 |██████████▏ | 3.7 MB 5.2 MB/s eta 0:00:02 |██████████████▏ | 5.1 MB 5.2 MB/s eta 0:00:02 |██████████████▋ | 5.3 MB 5.2 MB/s eta 0:00:02 |████████████████████████▏ | 8.7 MB 2.1 MB/s eta 0:00:02 |████████████████████████████ | 10.1 MB 2.1 MB/s eta 0:00:01\n",
"\u001b[?25hCollecting cycler>=0.10\n",
" Downloading cycler-0.10.0-py2.py3-none-any.whl (6.5 kB)\n",
"Requirement already satisfied: python-dateutil>=2.1 in /usr/lib/python3/dist-packages (from matplotlib) (2.7.3)\n",
"Collecting kiwisolver>=1.0.1\n",
" Downloading kiwisolver-1.3.1-cp38-cp38-manylinux1_x86_64.whl (1.2 MB)\n",
"\u001b[K |████████████████████████████████| 1.2 MB 9.9 MB/s eta 0:00:01\n",
"\u001b[?25hRequirement already satisfied: pillow>=6.2.0 in /usr/lib/python3/dist-packages (from matplotlib) (7.0.0)\n",
"Requirement already satisfied: numpy>=1.15 in /home/maciej/.local/lib/python3.8/site-packages (from matplotlib) (1.20.1)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /home/maciej/.local/lib/python3.8/site-packages (from matplotlib) (2.4.7)\n",
"Requirement already satisfied: six in /usr/lib/python3/dist-packages (from cycler>=0.10->matplotlib) (1.14.0)\n",
"Installing collected packages: cycler, kiwisolver, matplotlib\n",
"Successfully installed cycler-0.10.0 kiwisolver-1.3.1 matplotlib-3.3.4\n"
]
}
],
"source": [
"import sys\n",
"\n",
"# Install the notebook's dependencies with the same interpreter that runs this kernel.\n",
"# The PyPI project that provides `import sklearn` is `scikit-learn`; the PyPI name\n",
"# `sklearn` is only a deprecated placeholder that pulls scikit-learn in indirectly.\n",
"!{sys.executable} -m pip install kaggle\n",
"!{sys.executable} -m pip install pandas\n",
"!{sys.executable} -m pip install scikit-learn\n",
"!{sys.executable} -m pip install matplotlib"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "sharp-september",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading who-suicide-statistics.zip to /home/maciej/Desktop/INZ/ium_434784\r\n",
"\r",
" 0%| | 0.00/304k [00:00<?, ?B/s]\r\n",
"\r",
"100%|████████████████████████████████████████| 304k/304k [00:00<00:00, 17.9MB/s]\r\n"
]
}
],
"source": [
"# Task 1\n",
"# Download the dataset archive and extract it, so the CSV file read in the next cell\n",
"# exists after a fresh Restart & Run All (without --unzip only the .zip is saved).\n",
"!kaggle datasets download -d szamil/who-suicide-statistics --unzip"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "different-stack",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>country</th>\n",
" <th>year</th>\n",
" <th>sex</th>\n",
" <th>age</th>\n",
" <th>suicides_no</th>\n",
" <th>population</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Albania</td>\n",
" <td>1985</td>\n",
" <td>female</td>\n",
" <td>15-24 years</td>\n",
" <td>NaN</td>\n",
" <td>277900.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Albania</td>\n",
" <td>1985</td>\n",
" <td>female</td>\n",
" <td>25-34 years</td>\n",
" <td>NaN</td>\n",
" <td>246800.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Albania</td>\n",
" <td>1985</td>\n",
" <td>female</td>\n",
" <td>35-54 years</td>\n",
" <td>NaN</td>\n",
" <td>267500.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Albania</td>\n",
" <td>1985</td>\n",
" <td>female</td>\n",
" <td>5-14 years</td>\n",
" <td>NaN</td>\n",
" <td>298300.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Albania</td>\n",
" <td>1985</td>\n",
" <td>female</td>\n",
" <td>55-74 years</td>\n",
" <td>NaN</td>\n",
" <td>138700.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43771</th>\n",
" <td>Zimbabwe</td>\n",
" <td>1990</td>\n",
" <td>male</td>\n",
" <td>25-34 years</td>\n",
" <td>150.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43772</th>\n",
" <td>Zimbabwe</td>\n",
" <td>1990</td>\n",
" <td>male</td>\n",
" <td>35-54 years</td>\n",
" <td>132.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43773</th>\n",
" <td>Zimbabwe</td>\n",
" <td>1990</td>\n",
" <td>male</td>\n",
" <td>5-14 years</td>\n",
" <td>6.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43774</th>\n",
" <td>Zimbabwe</td>\n",
" <td>1990</td>\n",
" <td>male</td>\n",
" <td>55-74 years</td>\n",
" <td>74.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43775</th>\n",
" <td>Zimbabwe</td>\n",
" <td>1990</td>\n",
" <td>male</td>\n",
" <td>75+ years</td>\n",
" <td>13.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>43776 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" country year sex age suicides_no population\n",
"0 Albania 1985 female 15-24 years NaN 277900.0\n",
"1 Albania 1985 female 25-34 years NaN 246800.0\n",
"2 Albania 1985 female 35-54 years NaN 267500.0\n",
"3 Albania 1985 female 5-14 years NaN 298300.0\n",
"4 Albania 1985 female 55-74 years NaN 138700.0\n",
"... ... ... ... ... ... ...\n",
"43771 Zimbabwe 1990 male 25-34 years 150.0 NaN\n",
"43772 Zimbabwe 1990 male 35-54 years 132.0 NaN\n",
"43773 Zimbabwe 1990 male 5-14 years 6.0 NaN\n",
"43774 Zimbabwe 1990 male 55-74 years 74.0 NaN\n",
"43775 Zimbabwe 1990 male 75+ years 13.0 NaN\n",
"\n",
"[43776 rows x 6 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Read the WHO suicide statistics CSV into a DataFrame and show it as the cell output.\n",
"sc = pd.read_csv(filepath_or_buffer='who_suicide_statistics.csv')\n",
"sc"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "sonic-reduction",
"metadata": {},
"outputs": [],
"source": [
"# Task 2\n",
"# Split into 3 subsets (train / validate / test) in a 6:2:2 ratio.\n",
"import numpy as np  # no longer needed for the split; kept so `np` stays defined for other cells\n",
"\n",
"# Shuffle all rows once with a fixed seed so the split is reproducible.\n",
"shuffled = sc.sample(frac=1, random_state=42)\n",
"train_end = int(.6 * len(shuffled))\n",
"validate_end = int(.8 * len(shuffled))\n",
"\n",
"# Slice by position (same boundaries np.split used) and copy each part, so every subset\n",
"# is an independent DataFrame instead of a slice-derived view of the shuffled frame.\n",
"train = shuffled.iloc[:train_end].copy()\n",
"validate = shuffled.iloc[train_end:validate_end].copy()\n",
"test = shuffled.iloc[validate_end:].copy()\n"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "nuclear-bandwidth",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train set: 157590\n",
"Validate set: 52530\n",
"Test set: 52536\n",
" country year sex age \\\n",
"count 26265 26265.000000 26265 26265 \n",
"unique 141 NaN 2 6 \n",
"top United States of America NaN female 55-74 years \n",
"freq 298 NaN 13170 4420 \n",
"mean NaN 1998.562688 NaN NaN \n",
"std NaN 10.310004 NaN NaN \n",
"min NaN 1979.000000 NaN NaN \n",
"25% NaN 1990.000000 NaN NaN \n",
"50% NaN 1999.000000 NaN NaN \n",
"75% NaN 2007.000000 NaN NaN \n",
"max NaN 2016.000000 NaN NaN \n",
"\n",
" suicides_no population \n",
"count 24919.000000 2.298300e+04 \n",
"unique NaN NaN \n",
"top NaN NaN \n",
"freq NaN NaN \n",
"mean 194.504113 1.684849e+06 \n",
"std 789.159429 3.667651e+06 \n",
"min 0.000000 2.780000e+02 \n",
"25% 1.000000 8.678000e+04 \n",
"50% 14.000000 3.861960e+05 \n",
"75% 93.000000 1.333594e+06 \n",
"max 22338.000000 4.380521e+07 \n",
"United States of America 298\n",
"Sweden 292\n",
"Romania 292\n",
"Hungary 289\n",
"Iceland 283\n",
" ... \n",
"Cabo Verde 10\n",
"Iraq 9\n",
"Monaco 9\n",
"Macau 8\n",
"Zimbabwe 6\n",
"Name: country, Length: 141, dtype: int64\n"
]
}
],
"source": [
"# Task 3\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Report the number of rows (samples) in each subset.\n",
"# DataFrame.size would report rows * columns instead, which overstates the set sizes.\n",
"print(\"Train set: \", len(train))\n",
"print(\"Validate set: \", len(validate))\n",
"print(\"Test set: \", len(test))\n",
"\n",
"# Summary statistics of every column, then the number of rows per country (training subset).\n",
"print(train.describe(include='all'))\n",
"print(train.country.value_counts())"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "thermal-proposal",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" country year sex age suicides_no population\n",
"count 8755 8755.000000 8755 8755 8299.000000 7.707000e+03\n",
"unique 141 NaN 2 6 NaN NaN\n",
"top Mauritius NaN male 5-14 years NaN NaN\n",
"freq 108 NaN 4461 1506 NaN NaN\n",
"mean NaN 1998.390520 NaN NaN 197.230389 1.640237e+06\n",
"std NaN 10.441815 NaN NaN 880.620233 3.628585e+06\n",
"min NaN 1979.000000 NaN NaN 0.000000 2.590000e+02\n",
"25% NaN 1989.000000 NaN NaN 1.000000 8.303000e+04\n",
"50% NaN 1999.000000 NaN NaN 13.000000 3.798980e+05\n",
"75% NaN 2007.500000 NaN NaN 90.000000 1.307090e+06\n",
"max NaN 2016.000000 NaN NaN 21706.000000 4.324090e+07\n",
"Mauritius 108\n",
"Hong Kong SAR 106\n",
"United Kingdom 106\n",
"Russian Federation 103\n",
"Belgium 103\n",
" ... \n",
"Tunisia 3\n",
"Iran (Islamic Rep of) 3\n",
"Macau 2\n",
"Iraq 2\n",
"Cabo Verde 1\n",
"Name: country, Length: 141, dtype: int64\n"
]
}
],
"source": [
"# Task 3 (validation subset)\n",
"# Summary statistics of every column, followed by the number of rows per country.\n",
"print(validate.describe(include='all'))\n",
"print(validate['country'].value_counts())"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "average-climb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" country year sex age suicides_no population\n",
"count 8756 8756.000000 8756 8756 8302.000000 7.626000e+03\n",
"unique 141 NaN 2 6 NaN NaN\n",
"top Lithuania NaN female 75+ years NaN NaN\n",
"freq 102 NaN 4424 1501 NaN NaN\n",
"mean NaN 1998.433760 NaN NaN 185.833775 1.625640e+06\n",
"std NaN 10.320908 NaN NaN 749.047182 3.604071e+06\n",
"min NaN 1979.000000 NaN NaN 0.000000 2.790000e+02\n",
"25% NaN 1990.000000 NaN NaN 1.000000 8.113700e+04\n",
"50% NaN 1999.000000 NaN NaN 13.000000 3.660465e+05\n",
"75% NaN 2007.000000 NaN NaN 87.000000 1.241382e+06\n",
"max NaN 2016.000000 NaN NaN 17355.000000 4.299788e+07\n",
"Lithuania 102\n",
"Denmark 102\n",
"Israel 100\n",
"Luxembourg 100\n",
"Ireland 99\n",
" ... \n",
"Saudi Arabia 3\n",
"Zimbabwe 2\n",
"Macau 2\n",
"Cabo Verde 1\n",
"Iraq 1\n",
"Name: country, Length: 141, dtype: int64\n"
]
}
],
"source": [
"# Task 3 (test subset)\n",
"# Summary statistics of every column, followed by the number of rows per country.\n",
"print(test.describe(include='all'))\n",
"print(test['country'].value_counts())"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "comparable-company",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlkAAAHQCAYAAACSr3/KAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAADr+klEQVR4nOydd7geRfXHPyeFBEKHgPSA9N47hqLSpIN0sCAgSBEbitJEQFEQUFSkSFOK9CqIodf00AlJ6CVAIIWacH5/fM9k933ve5Mb4P4S4HyeZ597d9/d2dkpZ86cOTNj7k6SJEmSJEny6dJlekcgSZIkSZLk80gqWUmSJEmSJJ1AKllJkiRJkiSdQCpZSZIkSZIknUAqWUmSJEmSJJ1AKllJkiRJkiSdQLfpHQGAeeed1/v06TO9o5EkSZIkSTJVBgwY8Lq7957afTOEktWnTx/69+8/vaORJEmSJEkyVczs2Y7cl8OFSZIkSZIknUAqWUmSJEmSJJ1AKllJkiRJkiSdQCpZSZIkSZIknUAqWUmSJEmSJJ3AVJUsM+tpZg+Z2RAze9TMjovri5vZg2Y23MwuM7OZ4nqPOB8ev/fp5G9IkiRJkiSZ4eiIJet9YFN3XwVYFdjCzNYFfguc5u5LAmOA78b93wXGxPXT4r4kSZIkSZIvFFNVslyMj9PucTiwKfDvuH4BsH38v12cE79vZmb2aUU4SZIkSZLks0CHfLLMrKuZDQZeA24DngHecveJccsLwELx/0LA8wDx+9vAPJ9inJMkSZIkSWZ4OqRkufskd18VWBhYG1j2k77YzPY3s/5m1n/06NGfNLgkSZIkSZIZimmaXejubwH9gPWAOc2sbMuzMPBi/P8isAhA/D4H8EaLsM529zXdfc3evae6/U+SJEmSJMlnio7MLuxtZnPG/zMDXwMeR8rWznHbvsC18f91cU78/j9396m9p8+RN9LnyBunKfJJkiRJkiQzKh3ZIHoB4AIz64qUssvd/QYzewy41MxOAAYB58b95wIXmdlw4E1gt06Id5IkSZIkyQzNVJUsdx8KrNbi+gjkn9V8/T1gl08ldkmSJEmSJJ9RcsX3JEmSJEmSTiCVrCRJkiRJkk5ghlWymh3h0zE+SZIkSZLPEjOskpUkSZIkSfJZJpWsJEmSJEmSTuAzq2Tl8GGSJEmSJDMyn1klK0mSJEmSZEYmlawkSZIkSZJOIJWsJEmSJEmSTiCVrCRJkiRJkk7gc6NkpSN8kiRJkiQzEp8bJauZVLqSJEmSJJmefG6VrCRJkiRJkulJKllJkiRJkiSdwBdGyZraXog5vJgkSZIkyafJF0bJmlZS6UqSJEmS5JOQSlYHSaUrSZIkSZJpIZWsj0kONyZJkiRJMiVSyUqSJEmSJOkEUslKkiRJkiTpBFLJ6iRyNmOSJEmSfLFJJWsGIRWwJEmSJPl8kUpWkiRJkiRJJ5BKVpIkSZIkSSeQStZngPTnSpIkSZLPHqlkfQ5IpStJkiRJZjxSyfockpavJEmSJJn+pJL1BWRal5dIpS1JkiRJpp1UspJPTCpdSZIkSdKWVLKST520fCVJkiRJKlnJdCCVriRJkuSLQCpZyXQnla4kSZLk80gqWckMRypdSZIkyeeBVLKSGZ5UupIkSZLPIqlkJZ850rE+SZIk+SwwVSXLzBYxs35m9piZPWpmh8X1Y83sRTMbHMdWtWd+bmbDzexJM9u8Mz8gSZrJdb6SJEmSGYFuHbhnIvAjdx9oZrMBA8zstvjtNHf/ff1mM1se2A1YAVgQ+K+ZLe3ukz7NiCfJp0FRtkadvHWHzpMkSZKko0zVkuXuL7v7wPh/HPA4sNAUHtkOuNTd33f3kcBwYO1PI7JJkiRJkiSfFabJJ8vM+gCrAQ/GpR+Y2VAzO8/M5oprCwHP1x57gSkrZUmSJEmSJJ87OqxkmdmswJXA4e4+FvgL8GVgVeBl4A/T8mIz29/M+ptZ/9GjR0/Lo0
ky3Uh/riRJkqSjdEjJMrPuSMG6xN2vAnD3V919krt/BPydakjwRWCR2uMLx7UG3P1sd1/T3dfs3bv3J/mGJEmSJEmSGY6OzC404FzgcXc/tXZ9gdptOwCPxP/XAbuZWQ8zWxxYCnjo04tyksw4pGUrSZIkaY+OzC7cANgbGGZmg+PaL4DdzWxVwIFRwAEA7v6omV0OPIZmJh6cMwuTLwo5GzFJkiQpTFXJcvd7AGvx001TeOY3wG8+QbyS5HNBKl1JkiRfXHLF9yT5fyQXRk2SJPnikEpWksxApBKWJEny+SGVrCRJkiRJkk4glawk+QwxrfsypiUsSZJk+pFKVpJ8gUilLEmS5P+PVLKSJEmSJEk6gVSykiRpl7RsJUmSfHxSyUqSpMOk0pUkSdJxUslKkiRJkiTpBFLJSpLkY5OWrSRJkvZJJStJkk+NVLqSJEkqUslKkqTTSKUrSZIvMqlkJUny/0YupJokyReJVLKSJEmSJEk6gVSykiRJkiRJOoFUspIk+UyQw4tJknzWSCUrSZLPJamEJUkyvUklK0mSJEmSpBNIJStJkiRJkqQTSCUrSZIkSZKkE0glK0mSJEmSpBNIJStJki8k6QifJElnk0pWkiRJkiRJJ5BKVpIkCWnZSpLk0yeVrCRJkhbkOltJknxSUslKkiRJkiTpBFLJSpIk+RikZStJkqmRSlaSJMmnQCpdSZI0k0pWkiRJJ5BKV5IkqWQlSZIkSZJ0AqlkJUmSJEmSdAKpZCVJkiRJknQCqWQlSZIkSZJ0AqlkJUmSJEmSdAJTVbLMbBEz62dmj5nZo2Z2WFyf28xuM7On4+9ccd3M7AwzG25mQ81s9c7+iCRJkhmdnG2YJF88OmLJmgj8yN2XB9YFDjaz5YEjgdvdfSng9jgH2BJYKo79gb986rFOkiT5jJNKV5J8/pmqkuXuL7v7wPh/HPA4sBCwHXBB3HYBsH38vx1woYsHgDnNbIFPO+JJkiSfJ1LpSpLPH9Pkk2VmfYDVgAeB+d395fjpFWD++H8h4PnaYy/EtSRJkiRJki8MHVayzGxW4ErgcHcfW//N3R3waXmxme1vZv3NrP/o0aOn5dEkSZIkSZIZng4pWWbWHSlYl7j7VXH51TIMGH9fi+svAovUHl84rjXg7me7+5ruvmbv3r0/bvyTJEk+l+TwYZJ89unI7EIDzgUed/dTaz9dB+wb/+8LXFu7vk/MMlwXeLs2rJgkSZJ8DJqVrlTCkmTGp1sH7tkA2BsYZmaD49ovgJOBy83su8CzwDfjt5uArYDhwDvAtz/NCCdJkiRJknwWmKqS5e73ANbOz5u1uN+Bgz9hvJIkSZIkST7T5IrvSZIkSZIknUAqWUmSJEmSJJ1AKllJkiRJkiSdQCpZSZIkSZIknUAqWUmSJEmSJJ1AKllJkiSfA3LdrCSZ8UglK0mS5HNIKl1JMv1JJStJkuRzTipcSTJ9SCUrSZIkSZKkE0glK0mSJEmSpBNIJStJkiRJkqQTSCUrSZIkSZKkE0glK0mSJEmSpBNIJStJkuQLRvNsw6mdJ0ny8UglK0mSJEmSpBNIJStJkiSZImnZSpKPRypZSZIkSZIknUAqWUmSJEmSJJ1AKllJkiRJkiSdQCpZSZIkSZIknUAqWUmSJMk0kY7wSdIxUslKkiRJkiTpBFLJSpIkSZIk6QRSyUqSJEmSJOkEUslKkiRJkiTpBFLJSpIkSZIk6QRSyUqSJEmSJOkEUslKkiRJkiTpBFLJSpIkSZIk6QRSyUqSJEmSJOkEUslKkiRJkiTpBFLJSpIkSZIk6QRSyUqSJEmSJOkEUslKkiRJkiTpBFLJSpIkSZIk6QSmqmSZ2Xlm9pqZPVK7dqyZvWhmg+PYqvbbz81suJk9aWabd1bEkyRJkiRJZmQ6Ysn6B7BFi+unufuqcdwEYGbLA7sBK8QzZ5lZ108rskmSJMmMR58jb6TPkTdO72gkyQzHVJUsd78LeLOD4W0HXOru77
v7SGA4sPYniF+SJEmSJMlnkk/ik/UDMxsaw4lzxbWFgOdr97wQ15IkSZIvCM2WrbR0JV9UPq6S9Rfgy8CqwMvAH6Y
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the number of rows per country in the training subset.\n",
"# Series.value_counts() replaces the deprecated top-level pd.value_counts().\n",
"# figsize=(10, 5) matches the 720x360 figure stored in the saved output\n",
"# (presumably configured via rcParams in a cell that no longer exists - TODO confirm).\n",
"ax = train['country'].value_counts().plot.bar(figsize=(10, 5), title='Training subset: rows per country')\n",
"ax.set(xlabel='country', ylabel='number of rows');"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "numerous-truck",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlkAAAHQCAYAAACSr3/KAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAADo10lEQVR4nOydefydw/XH35NFNiJC7CR2VbUEtVZsVVpVa+0UpX6tViktbVG6UF1sRWmJfd/3LWJfs0mQIBL7FlskEZHI+f3xOZNn7v3ebxKSWwnn/Xo9r3ufe59nnnlmOXPmzJmZZGYEQRAEQRAEs5c2X3QEgiAIgiAIvoyEkhUEQRAEQdAEQskKgiAIgiBoAqFkBUEQBEEQNIFQsoIgCIIgCJpAKFlBEARBEARNoN0XHQGAhRZayHr16vVFRyMIgiAIgmCGDBw48B0z6zGj6+YIJatXr14MGDDgi45GEARBEATBDEkpvTQz18VwYRAEQRAEQRMIJSsIgiAIgqAJhJIVBEEQBEHQBELJCoIgCIIgaAKhZAVBEARBEDSBULKCIAiCIAiaQChZQRAEQRAETSCUrCAIgiAIgiYQSlYQBEEQBEETCCUrCIIgCIKgCYSSFQRBEARB0ATmGCWr15G30OvIW77oaARBEARBEMwW5hglKwiCIAiC4MtEKFlBEARBEARNIJSsIAiCIAiCJhBKVhAEQRAEQROYY5Wsekf4cIwPgiAIgmBuYo5VsoIgCIIgCOZmQskKgiAIgiBoAqFkBUEQBEEQNIFQsoIgCIIgCJpAKFlBEARBEARNIJSsIAiCIAiCJjDXKlmxxEMQBEEQBHMyc62SFQRBEARBMCcTSlYQBEEQBEETmKGSlVI6L6X0dkrpqeK37imlu1JKz/vnAv57SimdllIamVIamlLq3czIB0EQBEEQzKnMjCXrfGCrut+OBPqZ2QpAPz8H2BpYwY8DgbNmTzSDIAiCIAjmLmaoZJnZ/cB7dT//ALjAv18AbFf8fqGJR4FuKaXFZlNcPxPhCB8EQRAEwRfJ5/XJWsTM3vDvbwKL+PclgFeK617134IgCIIgCL5SzLLju5kZYJ/1vpTSgSmlASmlAWPGjJnVaMyQWPIhCIIgCIL/JZ9XyXorDwP659v++2vAUsV1S/pvLTCzc8xsbTNbu0ePHp8zGkEQBEEQBHMmn1fJuhHYx7/vA9xQ/L63zzJcDxhbDCsGQRAEQRB8ZWg3owtSSpcBmwALpZReBY4FTgSuTCntD7wE/NAvvxX4LjAS+AjYtwlxbgp56PDFE7/3BcckCIIgCIIvAzNUssxst1b+2rzBtQb8bFYjFQRBEARBMLcTK763QjjGB0EQBEEwK4SSFQRBEARB0ARCyZpJYgmIIAiCIAg+C6FkBUEQBEEQNIFQsoIgCIIgCJpAKFmziRg+DIIgCIKgJJSsIAiCIAiCJhBKVpMIq1YQBEEQfLUJJSsIgiAIgqAJhJIVBEEQBEHQBELJ+h8Qa2wFQRAEwVePULKCIAiCIAiaQChZcwBh6QqCIAiCLx+hZAVBEARBEDSBULLmAsLSFQRBEARzH6FkBUEQBEEQNIFQsoIgCIIgCJpAKFlfAmY0nBjDi0EQBEHwvyeUrCAIgiAIgiYQStZXkLBsBUEQBEHzCSUrCIIgCIKgCYSSFQRBEARB0ARCyQrCUT4IgiAImkAoWUEQBEEQBE0glKxghnzWJSLCEhYEQRAEoWQFQRAEQRA0hVCygqYTlq8gCILgq0goWUEQBEEQBE0glKwgCIIgCIImEEpWMMcRw4lBEATBl4FQsoIgCIIgCJpAKFnBXEdYuoIgCIK5gVCygiAIgiAImkAoWUEQBEEQBE0glKzgS0WsTh8EQRDMKYSSFQRBEARB0ARCyQqCgrB8BUEQBLOLULKCIAiCIAiaQChZQTALhOUrCIIgaI1ZUrJSSoemlJ5OKT2VUrospdQxpbRMSumxlNLIlNIVKaV5Zldkgy
AIgiAI5hY+t5KVUloC+AWwtpmtCrQFdgX+CpxsZssD7wP7z46IBkEQBEEQzE3M6nBhO6BTSqkd0Bl4A9gMuNr/vwDYbhafEQRfGmJ4MQiC4KvD51ayzOw14O/Ay0i5GgsMBD4wsyl+2avAErMaySAIgiAIgrmNWRkuXAD4AbAMsDjQBdjqM9x/YEppQEppwJgxYz5vNILgS01YtoIgCOZeZmW4cAtgtJmNMbPJwLXAhkA3Hz4EWBJ4rdHNZnaOma1tZmv36NFjFqIRBEEQBEEw5zErStbLwHoppc4ppQRsDjwD9Ad28mv2AW6YtSgGQRAEQRDMfcyKT9ZjyMF9EDDMwzoH+A1wWEppJLAgcO5siGcQBEEQBMFcRbsZX9I6ZnYscGzdz6OAb85KuEEQBEEQBHM7seJ7EMxFhCN8EATB3EMoWUEQBEEQBE0glKwgmIv5rIubxuKnQRAE/ztCyQqCIAiCIGgCoWQFQRAEQRA0gVCygiCYRgwnBkEQzD5CyQqCIAiCIGgCoWQFQTDTNLJ0BUEQBI0JJSsIgiAIgqAJhJIVBEEQBEHQBELJCoJgthBrdAVBENQSSlYQBEEQBEETCCUrCII5grBsBUHwZSOUrCAIgiAIgiYQSlYQBHMkYdkKgmBuJ5SsIAiCIAiCJhBKVhAEQRAEQRMIJSsIgrmCWBIiCIK5jVCygiAIgiAImkAoWUEQfCmJxVGDIPiiCSUrCIIgCIKgCYSSFQRBEARB0ARCyQqCIGhADCcGQTCrhJIVBEEQBEHQBELJCoIg+ByEZSsIghkRSlYQBEEQBEETCCUrCIJgNhCWrSAI6gklKwiCIAiCoAmEkhUEQRAEQdAEQskKgiBoAjF8GARBKFlBEARBEARNIJSsIAiC/wGxd2IQfPUIJSsIgiAIgqAJhJIVBEEQBEHQBELJCoIgmAOJ4cQgmPsJJSsIgiAIgqAJhJIVBEEQBEHQBELJCoIgCIIgaAKhZAVBEMxlhL9WEMwdzJKSlVLqllK6OqU0IqU0PKW0fkqpe0rprpTS8/65wOyKbBAEQRAEwdzCrFqyTgVuN7OVgdWB4cCRQD8zWwHo5+dBEARBEARfKT63kpVSmh/YGDgXwMw+MbMPgB8AF/hlFwDbzVoUgyAIgiAI5j5mxZK1DDAG6JtSGpxS+m9KqQuwiJm94de8CSwyq5EMgiAIgiCY25gVJasd0Bs4y8zWBCZQNzRoZgZYo5tTSgemlAaklAaMGTNmFqIRBEEQlMQ+iUEwZzArStarwKtm9pifX42UrrdSSosB+OfbjW42s3PMbG0zW7tHjx6zEI0gCIIgCII5j8+tZJnZm8ArKaWV/KfNgWeAG4F9/Ld9gBtmKYZBEARBEARzIe1m8f6fA5eklOYBRgH7IsXtypTS/sBLwA9n8RlBEARBEARzHbOkZJnZEGDtBn9tPivhBkEQBEEQzO3Eiu9BEARBEARNIJSsIAiCIAiCJhBKVhAEwVecWOIhCJpDKFlBEARBEARNIJSsIAiCIAiCJhBKVhAEQRAEQRMIJSsIgiAIgqAJhJIVBEEQTJdwjA+Cz0coWUEQBEEQBE0glKwgCIJglpiRpSssX8FXlVCygiAIgiAImkAoWUEQBEEQBE0glKwgCILgf0oMHwZfFULJCoIgCIIgaAKhZAVBEARBEDSBULKCIAiCIAiaQChZQRAEQRAETSCUrCAIgiAIgiYQSlYQBEEQBEETCCUrCIIgCIKgCYSSFQRBEARB0ARCyQqCIAiCIGgCoWQFQRAEQRA0gVCygiAIgiAImkAoWUEQBEEQBE0glKwgCIIgCIImEEpWEARB8IXS68hb6HXkLV90NIJgthNKVhAEQRAEQRMIJSsIgiAIgqAJhJIVBEEQzFHE8GHwZSGUrCAIgiAIgiYQSlYQBEEQBEETCCUrCIIgCIKgCYSSFQRBEMzR1Ptohc9WMLcQSlYQBEEQBEETCCUrCIIgCIKgCYSSFQRBEARB0ARCyQqCIA
iCIGgCoWQFQRAEQRA0gVlWslJKbVNKg1NKN/v5Mimlx1JKI1NKV6SU5pn1aAZBEARBEMxdzA5L1iHA8OL8r8DJZrY
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the number of rows per country in the validation subset.\n",
"# Series.value_counts() replaces the deprecated top-level pd.value_counts().\n",
"# figsize=(10, 5) matches the 720x360 figure stored in the saved output\n",
"# (presumably configured via rcParams in a cell that no longer exists - TODO confirm).\n",
"ax = validate['country'].value_counts().plot.bar(figsize=(10, 5), title='Validation subset: rows per country')\n",
"ax.set(xlabel='country', ylabel='number of rows');"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "tamil-democrat",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlkAAAHQCAYAAACSr3/KAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAADpQklEQVR4nOydd/zf0/XHnzdLJBISYpPYo2bsPauUovaqWaNVVbSo/hSl6DLbUiulNWqPoigxYku+GYgRkdgSBEkIIuf3x+vcvO/nk883iSSfCs7z8fg8Pus97vuOc88599x7k5kRBEEQBEEQzFrafNkJCIIgCIIg+DoSSlYQBEEQBEETCCUrCIIgCIKgCYSSFQRBEARB0ARCyQqCIAiCIGgCoWQFQRAEQRA0gXZfdgIA5ptvPuvVq9eXnYwgCIIgCIJp0r9//3fMrMe0jpstlKxevXrx1FNPfdnJCIIgCIIgmCYppZHTc1wMFwZBEARBEDSBULKCIAiCIAiaQChZQRAEQRAETSCUrCAIgiAIgiYQSlYQBEEQBEETCCUrCIIgCIKgCYSSFQRBEARB0ARCyQqCIAiCIGgCoWQFQRAEQRA0gVCygiAIgiAImkAoWUEQBEEQBE1gtlGyeh1/O72Ov73V70EQBEEQBF8lpqlkpZQuSymNSik9XfzWPaV0T0rpRX/v5r+nlNJ5KaVhKaXBKaXezUx8EARBEATB7Mr0eLL+Dmxd99vxwL1mtgxwr38H2AZYxl+HABfMmmQGQRAEQRB8tZimkmVmDwLv1f28A3C5f74c2LH4/QoTjwHzpJQWmkVpDYIgCIIg+MowozFZC5jZm/75LWAB/7wI8Gpx3Gv+WxAEQRAEwTeKmQ58NzMD7Iuel1I6JKX0VErpqdGjR0/z+AiMD4IgCILgq8SMKllv52FAfx/lv78OLFYct6j/NgVmdpGZrWlma/bo0WMGkxEEQRAEQTB7MqNK1q3Afv55P+CW4vd9fZbhusAHxbBiEARBEATBN4Z20zogpXQ1sCkwX0rpNeAk4Ezg2pTSQcBIYDc//A7gu8Aw4CPggCakuSF56HDEmdv+r24ZBEEQBEHQKtNUssxsz1b+2qLBsQYcPrOJCoIgCIIg+Koz26z4PquJwPggCIIgCL5MvrZKVhAEQRAEwZdJKFlBEARBEARN4BujZMU6W0EQBEEQ/C/5xihZQRAEQRAE/0tCyXLC0xUEQRAEwawklKwgCIIgCIImEEpWEARBEARBEwglazqZ1nBiDC8GQRAEQVASSlYQBEEQBEETCCUrCIIgCIKgCYSSFQRBEARB0ARCyQqCIAiCIGgCoWQ1iQiSD4IgCIJvNqFkBUEQBEEQNIFQsoIgCIIgCJpAKFlBEARBEARNIJSsIAiCIAiCJhBK1mxArCYfBEEQBF8/QskKgiAIgiBoAqFkBUEQBEEQNIFQsr6CfNHhxRhuDIIgCIL/PaFkBUEQBEEQNIFQsr6BhKcrCIIgCJpPKFlBEARBEARNIJSsIAiCIAiCJhBKVjAFEUgfBEEQBDNPKFlBEARBEARNIJSsYKYJz1cQBEEQTEkoWUEQBEEQBE0glKwgCIIgCIImEEpW8D9nZlesj+HHIAiC4KtAKFlBEARBEARNIJSs4CtP6dkKL1cQBEEwuxBKVhAEQRAEQRMIJSsIgiAIgqAJhJIVfK2J4cMgCILgyyKUrCAIgiAIgiYQSlbwjSKWgwiCIAj+V4SSFQRBEARB0ARCyQqCIAiCIGgCoWQFQUGsPh8EQRDMKmZKyUopHZVSeial9HRK6eqUUseU0hIppcdTSsNSSv9KKXWYVYkNgiAIgiD4qjDDSlZKaRHgp8CaZrYS0BbYA/gdcLaZLQ2MAQ6aFQkNgq8Cs3pfxvCcBUEQfHWZ2eHCdsCcKaV2QCfgTWBz4Hr//3Jgx5m8RxAEQRAEwVeOGVayzOx14I/AK0i5+gDoD7xvZhP9sNeARWY2kUEQBEEQBF81Zm
a4sBuwA7AEsDDQGdj6C5x/SErpqZTSU6NHj57RZARBEARBEMyWzMxw4ZbAy2Y22sw+A24ENgDm8eFDgEWB1xudbGYXmdmaZrZmjx49ZiIZQRAEQRAEsx8zo2S9AqybUuqUUkrAFsCzQF9gFz9mP+CWmUtiEAStEYHxQRAEsy8zE5P1OApwHwAM8WtdBBwHHJ1SGgbMC1w6C9IZBEEQBEHwlaLdtA9pHTM7CTip7ufhwNozc90gCIIgCIKvOrHiexB8jYl1t4IgCL48QskKgiAIgiBoAqFkBUEQBEEQNIFQsoIgmEwMJwZBEMw6QskKgiAIgiBoAqFkBUEw3TTydE3tvwi6D4Lgm0woWUEQBEEQBE0glKwgCIIgCIImEEpWEASzBTG8GATB141QsoIgCIIgCJpAKFlBEHwlCU9XEASzO6FkBUEQBEEQNIFQsoIgCIIgCJpAKFlBEHwtiUD6IAi+bELJCoIgCIIgaAKhZAVBEBCeriAIZj2hZAVBEARBEDSBULKCIAiCIAiaQChZQRAE00EMHwZB8EUJJSsIgiAIgqAJhJIVBEEwA0SgfBAE0yKUrCAIgiAIgiYQSlYQBEEQBEETCCUrCIIgCIKgCYSSFQRBEARB0ARCyQqCIGgCEQgfBEEoWUEQBEEQBE0glKwgCIIgCIImEEpWEATB/4AYPgyCbx6hZAVBEARBEDSBULKCIAiCIAiaQChZQRAEQRAETSCUrCAIgiAIgiYQSlYQBMGXQGwwHQRff0LJCoIgCIIgaAKhZAVBEARBEDSBULKCIAhmQ2I4MQi++oSSFQRBEARB0ARCyQqCIPiKEV6tIPhqEEpWEARBEARBEwglKwiCIAiCoAmEkhUEQfAVJ4Lkg2D2ZKaUrJTSPCml61NKz6WUhqaU1kspdU8p3ZNSetHfu82qxAZBEARBEHxVmFlP1rnAf8xseWBVYChwPHCvmS0D3OvfgyAIgi+J8HQFwZfDDCtZKaW5gY2BSwHM7FMzex/YAbjcD7sc2HHmkhgEQRAEQfDVY2Y8WUsAo4E+KaWWlNIlKaXOwAJm9qYf8xawwMwmMgiCIAiC4KvGzChZ7YDewAVmtjownrqhQTMzwBqdnFI6JKX0VErpqdGjR89EMoIgCIKZIYYTg6A5zIyS9Rrwmpk97t+vR0rX2ymlhQD8fVSjk83sIjNb08zW7NGjx0wkIwiCIAiCYPZjhpUsM3sLeDWltJz/tAXwLHArsJ//th9wy0ylMAiCIPhSCc9WEMwY7Wby/COAK1NKHYDhwAFIcbs2pXQQMBLYbSbvEQRBEARB8JVjppQsMxsIrNngry1m5rpBEARBEARfdWLF9yAIguALEcOHQTB9hJIVBEEQBEHQBELJCoIgCIIgaAKhZAVBEARBEDSBULKCIAiCIAiaQChZQRAEQRAETSCUrCAIgiAIgiYQSlYQBEEwU8Teh0HQmFCygiAIgiAImkAoWUEQBEEQBE0glKwgCIIgCIImEEpWEARBEARBEwglKwiCIAiCoAmEkhUEQRAEQdAEQskKgiAIgiBoAqFkBUEQBE0l1s0KvqmEkhUEQRAEQdAEQskKgiAI/qeEZyv4phBKVhAEQRAEQRMIJSsIgiAIgqAJhJIVBEEQBEHQBELJCoIgCIIgaAKhZAVBEARBEDSBULKCIAiCIAiaQChZQRAEQRAETSCUrCAIguBLJdbNCr6uhJIVBEEQBEHQBELJCoIgCIIgaAKhZAVBEASzFTF8GHxdCCUrCIIgCIKgCYSSFQRBEMzW1Hu2wtMVfFUIJSsIgiAIgqAJhJIVBEEQBEHQBELJCoIgCL7SxPBhMLsSSlYQBEEQBEETCCUrCIIgCIKgCYSSFQRBEARB0ARCyQqCIAiCIGgCoWQFQRAEQRA0gVCygiAIgiAImkAoWUEQBEEQBE1gppWslFLblFJLSunf/n2JlNLjKaVhKaV/pZQ6zHwygyAIgiAIvlrMCk/WkcDQ4vvvgLPNbGlgDHDQLL
hHEARBEATBV4qZUrJSSosC2wKX+PcEbA5c74dcDuw4M/cIgiAIgi9CrAAfzC7MrCfrHOBYYJJ/nxd438wm+vfXgEV
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the number of rows per country in the test subset.\n",
"# Series.value_counts() replaces the deprecated top-level pd.value_counts().\n",
"# figsize=(10, 5) matches the 720x360 figure stored in the saved output\n",
"# (presumably configured via rcParams in a cell that no longer exists - TODO confirm).\n",
"ax = test['country'].value_counts().plot.bar(figsize=(10, 5), title='Test subset: rows per country')\n",
"ax.set(xlabel='country', ylabel='number of rows');"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "reflected-shore",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>country</th>\n",
" <th>year</th>\n",
" <th>sex</th>\n",
" <th>age</th>\n",
" <th>suicides_no</th>\n",
" <th>population</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10289</th>\n",
" <td>Cuba</td>\n",
" <td>1993</td>\n",
" <td>female</td>\n",
" <td>75+ years</td>\n",
" <td>84.0</td>\n",
" <td>208800.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18495</th>\n",
" <td>Hungary</td>\n",
" <td>2004</td>\n",
" <td>female</td>\n",
" <td>5-14 years</td>\n",
" <td>2.0</td>\n",
" <td>544457.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1930</th>\n",
" <td>Aruba</td>\n",
" <td>1987</td>\n",
" <td>male</td>\n",
" <td>55-74 years</td>\n",
" <td>0.0</td>\n",
" <td>3118.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20315</th>\n",
" <td>Italy</td>\n",
" <td>2001</td>\n",
" <td>male</td>\n",
" <td>75+ years</td>\n",
" <td>560.0</td>\n",
" <td>1675192.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15269</th>\n",
" <td>Georgia</td>\n",
" <td>1993</td>\n",
" <td>female</td>\n",
" <td>75+ years</td>\n",
" <td>NaN</td>\n",
" <td>133600.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35206</th>\n",
" <td>Singapore</td>\n",
" <td>1981</td>\n",
" <td>male</td>\n",
" <td>55-74 years</td>\n",
" <td>18.0</td>\n",
" <td>108600.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33416</th>\n",
" <td>Saint Kitts and Nevis</td>\n",
" <td>1987</td>\n",
" <td>male</td>\n",
" <td>35-54 years</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7622</th>\n",
" <td>Bulgaria</td>\n",
" <td>2011</td>\n",
" <td>female</td>\n",
" <td>35-54 years</td>\n",
" <td>41.0</td>\n",
" <td>1036483.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37277</th>\n",
" <td>Suriname</td>\n",
" <td>1982</td>\n",
" <td>female</td>\n",
" <td>75+ years</td>\n",
" <td>1.0</td>\n",
" <td>3100.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13448</th>\n",
" <td>El Salvador</td>\n",
" <td>2014</td>\n",
" <td>male</td>\n",
" <td>35-54 years</td>\n",
" <td>85.0</td>\n",
" <td>586412.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>26265 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" country year sex age suicides_no \\\n",
"10289 Cuba 1993 female 75+ years 84.0 \n",
"18495 Hungary 2004 female 5-14 years 2.0 \n",
"1930 Aruba 1987 male 55-74 years 0.0 \n",
"20315 Italy 2001 male 75+ years 560.0 \n",
"15269 Georgia 1993 female 75+ years NaN \n",
"... ... ... ... ... ... \n",
"35206 Singapore 1981 male 55-74 years 18.0 \n",
"33416 Saint Kitts and Nevis 1987 male 35-54 years 0.0 \n",
"7622 Bulgaria 2011 female 35-54 years 41.0 \n",
"37277 Suriname 1982 female 75+ years 1.0 \n",
"13448 El Salvador 2014 male 35-54 years 85.0 \n",
"\n",
" population \n",
"10289 208800.0 \n",
"18495 544457.0 \n",
"1930 3118.0 \n",
"20315 1675192.0 \n",
"15269 133600.0 \n",
"... ... \n",
"35206 108600.0 \n",
"33416 NaN \n",
"7622 1036483.0 \n",
"37277 3100.0 \n",
"13448 586412.0 \n",
"\n",
"[26265 rows x 6 columns]"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the shuffled training subset as the cell output (pandas truncates the rendered table).\n",
"train"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "second-crime",
"metadata": {},
"outputs": [],
"source": [
"# Task 4\n",
"# It seems to me that no data normalization is required for my dataset."
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "spread-yield",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"country 0\n",
"year 0\n",
"sex 0\n",
"age 0\n",
"suicides_no 1346\n",
"population 3282\n",
"dtype: int64\n",
"country 0\n",
"year 0\n",
"sex 0\n",
"age 0\n",
"suicides_no 456\n",
"population 1048\n",
"dtype: int64\n",
"country 0\n",
"year 0\n",
"sex 0\n",
"age 0\n",
"suicides_no 454\n",
"population 1130\n",
"dtype: int64\n"
]
}
],
"source": [
"# Task 5\n",
"# Count the missing values in each column, separately for every subset.\n",
"print(train.isna().sum())\n",
"print(validate.isna().sum())\n",
"print(test.isna().sum())"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "conventional-orleans",
"metadata": {},
"outputs": [],
"source": [
"# Drop every row that has a missing value in any column.\n",
"# Reassign instead of using inplace=True: the subsets are derived from another DataFrame\n",
"# (the shuffled `sc`), and mutating derived frames in place can raise SettingWithCopyWarning.\n",
"train = train.dropna()\n",
"validate = validate.dropna()\n",
"test = test.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 95,
"id": "secret-coffee",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" country year sex age suicides_no population\n",
"10289 Cuba 1993 female 75+ years 84.0 208800.0\n",
"18495 Hungary 2004 female 5-14 years 2.0 544457.0\n",
"1930 Aruba 1987 male 55-74 years 0.0 3118.0\n",
"20315 Italy 2001 male 75+ years 560.0 1675192.0\n",
"23505 Luxembourg 1984 male 5-14 years 0.0 22100.0\n",
"... ... ... ... ... ... ...\n",
"18031 Hong Kong SAR 2002 male 25-34 years 145.0 487800.0\n",
"35206 Singapore 1981 male 55-74 years 18.0 108600.0\n",
"7622 Bulgaria 2011 female 35-54 years 41.0 1036483.0\n",
"37277 Suriname 1982 female 75+ years 1.0 3100.0\n",
"13448 El Salvador 2014 male 35-54 years 85.0 586412.0\n",
"\n",
"[21637 rows x 6 columns]\n",
" country year sex age suicides_no population\n",
"19952 Israel 2009 male 35-54 years 91.0 836965.0\n",
"36424 South Africa 2001 female 55-74 years 6.0 2053745.0\n",
"23461 Luxembourg 1981 female 25-34 years 3.0 28300.0\n",
"16512 Grenada 2009 female 15-24 years 0.0 11815.0\n",
"12873 Ecuador 2015 male 5-14 years 35.0 1569519.0\n",
"... ... ... ... ... ... ...\n",
"7523 Bulgaria 2002 male 75+ years 181.0 198560.0\n",
"42715 Uruguay 2009 male 25-34 years 79.0 238754.0\n",
"36799 Spain 1995 male 25-34 years 398.0 3196300.0\n",
"1559 Armenia 1986 male 75+ years 2.0 29000.0\n",
"13313 El Salvador 2003 female 75+ years 1.0 71062.0\n",
"\n",
"[7251 rows x 6 columns]\n",
" country year sex age suicides_no population\n",
"13528 Estonia 1988 female 55-74 years 40.0 169100.0\n",
"25017 Mauritius 1991 male 5-14 years 0.0 103900.0\n",
"19317 Ireland 1992 male 5-14 years 3.0 339800.0\n",
"7928 Canada 1999 male 35-54 years 1442.0 4743615.0\n",
"2107 Aruba 2011 male 25-34 years 0.0 5440.0\n",
"... ... ... ... ... ... ...\n",
"37194 Sri Lanka 2001 male 15-24 years 508.0 1811743.0\n",
"16850 Guatemala 1984 female 35-54 years 0.0 596000.0\n",
"6265 Brazil 1984 female 25-34 years 233.0 10566400.0\n",
"860 Antigua and Barbuda 1995 male 35-54 years 0.0 7809.0\n",
"15795 Germany 2011 female 5-14 years 9.0 3641215.0\n",
2021-03-21 20:44:09 +01:00
"\n",
2021-03-22 00:03:37 +01:00
"[7172 rows x 6 columns]\n"
2021-03-21 20:44:09 +01:00
]
}
],
2021-03-21 20:43:50 +01:00
"source": [
2021-03-22 00:03:37 +01:00
"# Print the three subsets one after another, each on its own line block.\n",
"print(train, validate, test, sep='\\n')"
2021-03-21 20:43:50 +01:00
]
2021-03-21 20:44:09 +01:00
},
{
"cell_type": "code",
"execution_count": null,
2021-03-22 00:03:37 +01:00
"id": "engaged-enough",
2021-03-21 20:44:09 +01:00
"metadata": {},
"outputs": [],
"source": []
2021-03-21 20:43:50 +01:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2021-03-22 00:03:37 +01:00
"version": "3.8.5"
2021-03-21 20:43:50 +01:00
}
},
2021-03-21 20:28:34 +01:00
"nbformat": 4,
"nbformat_minor": 5
}