Add comments

This commit is contained in:
theta00 2022-03-20 19:23:11 +01:00
parent 209f5d7d71
commit 66cc8301ee

View File

@ -3,18 +3,17 @@
{
"cell_type": "code",
"execution_count": null,
"id": "aware-aberdeen",
"id": "sorted-malawi",
"metadata": {},
"outputs": [],
"source": [
"# ~/.local/bin/kaggle datasets download -d tejashvi14/travel-insurance-prediction-data\n",
"!kaggle datasets download -d tejashvi14/travel-insurance-prediction-data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "flexible-representation",
"id": "material-export",
"metadata": {},
"outputs": [],
"source": [
@ -24,7 +23,7 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "former-testing",
"id": "excited-midnight",
"metadata": {},
"outputs": [
{
@ -1004,7 +1003,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "korean-cuisine",
"id": "coordinate-count",
"metadata": {},
"outputs": [],
"source": [
@ -1015,7 +1014,7 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "polish-shift",
"id": "stock-fabric",
"metadata": {},
"outputs": [
{
@ -1998,23 +1997,251 @@
{
"cell_type": "code",
"execution_count": 8,
"id": "split-little",
"id": "authentic-christmas",
"metadata": {},
"outputs": [],
"source": [
"# split into train/dev/test subsets\n",
"import sklearn\n",
"from sklearn.model_selection import train_test_split\n",
"travel_insurance_train, travel_insurance_rest = sklearn.model_selection.train_test_split(travel_insurance, test_size=0.4, random_state=1)\n",
"travel_insurance_test, travel_insurance_dev = sklearn.model_selection.train_test_split(travel_insurance_rest, test_size=0.5, random_state=1)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "coordinate-swimming",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Age</th>\n",
" <th>Employment Type</th>\n",
" <th>GraduateOrNot</th>\n",
" <th>AnnualIncome</th>\n",
" <th>FamilyMembers</th>\n",
" <th>ChronicDiseases</th>\n",
" <th>FrequentFlyer</th>\n",
" <th>EverTravelledAbroad</th>\n",
" <th>TravelInsurance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1987.0</td>\n",
" <td>1987</td>\n",
" <td>1988</td>\n",
" <td>1.987000e+03</td>\n",
" <td>1988.000000</td>\n",
" <td>1988.000000</td>\n",
" <td>1988</td>\n",
" <td>1988</td>\n",
" <td>1988.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>11.0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>28.0</td>\n",
" <td>private sector/self employed</td>\n",
" <td>yes</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>no</td>\n",
" <td>no</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>506.0</td>\n",
" <td>1417</td>\n",
" <td>1693</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1571</td>\n",
" <td>1608</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>9.327630e+05</td>\n",
" <td>4.753018</td>\n",
" <td>0.277666</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.357646</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.768557e+05</td>\n",
" <td>1.609254</td>\n",
" <td>0.447960</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.479428</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.000000e+05</td>\n",
" <td>2.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6.000000e+05</td>\n",
" <td>4.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>9.000000e+05</td>\n",
" <td>5.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.250000e+06</td>\n",
" <td>6.000000</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.800000e+06</td>\n",
" <td>9.000000</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Age Employment Type GraduateOrNot AnnualIncome \\\n",
"count 1987.0 1987 1988 1.987000e+03 \n",
"unique 11.0 2 2 NaN \n",
"top 28.0 private sector/self employed yes NaN \n",
"freq 506.0 1417 1693 NaN \n",
"mean NaN NaN NaN 9.327630e+05 \n",
"std NaN NaN NaN 3.768557e+05 \n",
"min NaN NaN NaN 3.000000e+05 \n",
"25% NaN NaN NaN 6.000000e+05 \n",
"50% NaN NaN NaN 9.000000e+05 \n",
"75% NaN NaN NaN 1.250000e+06 \n",
"max NaN NaN NaN 1.800000e+06 \n",
"\n",
" FamilyMembers ChronicDiseases FrequentFlyer EverTravelledAbroad \\\n",
"count 1988.000000 1988.000000 1988 1988 \n",
"unique NaN NaN 2 2 \n",
"top NaN NaN no no \n",
"freq NaN NaN 1571 1608 \n",
"mean 4.753018 0.277666 NaN NaN \n",
"std 1.609254 0.447960 NaN NaN \n",
"min 2.000000 0.000000 NaN NaN \n",
"25% 4.000000 0.000000 NaN NaN \n",
"50% 5.000000 0.000000 NaN NaN \n",
"75% 6.000000 1.000000 NaN NaN \n",
"max 9.000000 1.000000 NaN NaN \n",
"\n",
" TravelInsurance \n",
"count 1988.000000 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
"mean 0.357646 \n",
"std 0.479428 \n",
"min 0.000000 \n",
"25% 0.000000 \n",
"50% 0.000000 \n",
"75% 1.000000 \n",
"max 1.000000 "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"travel_insurance.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "blank-developer",
"id": "enhanced-logic",
"metadata": {},
"outputs": [],
"source": [
"# returning information about the given dataset\n",
"\n",
"import seaborn as sns\n",
"\n",
"def printInformation(data):\n",
@ -2031,7 +2258,7 @@
{
"cell_type": "code",
"execution_count": 24,
"id": "reliable-operations",
"id": "introductory-zimbabwe",
"metadata": {},
"outputs": [
{
@ -2099,7 +2326,7 @@
{
"cell_type": "code",
"execution_count": 11,
"id": "chronic-noise",
"id": "working-championship",
"metadata": {},
"outputs": [
{
@ -2169,7 +2396,7 @@
{
"cell_type": "code",
"execution_count": 12,
"id": "elect-dragon",
"id": "ancient-junction",
"metadata": {},
"outputs": [
{
@ -2239,7 +2466,7 @@
{
"cell_type": "code",
"execution_count": 13,
"id": "younger-hollow",
"id": "cordless-hawaiian",
"metadata": {},
"outputs": [
{
@ -2309,7 +2536,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "judicial-patient",
"id": "stretch-career",
"metadata": {},
"outputs": [],
"source": []