diff --git a/preparation.ipynb b/preparation.ipynb index fd6ff92..ce918ef 100644 --- a/preparation.ipynb +++ b/preparation.ipynb @@ -3,18 +3,17 @@ { "cell_type": "code", "execution_count": null, - "id": "aware-aberdeen", + "id": "sorted-malawi", "metadata": {}, "outputs": [], "source": [ - "# ~/.local/bin/kaggle datasets download -d tejashvi14/travel-insurance-prediction-data\n", "!kaggle datasets download -d tejashvi14/travel-insurance-prediction-data" ] }, { "cell_type": "code", "execution_count": null, - "id": "flexible-representation", + "id": "material-export", "metadata": {}, "outputs": [], "source": [ @@ -24,7 +23,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "former-testing", + "id": "excited-midnight", "metadata": {}, "outputs": [ { @@ -1004,7 +1003,7 @@ { "cell_type": "code", "execution_count": null, - "id": "korean-cuisine", + "id": "coordinate-count", "metadata": {}, "outputs": [], "source": [ @@ -1015,7 +1014,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "polish-shift", + "id": "stock-fabric", "metadata": {}, "outputs": [ { @@ -1998,23 +1997,251 @@ { "cell_type": "code", "execution_count": 8, - "id": "split-little", + "id": "authentic-christmas", "metadata": {}, "outputs": [], "source": [ + "# podział na podzbiory train/dev/test\n", "import sklearn\n", "from sklearn.model_selection import train_test_split\n", "travel_insurance_train, travel_insurance_rest = sklearn.model_selection.train_test_split(travel_insurance, test_size=0.4, random_state=1)\n", "travel_insurance_test, travel_insurance_dev = sklearn.model_selection.train_test_split(travel_insurance_rest, test_size=0.5, random_state=1)" ] }, + { + "cell_type": "code", + "execution_count": 27, + "id": "coordinate-swimming", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeEmployment TypeGraduateOrNotAnnualIncomeFamilyMembersChronicDiseasesFrequentFlyerEverTravelledAbroadTravelInsurance
count1987.0198719881.987000e+031988.0000001988.000000198819881988.000000
unique11.022NaNNaNNaN22NaN
top28.0private sector/self employedyesNaNNaNNaNnonoNaN
freq506.014171693NaNNaNNaN15711608NaN
meanNaNNaNNaN9.327630e+054.7530180.277666NaNNaN0.357646
stdNaNNaNNaN3.768557e+051.6092540.447960NaNNaN0.479428
minNaNNaNNaN3.000000e+052.0000000.000000NaNNaN0.000000
25%NaNNaNNaN6.000000e+054.0000000.000000NaNNaN0.000000
50%NaNNaNNaN9.000000e+055.0000000.000000NaNNaN0.000000
75%NaNNaNNaN1.250000e+066.0000001.000000NaNNaN1.000000
maxNaNNaNNaN1.800000e+069.0000001.000000NaNNaN1.000000
\n", + "
" + ], + "text/plain": [ + " Age Employment Type GraduateOrNot AnnualIncome \\\n", + "count 1987.0 1987 1988 1.987000e+03 \n", + "unique 11.0 2 2 NaN \n", + "top 28.0 private sector/self employed yes NaN \n", + "freq 506.0 1417 1693 NaN \n", + "mean NaN NaN NaN 9.327630e+05 \n", + "std NaN NaN NaN 3.768557e+05 \n", + "min NaN NaN NaN 3.000000e+05 \n", + "25% NaN NaN NaN 6.000000e+05 \n", + "50% NaN NaN NaN 9.000000e+05 \n", + "75% NaN NaN NaN 1.250000e+06 \n", + "max NaN NaN NaN 1.800000e+06 \n", + "\n", + " FamilyMembers ChronicDiseases FrequentFlyer EverTravelledAbroad \\\n", + "count 1988.000000 1988.000000 1988 1988 \n", + "unique NaN NaN 2 2 \n", + "top NaN NaN no no \n", + "freq NaN NaN 1571 1608 \n", + "mean 4.753018 0.277666 NaN NaN \n", + "std 1.609254 0.447960 NaN NaN \n", + "min 2.000000 0.000000 NaN NaN \n", + "25% 4.000000 0.000000 NaN NaN \n", + "50% 5.000000 0.000000 NaN NaN \n", + "75% 6.000000 1.000000 NaN NaN \n", + "max 9.000000 1.000000 NaN NaN \n", + "\n", + " TravelInsurance \n", + "count 1988.000000 \n", + "unique NaN \n", + "top NaN \n", + "freq NaN \n", + "mean 0.357646 \n", + "std 0.479428 \n", + "min 0.000000 \n", + "25% 0.000000 \n", + "50% 0.000000 \n", + "75% 1.000000 \n", + "max 1.000000 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "travel_insurance.describe(include='all')" + ] + }, { "cell_type": "code", "execution_count": 23, - "id": "blank-developer", + "id": "enhanced-logic", "metadata": {}, "outputs": [], "source": [ + "# zwracanie informacji o danym zbiorze \n", + "\n", "import seaborn as sns\n", "\n", "def printInformation(data):\n", @@ -2031,7 +2258,7 @@ { "cell_type": "code", "execution_count": 24, - "id": "reliable-operations", + "id": "introductory-zimbabwe", "metadata": {}, "outputs": [ { @@ -2099,7 +2326,7 @@ { "cell_type": "code", "execution_count": 11, - "id": "chronic-noise", + "id": "working-championship", "metadata": {}, "outputs": [ { @@ -2169,7 
+2396,7 @@ { "cell_type": "code", "execution_count": 12, - "id": "elect-dragon", + "id": "ancient-junction", "metadata": {}, "outputs": [ { @@ -2239,7 +2466,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "younger-hollow", + "id": "cordless-hawaiian", "metadata": {}, "outputs": [ { @@ -2309,7 +2536,7 @@ { "cell_type": "code", "execution_count": null, - "id": "judicial-patient", + "id": "stretch-career", "metadata": {}, "outputs": [], "source": []