diff --git a/preparation.ipynb b/preparation.ipynb index fd6ff92..ce918ef 100644 --- a/preparation.ipynb +++ b/preparation.ipynb @@ -3,18 +3,17 @@ { "cell_type": "code", "execution_count": null, - "id": "aware-aberdeen", + "id": "sorted-malawi", "metadata": {}, "outputs": [], "source": [ - "# ~/.local/bin/kaggle datasets download -d tejashvi14/travel-insurance-prediction-data\n", "!kaggle datasets download -d tejashvi14/travel-insurance-prediction-data" ] }, { "cell_type": "code", "execution_count": null, - "id": "flexible-representation", + "id": "material-export", "metadata": {}, "outputs": [], "source": [ @@ -24,7 +23,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "former-testing", + "id": "excited-midnight", "metadata": {}, "outputs": [ { @@ -1004,7 +1003,7 @@ { "cell_type": "code", "execution_count": null, - "id": "korean-cuisine", + "id": "coordinate-count", "metadata": {}, "outputs": [], "source": [ @@ -1015,7 +1014,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "polish-shift", + "id": "stock-fabric", "metadata": {}, "outputs": [ { @@ -1998,23 +1997,251 @@ { "cell_type": "code", "execution_count": 8, - "id": "split-little", + "id": "authentic-christmas", "metadata": {}, "outputs": [], "source": [ + "# podziaĆ na podzbiory train/dev/test\n", "import sklearn\n", "from sklearn.model_selection import train_test_split\n", "travel_insurance_train, travel_insurance_rest = sklearn.model_selection.train_test_split(travel_insurance, test_size=0.4, random_state=1)\n", "travel_insurance_test, travel_insurance_dev = sklearn.model_selection.train_test_split(travel_insurance_rest, test_size=0.5, random_state=1)" ] }, + { + "cell_type": "code", + "execution_count": 27, + "id": "coordinate-swimming", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Age | \n", + "Employment Type | \n", + "GraduateOrNot | \n", + "AnnualIncome | \n", + "FamilyMembers | \n", + "ChronicDiseases | \n", + "FrequentFlyer | \n", + "EverTravelledAbroad | \n", + "TravelInsurance | \n", + "
---|---|---|---|---|---|---|---|---|---|
count | \n", + "1987.0 | \n", + "1987 | \n", + "1988 | \n", + "1.987000e+03 | \n", + "1988.000000 | \n", + "1988.000000 | \n", + "1988 | \n", + "1988 | \n", + "1988.000000 | \n", + "
unique | \n", + "11.0 | \n", + "2 | \n", + "2 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "2 | \n", + "2 | \n", + "NaN | \n", + "
top | \n", + "28.0 | \n", + "private sector/self employed | \n", + "yes | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "no | \n", + "no | \n", + "NaN | \n", + "
freq | \n", + "506.0 | \n", + "1417 | \n", + "1693 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "1571 | \n", + "1608 | \n", + "NaN | \n", + "
mean | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "9.327630e+05 | \n", + "4.753018 | \n", + "0.277666 | \n", + "NaN | \n", + "NaN | \n", + "0.357646 | \n", + "
std | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "3.768557e+05 | \n", + "1.609254 | \n", + "0.447960 | \n", + "NaN | \n", + "NaN | \n", + "0.479428 | \n", + "
min | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "3.000000e+05 | \n", + "2.000000 | \n", + "0.000000 | \n", + "NaN | \n", + "NaN | \n", + "0.000000 | \n", + "
25% | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "6.000000e+05 | \n", + "4.000000 | \n", + "0.000000 | \n", + "NaN | \n", + "NaN | \n", + "0.000000 | \n", + "
50% | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "9.000000e+05 | \n", + "5.000000 | \n", + "0.000000 | \n", + "NaN | \n", + "NaN | \n", + "0.000000 | \n", + "
75% | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "1.250000e+06 | \n", + "6.000000 | \n", + "1.000000 | \n", + "NaN | \n", + "NaN | \n", + "1.000000 | \n", + "
max | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "1.800000e+06 | \n", + "9.000000 | \n", + "1.000000 | \n", + "NaN | \n", + "NaN | \n", + "1.000000 | \n", + "