diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000..2e732b0 --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,11 @@ +pipeline { + agent any + stages { + stage('Stage 1') { + steps { + echo 'Hello world!' + } + } + } +} + diff --git a/zad_02.ipynb b/zad_02.ipynb index 6d91d57..98f78b5 100644 --- a/zad_02.ipynb +++ b/zad_02.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "f5229180", + "id": "12dba44a", "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "2d3b5bee", + "id": "1d480e94", "metadata": {}, "outputs": [ { @@ -31,8 +31,8 @@ }, { "cell_type": "code", - "execution_count": 34, - "id": "fbbeb52d", + "execution_count": 41, + "id": "13a40d88", "metadata": { "scrolled": true }, @@ -225,7 +225,7 @@ "[5001 rows x 8 columns]" ] }, - "execution_count": 34, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -237,7 +237,7 @@ }, { "cell_type": "markdown", - "id": "1f9629f1", + "id": "51c05e9a", "metadata": {}, "source": [ "Wyczyści zbiór z artefaktów (np. puste linie, przykłady z niepoprawnymi wartościami)" @@ -245,28 +245,20 @@ }, { "cell_type": "code", - "execution_count": 37, - "id": "ad18b250", + "execution_count": 42, + "id": "c70571df", "metadata": {}, "outputs": [], "source": [ "def clean_data(data):\n", " data.dropna(inplace=True)\n", - "\n", - " # usuń wiersze z niepoprawnymi wartościami\n", - " for col in data.columns:\n", - " if data[col].dtype == float:\n", - " data = data[(data[col] >= 0.0) & (data[col] <= 1.0)]\n", - " elif data[col].dtype == int:\n", - " data = data[(data[col] >= 0)]\n", - "\n", " return data" ] }, { "cell_type": "code", - "execution_count": 38, - "id": "8154dfd8", + "execution_count": 43, + "id": "0481b0dd", "metadata": {}, "outputs": [ { @@ -301,17 +293,163 @@ " \n", " \n", " \n", + " \n", + " 0\n", + " 1\n", + " 11.8\n", + " 6.1\n", + " 1\n", + " 0\n", + " 1\n", + " 1\n", + " Male\n", + " \n", + " \n", + " 1\n", + " 0\n", + " 14.0\n", + " 5.4\n", + " 0\n", + " 0\n", + " 1\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 2\n", + " 0\n", + " 11.8\n", + " 6.3\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " Male\n", + " \n", + " \n", + " 3\n", + " 0\n", + " 14.4\n", + " 6.1\n", + " 0\n", + " 1\n", + " 1\n", + " 1\n", + " Male\n", + " \n", + " \n", + " 4\n", + " 1\n", + " 13.5\n", + " 5.9\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 4996\n", + " 1\n", + " 13.6\n", + " 5.1\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 4997\n", + " 1\n", + " 11.9\n", + " 5.4\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 4998\n", + " 1\n", + " 12.9\n", + " 5.7\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 4999\n", + " 1\n", + " 13.2\n", + " 6.2\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 5000\n", + " 1\n", + " 15.4\n", + " 5.4\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " Male\n", + " \n", " \n", "\n", + "

5001 rows × 8 columns

\n", "" ], "text/plain": [ - "Empty DataFrame\n", - "Columns: [long_hair, forehead_width_cm, forehead_height_cm, nose_wide, nose_long, lips_thin, distance_nose_to_lip_long, gender]\n", - "Index: []" + " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n", + "0 1 11.8 6.1 1 0 \n", + "1 0 14.0 5.4 0 0 \n", + "2 0 11.8 6.3 1 1 \n", + "3 0 14.4 6.1 0 1 \n", + "4 1 13.5 5.9 0 0 \n", + "... ... ... ... ... ... \n", + "4996 1 13.6 5.1 0 0 \n", + "4997 1 11.9 5.4 0 0 \n", + "4998 1 12.9 5.7 0 0 \n", + "4999 1 13.2 6.2 0 0 \n", + "5000 1 15.4 5.4 1 1 \n", + "\n", + " lips_thin distance_nose_to_lip_long gender \n", + "0 1 1 Male \n", + "1 1 0 Female \n", + "2 1 1 Male \n", + "3 1 1 Male \n", + "4 0 0 Female \n", + "... ... ... ... \n", + "4996 0 0 Female \n", + "4997 0 0 Female \n", + "4998 0 0 Female \n", + "4999 0 0 Female \n", + "5000 1 1 Male \n", + "\n", + "[5001 rows x 8 columns]" ] }, - "execution_count": 38, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -323,7 +461,7 @@ }, { "cell_type": "markdown", - "id": "bd27b530", + "id": "717fab23", "metadata": {}, "source": [ "Dokona normalizacji danych w zbiorze (np. normalizacja wartości float do zakresu 0.0 - 1.0)" @@ -331,8 +469,8 @@ }, { "cell_type": "code", - "execution_count": 19, - "id": "b81c3005", + "execution_count": 44, + "id": "7fcacf03", "metadata": {}, "outputs": [], "source": [ @@ -347,8 +485,8 @@ }, { "cell_type": "code", - "execution_count": 27, - "id": "611929ca", + "execution_count": 45, + "id": "bfd844ad", "metadata": {}, "outputs": [], "source": [ @@ -357,8 +495,8 @@ }, { "cell_type": "code", - "execution_count": 31, - "id": "64724998", + "execution_count": 46, + "id": "2d0b8499", "metadata": {}, "outputs": [ { @@ -393,17 +531,163 @@ " \n", " \n", " \n", + " \n", + " 0\n", + " 1\n", + " 0.097561\n", + " 0.50\n", + " 1\n", + " 0\n", + " 1\n", + " 1\n", + " Male\n", + " \n", + " \n", + " 1\n", + " 0\n", + " 0.634146\n", + " 0.15\n", + " 0\n", + " 0\n", + " 1\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 2\n", + " 0\n", + " 0.097561\n", + " 0.60\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " Male\n", + " \n", + " \n", + " 3\n", + " 0\n", + " 0.731707\n", + " 0.50\n", + " 0\n", + " 1\n", + " 1\n", + " 1\n", + " Male\n", + " \n", + " \n", + " 4\n", + " 1\n", + " 0.512195\n", + " 0.40\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 4996\n", + " 1\n", + " 0.536585\n", + " 0.00\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 4997\n", + " 1\n", + " 0.121951\n", + " 0.15\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 4998\n", + " 1\n", + " 0.365854\n", + " 0.30\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 4999\n", + " 1\n", + " 0.439024\n", + " 0.55\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 5000\n", + " 1\n", + " 0.975610\n", + " 0.15\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " Male\n", + " \n", " \n", "\n", + "

5001 rows × 8 columns

\n", "" ], "text/plain": [ - "Empty DataFrame\n", - "Columns: [long_hair, forehead_width_cm, forehead_height_cm, nose_wide, nose_long, lips_thin, distance_nose_to_lip_long, gender]\n", - "Index: []" + " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n", + "0 1 0.097561 0.50 1 0 \n", + "1 0 0.634146 0.15 0 0 \n", + "2 0 0.097561 0.60 1 1 \n", + "3 0 0.731707 0.50 0 1 \n", + "4 1 0.512195 0.40 0 0 \n", + "... ... ... ... ... ... \n", + "4996 1 0.536585 0.00 0 0 \n", + "4997 1 0.121951 0.15 0 0 \n", + "4998 1 0.365854 0.30 0 0 \n", + "4999 1 0.439024 0.55 0 0 \n", + "5000 1 0.975610 0.15 1 1 \n", + "\n", + " lips_thin distance_nose_to_lip_long gender \n", + "0 1 1 Male \n", + "1 1 0 Female \n", + "2 1 1 Male \n", + "3 1 1 Male \n", + "4 0 0 Female \n", + "... ... ... ... \n", + "4996 0 0 Female \n", + "4997 0 0 Female \n", + "4998 0 0 Female \n", + "4999 0 0 Female \n", + "5000 1 1 Male \n", + "\n", + "[5001 rows x 8 columns]" ] }, - "execution_count": 31, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -414,7 +698,7 @@ }, { "cell_type": "markdown", - "id": "7013b00e", + "id": "61fbcddc", "metadata": {}, "source": [ "2. Jeśli brak w zbiorze gotowego podziału na podzbiory train/dev/test, to dokona takiego podziału" @@ -422,8 +706,8 @@ }, { "cell_type": "code", - "execution_count": 28, - "id": "9eb24b71", + "execution_count": 47, + "id": "dc386189", "metadata": {}, "outputs": [], "source": [ @@ -437,8 +721,8 @@ }, { "cell_type": "code", - "execution_count": 30, - "id": "81d1cd62", + "execution_count": 48, + "id": "9f888962", "metadata": {}, "outputs": [ { @@ -473,17 +757,163 @@ " \n", " \n", " \n", + " \n", + " 4432\n", + " 1\n", + " 0.512195\n", + " 0.10\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " Male\n", + " \n", + " \n", + " 2162\n", + " 1\n", + " 0.243902\n", + " 0.70\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " Male\n", + " \n", + " \n", + " 2396\n", + " 1\n", + " 0.512195\n", + " 0.15\n", + " 1\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 4769\n", + " 1\n", + " 0.853659\n", + " 0.10\n", + " 1\n", + " 1\n", + " 0\n", + " 1\n", + " Male\n", + " \n", + " \n", + " 2271\n", + " 1\n", + " 0.292683\n", + " 0.70\n", + " 0\n", + " 1\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 846\n", + " 1\n", + " 0.097561\n", + " 0.45\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " Male\n", + " \n", + " \n", + " 2551\n", + " 0\n", + " 0.243902\n", + " 0.35\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " Male\n", + " \n", + " \n", + " 2928\n", + " 1\n", + " 0.634146\n", + " 0.20\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 117\n", + " 1\n", + " 0.707317\n", + " 0.50\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", + " \n", + " 645\n", + " 1\n", + " 0.195122\n", + " 0.05\n", + " 1\n", + " 0\n", + " 0\n", + " 0\n", + " Female\n", + " \n", " \n", "\n", + "

750 rows × 8 columns

\n", "" ], "text/plain": [ - "Empty DataFrame\n", - "Columns: [long_hair, forehead_width_cm, forehead_height_cm, nose_wide, nose_long, lips_thin, distance_nose_to_lip_long, gender]\n", - "Index: []" + " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n", + "4432 1 0.512195 0.10 1 1 \n", + "2162 1 0.243902 0.70 1 1 \n", + "2396 1 0.512195 0.15 1 0 \n", + "4769 1 0.853659 0.10 1 1 \n", + "2271 1 0.292683 0.70 0 1 \n", + "... ... ... ... ... ... \n", + "846 1 0.097561 0.45 1 1 \n", + "2551 0 0.243902 0.35 1 1 \n", + "2928 1 0.634146 0.20 0 0 \n", + "117 1 0.707317 0.50 0 0 \n", + "645 1 0.195122 0.05 1 0 \n", + "\n", + " lips_thin distance_nose_to_lip_long gender \n", + "4432 1 1 Male \n", + "2162 1 1 Male \n", + "2396 0 0 Female \n", + "4769 0 1 Male \n", + "2271 0 0 Female \n", + "... ... ... ... \n", + "846 1 1 Male \n", + "2551 1 1 Male \n", + "2928 0 0 Female \n", + "117 0 0 Female \n", + "645 0 0 Female \n", + "\n", + "[750 rows x 8 columns]" ] }, - "execution_count": 30, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } @@ -494,8 +924,8 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "851d9aa0", + "execution_count": 49, + "id": "4598cea1", "metadata": {}, "outputs": [ { @@ -504,13 +934,13 @@ "text": [ " long_hair forehead_width_cm forehead_height_cm nose_wide \\\n", "count 5001.000000 5001.000000 5001.000000 5001.000000 \n", - "mean 0.869626 13.181484 5.946311 0.493901 \n", - "std 0.336748 1.107128 0.541268 0.500013 \n", - "min 0.000000 11.400000 5.100000 0.000000 \n", - "25% 1.000000 12.200000 5.500000 0.000000 \n", - "50% 1.000000 13.100000 5.900000 0.000000 \n", - "75% 1.000000 14.000000 6.400000 1.000000 \n", - "max 1.000000 15.500000 7.100000 1.000000 \n", + "mean 0.869626 0.434508 0.423155 0.493901 \n", + "std 0.336748 0.270031 0.270634 0.500013 \n", + "min 0.000000 0.000000 0.000000 0.000000 \n", + "25% 1.000000 0.195122 0.200000 0.000000 \n", + "50% 1.000000 0.414634 0.400000 0.000000 \n", + "75% 1.000000 0.634146 0.650000 1.000000 \n", + "max 1.000000 1.000000 1.000000 1.000000 \n", "\n", " nose_long lips_thin distance_nose_to_lip_long \n", "count 5001.000000 5001.000000 5001.000000 \n", @@ -523,13 +953,13 @@ "max 1.000000 1.000000 1.000000 \n", " long_hair forehead_width_cm forehead_height_cm nose_wide \\\n", "count 3500.000000 3500.000000 3500.000000 3500.000000 \n", - "mean 0.870000 13.187686 5.951800 0.505714 \n", - "std 0.336351 1.109019 0.542695 0.500039 \n", - "min 0.000000 11.400000 5.100000 0.000000 \n", - "25% 1.000000 12.200000 5.500000 0.000000 \n", - "50% 1.000000 13.100000 5.900000 1.000000 \n", - "75% 1.000000 14.000000 6.400000 1.000000 \n", - "max 1.000000 15.500000 7.100000 1.000000 \n", + "mean 0.870000 0.436021 0.425900 0.505714 \n", + "std 0.336351 0.270492 0.271348 0.500039 \n", + "min 0.000000 0.000000 0.000000 0.000000 \n", + "25% 1.000000 0.195122 0.200000 0.000000 \n", + "50% 1.000000 0.414634 0.400000 1.000000 \n", + "75% 1.000000 0.634146 0.650000 1.000000 \n", + "max 1.000000 1.000000 1.000000 1.000000 \n", "\n", " nose_long lips_thin distance_nose_to_lip_long \n", "count 3500.000000 3500.000000 3500.000000 \n", @@ -542,13 +972,13 @@ "max 1.000000 1.000000 1.000000 \n", " long_hair forehead_width_cm forehead_height_cm nose_wide \\\n", "count 750.000000 750.000000 750.000000 750.000000 \n", - "mean 0.870667 13.119067 5.933867 0.472000 \n", - "std 0.335792 1.084345 0.538999 0.499549 \n", - "min 0.000000 11.400000 5.100000 0.000000 \n", - "25% 1.000000 12.200000 5.500000 0.000000 \n", - "50% 1.000000 13.100000 5.900000 0.000000 \n", - "75% 1.000000 14.000000 6.375000 1.000000 \n", - "max 1.000000 15.500000 7.100000 1.000000 \n", + "mean 0.870667 0.419285 0.416933 0.472000 \n", + "std 0.335792 0.264474 0.269500 0.499549 \n", + "min 0.000000 0.000000 0.000000 0.000000 \n", + "25% 1.000000 0.195122 0.200000 0.000000 \n", + "50% 1.000000 0.414634 0.400000 0.000000 \n", + "75% 1.000000 0.634146 0.637500 1.000000 \n", + "max 1.000000 1.000000 1.000000 1.000000 \n", "\n", " nose_long lips_thin distance_nose_to_lip_long \n", "count 750.000000 750.000000 750.000000 \n", @@ -561,13 +991,13 @@ "max 1.000000 1.000000 1.000000 \n", " long_hair forehead_width_cm forehead_height_cm nose_wide \\\n", "count 751.000000 751.000000 751.000000 751.000000 \n", - "mean 0.866844 13.214913 5.933156 0.460719 \n", - "std 0.339969 1.119877 0.537134 0.498787 \n", - "min 0.000000 11.400000 5.100000 0.000000 \n", - "25% 1.000000 12.200000 5.500000 0.000000 \n", - "50% 1.000000 13.200000 5.900000 0.000000 \n", - "75% 1.000000 14.100000 6.300000 1.000000 \n", - "max 1.000000 15.500000 7.100000 1.000000 \n", + "mean 0.866844 0.442662 0.416578 0.460719 \n", + "std 0.339969 0.273141 0.268567 0.498787 \n", + "min 0.000000 0.000000 0.000000 0.000000 \n", + "25% 1.000000 0.195122 0.200000 0.000000 \n", + "50% 1.000000 0.439024 0.400000 0.000000 \n", + "75% 1.000000 0.658537 0.600000 1.000000 \n", + "max 1.000000 1.000000 1.000000 1.000000 \n", "\n", " nose_long lips_thin distance_nose_to_lip_long \n", "count 751.000000 751.000000 751.000000 \n", @@ -586,416 +1016,10 @@ " print( d.describe())" ] }, - { - "cell_type": "code", - "execution_count": 20, - "id": "f52a79aa", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
long_hairforehead_width_cmforehead_height_cmnose_widenose_longlips_thindistance_nose_to_lip_longgender
150110.4390240.301111Male
258610.5609760.450000Female
265300.3658540.100001Female
105510.4390240.401111Male
70500.9268290.251111Male
...........................
208710.0487800.450110Female
188910.0487800.150000Female
462310.5365850.200000Female
159111.0000000.951010Male
134610.5365850.350000Female
\n", - "

3500 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n", - "1501 1 0.439024 0.30 1 1 \n", - "2586 1 0.560976 0.45 0 0 \n", - "2653 0 0.365854 0.10 0 0 \n", - "1055 1 0.439024 0.40 1 1 \n", - "705 0 0.926829 0.25 1 1 \n", - "... ... ... ... ... ... \n", - "2087 1 0.048780 0.45 0 1 \n", - "1889 1 0.048780 0.15 0 0 \n", - "4623 1 0.536585 0.20 0 0 \n", - "1591 1 1.000000 0.95 1 0 \n", - "1346 1 0.536585 0.35 0 0 \n", - "\n", - " lips_thin distance_nose_to_lip_long gender \n", - "1501 1 1 Male \n", - "2586 0 0 Female \n", - "2653 0 1 Female \n", - "1055 1 1 Male \n", - "705 1 1 Male \n", - "... ... ... ... \n", - "2087 1 0 Female \n", - "1889 0 0 Female \n", - "4623 0 0 Female \n", - "1591 1 0 Male \n", - "1346 0 0 Female \n", - "\n", - "[3500 rows x 8 columns]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "normalize_data(train)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "2653e41d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
long_hairforehead_width_cmforehead_height_cmnose_widenose_longlips_thindistance_nose_to_lip_longgender
150110.4390240.301111Male
258610.5609760.450000Female
265300.3658540.100001Female
105510.4390240.401111Male
70500.9268290.251111Male
...........................
208710.0487800.450110Female
188910.0487800.150000Female
462310.5365850.200000Female
159111.0000000.951010Male
134610.5365850.350000Female
\n", - "

3500 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n", - "1501 1 0.439024 0.30 1 1 \n", - "2586 1 0.560976 0.45 0 0 \n", - "2653 0 0.365854 0.10 0 0 \n", - "1055 1 0.439024 0.40 1 1 \n", - "705 0 0.926829 0.25 1 1 \n", - "... ... ... ... ... ... \n", - "2087 1 0.048780 0.45 0 1 \n", - "1889 1 0.048780 0.15 0 0 \n", - "4623 1 0.536585 0.20 0 0 \n", - "1591 1 1.000000 0.95 1 0 \n", - "1346 1 0.536585 0.35 0 0 \n", - "\n", - " lips_thin distance_nose_to_lip_long gender \n", - "1501 1 1 Male \n", - "2586 0 0 Female \n", - "2653 0 1 Female \n", - "1055 1 1 Male \n", - "705 1 1 Male \n", - "... ... ... ... \n", - "2087 1 0 Female \n", - "1889 0 0 Female \n", - "4623 0 0 Female \n", - "1591 1 0 Male \n", - "1346 0 0 Female \n", - "\n", - "[3500 rows x 8 columns]" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clean_data(train)" - ] - }, { "cell_type": "code", "execution_count": null, - "id": "bb1439e3", + "id": "8fa84a56", "metadata": {}, "outputs": [], "source": []