{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "# Import bibliotek" ], "metadata": { "id": "fbReA72OlQ_Q" } }, { "cell_type": "code", "source": [ "import sklearn\n", "from sklearn.preprocessing import OneHotEncoder\n", "from sklearn.model_selection import train_test_split\n", "from google.colab import files\n", "import pandas as pd" ], "metadata": { "id": "lIs7iUiKlVvA" }, "execution_count": 1, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Pobranie danych" ], "metadata": { "id": "PFLEmQ76IauU" } }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "plw8exY_D-2b", "outputId": "6cd21e52-fbfc-432e-e7f3-019e2ad2416c" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: kaggle in /usr/local/lib/python3.10/dist-packages (1.5.16)\n", "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.10/dist-packages (from kaggle) (1.16.0)\n", "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from kaggle) (2024.2.2)\n", "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.8.2)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.31.0)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kaggle) (4.66.2)\n", "Requirement already satisfied: python-slugify in /usr/local/lib/python3.10/dist-packages (from kaggle) (8.0.4)\n", "Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.0.7)\n", "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from kaggle) (6.1.0)\n", "Requirement already satisfied: webencodings in 
/usr/local/lib/python3.10/dist-packages (from bleach->kaggle) (0.5.1)\n", "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.10/dist-packages (from python-slugify->kaggle) (1.3)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.6)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n", "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.25.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n" ] } ], "source": [ "# Install the required libraries\n", "!pip install --user kaggle  # Kaggle API, used to download the dataset\n", "!pip install --user pandas" ] }, { "cell_type": "code", "source": [ "files.upload()\n", "# -p: do not fail when ~/.kaggle already exists (e.g. when this cell is re-run)\n", "! mkdir -p ~/.kaggle\n", "# NOTE: if kaggle.json is already in the working directory, Colab saves a new upload under\n", "# a different name (e.g. 'kaggle (4).json'), so this copies the previously uploaded file\n", "! cp kaggle.json ~/.kaggle/\n", "! chmod 600 ~/.kaggle/kaggle.json" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 88 }, "id": "vKwe6YuNFV0K", "outputId": "23d34751-9086-4508-bf1b-162d8b770e28" }, "execution_count": 3, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " \n", " Upload widget is only available when the cell has been executed in the\n", " current browser session. 
Please rerun this cell to enable.\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Saving kaggle.json to kaggle (4).json\n", "mkdir: cannot create directory ‘/root/.kaggle’: File exists\n" ] } ] }, { "cell_type": "code", "source": [ "!kaggle datasets download -d muhammadbinimran/housing-price-prediction-data" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "V0tjpXGnHprW", "outputId": "8ab72502-fd6f-4e12-966e-4bd135225b92" }, "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "housing-price-prediction-data.zip: Skipping, found more recently modified local copy (use --force to force download)\n" ] } ] }, { "cell_type": "code", "source": [ "!unzip -o housing-price-prediction-data.zip" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "KFdbSDSGH5hK", "outputId": "fe5639b9-9ff8-4c0c-c9f3-d0fd86f09c39" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Archive: housing-price-prediction-data.zip\n", " inflating: housing_price_dataset.csv \n" ] } ] }, { "cell_type": "markdown", "source": [ "# Wczytanie zbioru" ], "metadata": { "id": "tH7ufJQWI2bT" } }, { "cell_type": "code", "source": [ "!pip install --user pandas" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "D_XnqsLfI1ki", "outputId": "c9983630-5453-42cf-e5a4-e32b47c5b8ee" }, "execution_count": 6, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n", "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.25.2)\n", 
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n" ] } ] }, { "cell_type": "code", "source": [ "housing_price_dataset = pd.read_csv('housing_price_dataset.csv')\n", "housing_price_dataset" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "TKu6XCn2I5KF", "outputId": "006ac90f-d56e-4bc9-8495-af9450376102" }, "execution_count": 7, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " SquareFeet Bedrooms Bathrooms Neighborhood YearBuilt Price\n", "0 2126 4 1 Rural 1969 215355.283618\n", "1 2459 3 2 Rural 1980 195014.221626\n", "2 1860 2 1 Suburb 1970 306891.012076\n", "3 2294 2 1 Urban 1996 206786.787153\n", "4 2130 5 2 Suburb 2001 272436.239065\n", "... ... ... ... ... ... ...\n", "49995 1282 5 3 Rural 1975 100080.865895\n", "49996 2854 2 2 Suburb 1988 374507.656727\n", "49997 2979 5 3 Suburb 1962 384110.555590\n", "49998 2596 5 2 Rural 1984 380512.685957\n", "49999 1572 5 3 Rural 2011 221618.583218\n", "\n", "[50000 rows x 6 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SquareFeetBedroomsBathroomsNeighborhoodYearBuiltPrice
0212641Rural1969215355.283618
1245932Rural1980195014.221626
2186021Suburb1970306891.012076
3229421Urban1996206786.787153
4213052Suburb2001272436.239065
.....................
49995128253Rural1975100080.865895
49996285422Suburb1988374507.656727
49997297953Suburb1962384110.555590
49998259652Rural1984380512.685957
49999157253Rural2011221618.583218
\n", "

50000 rows × 6 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "housing_price_dataset", "summary": "{\n \"name\": \"housing_price_dataset\",\n \"rows\": 50000,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 575,\n \"min\": 1000,\n \"max\": 2999,\n \"num_unique_values\": 2000,\n \"samples\": [\n 2578,\n 2250,\n 1585\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 3,\n 5,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 1,\n 2,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Rural\",\n \"Suburb\",\n \"Urban\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 1950,\n \"max\": 2021,\n \"num_unique_values\": 72,\n \"samples\": [\n 2001,\n 1967,\n 1962\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 76141.84296604691,\n \"min\": -36588.16539749279,\n \"max\": 492195.2599720151,\n \"num_unique_values\": 50000,\n \"samples\": [\n 170835.03571295898,\n 126913.4699981214,\n 246611.88309182983\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "markdown", "source": [ "# Podział zbioru" ], "metadata": { "id": "2PIqECUhIvcd" } }, { "cell_type": "code", "source": [ 
"# Fixed seed so the train/dev/test partition is identical after every kernel restart.\n", "SPLIT_SEED = 42\n", "# First hold out 10% of the rows as the dev set, then take 1000 rows of the remainder as the test set.\n", "hp_train_test, hp_dev = sklearn.model_selection.train_test_split(housing_price_dataset, test_size=0.1, random_state=SPLIT_SEED)\n", "hp_train, hp_test = sklearn.model_selection.train_test_split(hp_train_test, test_size=1000, random_state=SPLIT_SEED)" ], "metadata": { "id": "Rb5GTCQGIUzE" }, "execution_count": 8, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Kodowanie zmiennych kategorycznych (one-hot)" ], "metadata": { "id": "v9X6AQHYjLA2" } }, { "cell_type": "code", "source": [ "housing_price_dataset[\"Neighborhood\"].unique()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "iU0adUbpjdSS", "outputId": "d8bb2852-9017-40b6-bf1b-1619c869c8de" }, "execution_count": 9, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array(['Rural', 'Suburb', 'Urban'], dtype=object)" ] }, "metadata": {}, "execution_count": 9 } ] }, { "cell_type": "code", "source": [ "# Encode every split against the category list of the full dataset, so that train/dev/test\n", "# always get the same dummy columns even if a neighborhood is absent from one of the splits.\n", "neighborhood_dtype = pd.CategoricalDtype(sorted(housing_price_dataset['Neighborhood'].dropna().unique()))\n", "hp_train = pd.get_dummies(hp_train.astype({'Neighborhood': neighborhood_dtype}), columns=['Neighborhood'])\n", "hp_dev = pd.get_dummies(hp_dev.astype({'Neighborhood': neighborhood_dtype}), columns=['Neighborhood'])\n", "hp_test = pd.get_dummies(hp_test.astype({'Neighborhood': neighborhood_dtype}), columns=['Neighborhood'])" ], "metadata": { "id": "oLibzeZ5kivR" }, "execution_count": 10, "outputs": [] }, { "cell_type": "code", "source": [ "hp_train" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "1Pjm-8iKsMH-", "outputId": "6bdf19b2-ac29-4f7e-a479-5217df193eba" }, "execution_count": 11, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n", "7616 2027 3 3 2013 237960.032012 \n", "47787 1292 5 1 2021 86121.435887 \n", "35285 1964 2 3 1970 208054.904277 \n", "8718 2581 4 2 1990 230475.439055 \n", "36680 2020 5 2 2011 278860.337033 \n", "... ... ... ... ... ... 
\n", "22830 1245 5 1 1975 167679.728402 \n", "43699 2065 4 2 2021 257521.317661 \n", "21160 1967 3 1 1951 262332.423882 \n", "30915 2867 2 3 1990 311233.596471 \n", "19117 1631 3 1 1967 200594.974438 \n", "\n", " Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n", "7616 0 0 1 \n", "47787 0 1 0 \n", "35285 0 0 1 \n", "8718 1 0 0 \n", "36680 0 0 1 \n", "... ... ... ... \n", "22830 1 0 0 \n", "43699 0 1 0 \n", "21160 0 1 0 \n", "30915 0 0 1 \n", "19117 1 0 0 \n", "\n", "[44000 rows x 8 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SquareFeetBedroomsBathroomsYearBuiltPriceNeighborhood_RuralNeighborhood_SuburbNeighborhood_Urban
76162027332013237960.032012001
47787129251202186121.435887010
352851964231970208054.904277001
87182581421990230475.439055100
366802020522011278860.337033001
...........................
228301245511975167679.728402100
436992065422021257521.317661010
211601967311951262332.423882010
309152867231990311233.596471001
191171631311967200594.974438100
\n", "

44000 rows × 8 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "hp_train", "summary": "{\n \"name\": \"hp_train\",\n \"rows\": 44000,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 575,\n \"min\": 1000,\n \"max\": 2999,\n \"num_unique_values\": 2000,\n \"samples\": [\n 2015,\n 2776,\n 1529\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 5,\n 4,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 1950,\n \"max\": 2021,\n \"num_unique_values\": 72,\n \"samples\": [\n 2011,\n 1950,\n 1966\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 76107.65251634463,\n \"min\": -36588.16539749279,\n \"max\": 492195.2599720151,\n \"num_unique_values\": 44000,\n \"samples\": [\n 127869.24389754632,\n 331602.267141956,\n 149546.59653504143\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n 
\"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 11 } ] }, { "cell_type": "code", "source": [ "hp_dev" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "ab4RCTHUt9Vt", "outputId": "6ccc34ad-8a8c-4677-c521-c6d821776e11" }, "execution_count": 12, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n", "46301 2845 4 3 1954 354875.353057 \n", "10023 2362 4 3 2010 292371.871755 \n", "37044 1058 3 2 2007 155277.040755 \n", "17462 2891 5 1 2005 239120.147027 \n", "13804 2244 5 2 1966 254005.280471 \n", "... ... ... ... ... ... \n", "35925 1684 4 1 1950 212224.505489 \n", "21799 1021 5 3 1995 139005.940982 \n", "4318 2741 4 2 1962 339074.548520 \n", "31492 2053 3 3 2014 239382.414641 \n", "26727 2963 3 1 2004 321585.613385 \n", "\n", " Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n", "46301 0 0 1 \n", "10023 1 0 0 \n", "37044 0 1 0 \n", "17462 1 0 0 \n", "13804 1 0 0 \n", "... ... ... ... \n", "35925 1 0 0 \n", "21799 1 0 0 \n", "4318 1 0 0 \n", "31492 0 0 1 \n", "26727 0 1 0 \n", "\n", "[5000 rows x 8 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SquareFeetBedroomsBathroomsYearBuiltPriceNeighborhood_RuralNeighborhood_SuburbNeighborhood_Urban
463012845431954354875.353057001
100232362432010292371.871755100
370441058322007155277.040755010
174622891512005239120.147027100
138042244521966254005.280471100
...........................
359251684411950212224.505489100
217991021531995139005.940982100
43182741421962339074.548520100
314922053332014239382.414641001
267272963312004321585.613385010
\n", "

5000 rows × 8 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "hp_dev", "summary": "{\n \"name\": \"hp_dev\",\n \"rows\": 5000,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 576,\n \"min\": 1000,\n \"max\": 2999,\n \"num_unique_values\": 1829,\n \"samples\": [\n 2667,\n 2963,\n 2213\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 3,\n 2,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 2,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 1950,\n \"max\": 2021,\n \"num_unique_values\": 72,\n \"samples\": [\n 1966,\n 1986,\n 2021\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 76778.00565792067,\n \"min\": -18159.685676249966,\n \"max\": 467492.8278233021,\n \"num_unique_values\": 5000,\n \"samples\": [\n 186133.49424564492,\n 217865.6155495013,\n 194238.86404489263\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n 
\"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 12 } ] }, { "cell_type": "code", "source": [ "hp_test" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "zjOYohYCt-md", "outputId": "723811f9-e6b4-4878-f949-0cfdced5ca3d" }, "execution_count": 13, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n", "49356 1174 5 3 1996 143866.306649 \n", "18656 1776 3 1 1964 125553.381347 \n", "27368 2524 2 2 2010 327261.077660 \n", "27243 1633 2 1 1953 241231.423110 \n", "24653 2811 4 2 1982 315724.479288 \n", "... ... ... ... ... ... \n", "20015 2106 2 2 2014 216406.701646 \n", "40921 1704 3 3 1986 153770.810572 \n", "30027 1150 5 3 1973 138938.157678 \n", "16008 2822 2 2 1982 296193.916437 \n", "23919 1348 2 2 1983 133497.577808 \n", "\n", " Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n", "49356 0 1 0 \n", "18656 0 0 1 \n", "27368 1 0 0 \n", "27243 1 0 0 \n", "24653 1 0 0 \n", "... ... ... ... \n", "20015 0 0 1 \n", "40921 1 0 0 \n", "30027 0 0 1 \n", "16008 0 1 0 \n", "23919 1 0 0 \n", "\n", "[1000 rows x 8 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SquareFeetBedroomsBathroomsYearBuiltPriceNeighborhood_RuralNeighborhood_SuburbNeighborhood_Urban
493561174531996143866.306649010
186561776311964125553.381347001
273682524222010327261.077660100
272431633211953241231.423110100
246532811421982315724.479288100
...........................
200152106222014216406.701646001
409211704331986153770.810572100
300271150531973138938.157678001
160082822221982296193.916437010
239191348221983133497.577808100
\n", "

1000 rows × 8 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "hp_test", "summary": "{\n \"name\": \"hp_test\",\n \"rows\": 1000,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 581,\n \"min\": 1000,\n \"max\": 2999,\n \"num_unique_values\": 792,\n \"samples\": [\n 2084,\n 2990,\n 1245\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 3,\n 4,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 1950,\n \"max\": 2021,\n \"num_unique_values\": 72,\n \"samples\": [\n 1982,\n 2016,\n 1960\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 74475.15532686812,\n \"min\": -7550.50457435759,\n \"max\": 437047.71344105,\n \"num_unique_values\": 1000,\n \"samples\": [\n 230653.38480715267,\n 204995.43595068945,\n 231582.08580545988\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n 
\"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 13 } ] }, { "cell_type": "markdown", "source": [ "# Statystyki" ], "metadata": { "id": "NOERGp9pYt2R" } }, { "cell_type": "markdown", "source": [ "### Wielkość podzbiorów" ], "metadata": { "id": "8qLEM0Ahis-X" } }, { "cell_type": "code", "source": [ "housing_price_dataset.describe()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 300 }, "id": "Cp-IN7cc2Dgr", "outputId": "d75f9cad-e097-4858-cd49-db618dcd42a3" }, "execution_count": 21, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " SquareFeet Bedrooms Bathrooms YearBuilt Price\n", "count 50000.000000 50000.000000 50000.000000 50000.000000 50000.000000\n", "mean 2006.374680 3.498700 1.995420 1985.404420 224827.325151\n", "std 575.513241 1.116326 0.815851 20.719377 76141.842966\n", "min 1000.000000 2.000000 1.000000 1950.000000 -36588.165397\n", "25% 1513.000000 3.000000 1.000000 1967.000000 169955.860225\n", "50% 2007.000000 3.000000 2.000000 1985.000000 225052.141166\n", "75% 2506.000000 4.000000 3.000000 2003.000000 279373.630052\n", "max 2999.000000 5.000000 3.000000 2021.000000 492195.259972" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SquareFeetBedroomsBathroomsYearBuiltPrice
count50000.00000050000.00000050000.00000050000.00000050000.000000
mean2006.3746803.4987001.9954201985.404420224827.325151
std575.5132411.1163260.81585120.71937776141.842966
min1000.0000002.0000001.0000001950.000000-36588.165397
25%1513.0000003.0000001.0000001967.000000169955.860225
50%2007.0000003.0000002.0000001985.000000225052.141166
75%2506.0000004.0000003.0000002003.000000279373.630052
max2999.0000005.0000003.0000002021.000000492195.259972
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "summary": "{\n \"name\": \"housing_price_dataset\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17058.702043862784,\n \"min\": 575.513241276615,\n \"max\": 50000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 2006.37468,\n 2007.0,\n 50000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17676.577845369047,\n \"min\": 1.1163257739856558,\n \"max\": 50000.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 50000.0,\n 3.4987,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17677.02248379316,\n \"min\": 0.8158506823228849,\n \"max\": 50000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 50000.0,\n 1.99542,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17088.851960342447,\n \"min\": 20.71937668741524,\n \"max\": 50000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1985.40442,\n 1985.0,\n 50000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 162949.740052687,\n \"min\": -36588.16539749279,\n \"max\": 492195.2599720151,\n \"num_unique_values\": 8,\n \"samples\": [\n 224827.32515099045,\n 225052.14116600397,\n 50000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 21 } ] }, { "cell_type": "code", "source": [ "hp_train.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Icm98vi1X6Pe", "outputId": "207de571-34f3-4044-d970-9680ee895643" }, "execution_count": 14, "outputs": [ { 
"output_type": "execute_result", "data": { "text/plain": [ "(44000, 8)" ] }, "metadata": {}, "execution_count": 14 } ] }, { "cell_type": "code", "source": [ "hp_dev.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LlqC13x0Ymm6", "outputId": "890f1281-0073-48ea-93d5-f86b03bf4564" }, "execution_count": 15, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(5000, 8)" ] }, "metadata": {}, "execution_count": 15 } ] }, { "cell_type": "code", "source": [ "hp_test.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8iwbOv4AYpk4", "outputId": "54f3ca4c-033d-47e9-f285-2a1d2c07538b" }, "execution_count": 16, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(1000, 8)" ] }, "metadata": {}, "execution_count": 16 } ] }, { "cell_type": "markdown", "source": [ "### Statystyki kolumn" ], "metadata": { "id": "Y2HnsCXxiypY" } }, { "cell_type": "code", "source": [ "hp_train.describe()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 300 }, "id": "wAUskqnzi8Cl", "outputId": "9f558980-671c-4916-9877-604fa2537e5c" }, "execution_count": 17, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n", "count 44000.000000 44000.000000 44000.000000 44000.000000 44000.000000 \n", "mean 2006.261182 3.499636 1.997864 1985.416750 224928.983383 \n", "std 575.306280 1.117315 0.815760 20.700559 76107.652516 \n", "min 1000.000000 2.000000 1.000000 1950.000000 -36588.165397 \n", "25% 1513.000000 3.000000 1.000000 1967.000000 170088.571867 \n", "50% 2007.000000 3.000000 2.000000 1985.000000 225246.904135 \n", "75% 2505.000000 5.000000 3.000000 2003.000000 279365.119289 \n", "max 2999.000000 5.000000 3.000000 2021.000000 492195.259972 \n", "\n", " Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n", "count 44000.000000 44000.000000 44000.000000 \n", "mean 0.332841 0.333636 0.333523 
\n", "std 0.471235 0.471517 0.471477 \n", "min 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.000000 0.000000 \n", "50% 0.000000 0.000000 0.000000 \n", "75% 1.000000 1.000000 1.000000 \n", "max 1.000000 1.000000 1.000000 " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SquareFeetBedroomsBathroomsYearBuiltPriceNeighborhood_RuralNeighborhood_SuburbNeighborhood_Urban
count44000.00000044000.00000044000.00000044000.00000044000.00000044000.00000044000.00000044000.000000
mean2006.2611823.4996361.9978641985.416750224928.9833830.3328410.3336360.333523
std575.3062801.1173150.81576020.70055976107.6525160.4712350.4715170.471477
min1000.0000002.0000001.0000001950.000000-36588.1653970.0000000.0000000.000000
25%1513.0000003.0000001.0000001967.000000170088.5718670.0000000.0000000.000000
50%2007.0000003.0000002.0000001985.000000225246.9041350.0000000.0000000.000000
75%2505.0000005.0000003.0000002003.000000279365.1192891.0000001.0000001.000000
max2999.0000005.0000003.0000002021.000000492195.2599721.0000001.0000001.000000
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "summary": "{\n \"name\": \"hp_train\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14939.968071261836,\n \"min\": 575.3062795316038,\n \"max\": 44000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 2006.2611818181817,\n 2007.0,\n 44000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15555.206914611437,\n \"min\": 1.1173145826824615,\n \"max\": 44000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 44000.0,\n 3.4996363636363634,\n 5.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15555.70202423901,\n \"min\": 0.8157604462168441,\n \"max\": 44000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 44000.0,\n 1.9978636363636364,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14969.494474985573,\n \"min\": 20.70055860487858,\n \"max\": 44000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1985.41675,\n 1985.0,\n 44000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 163684.07731051612,\n \"min\": -36588.16539749279,\n \"max\": 492195.2599720151,\n \"num_unique_values\": 8,\n \"samples\": [\n 224928.9833827127,\n 225246.9041353957,\n 44000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15556.207564412634,\n \"min\": 0.0,\n \"max\": 44000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3328409090909091,\n 1.0,\n 0.47123548806324117\n ],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15556.207510022008,\n \"min\": 0.0,\n \"max\": 44000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3336363636363636,\n 1.0,\n 0.4715169068117858\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15556.207517787427,\n \"min\": 0.0,\n \"max\": 44000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.33352272727272725,\n 1.0,\n 0.47147679658615904\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 17 } ] }, { "cell_type": "code", "source": [ "hp_dev.describe()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 300 }, "id": "BmM4_vWsjBK3", "outputId": "b0a1906f-9eac-46a5-84b6-0cdbea344d69" }, "execution_count": 18, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n", "count 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 \n", "mean 2008.190800 3.487200 1.972600 1985.485400 224290.794530 \n", "std 576.206366 1.104753 0.816077 20.960049 76778.005658 \n", "min 1000.000000 2.000000 1.000000 1950.000000 -18159.685676 \n", "25% 1510.750000 3.000000 1.000000 1967.000000 169103.151768 \n", "50% 2007.000000 3.000000 2.000000 1985.000000 223614.924625 \n", "75% 2503.000000 4.000000 3.000000 2004.000000 279651.548644 \n", "max 2999.000000 5.000000 3.000000 2021.000000 467492.827823 \n", "\n", " Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n", "count 5000.000000 5000.000000 5000.000000 \n", "mean 0.337800 0.341600 0.320600 \n", "std 0.473007 0.474294 0.466754 \n", "min 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.000000 0.000000 \n", "50% 0.000000 0.000000 0.000000 \n", "75% 1.000000 1.000000 1.000000 \n", "max 1.000000 1.000000 1.000000 " ], 
"text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SquareFeetBedroomsBathroomsYearBuiltPriceNeighborhood_RuralNeighborhood_SuburbNeighborhood_Urban
count5000.0000005000.0000005000.0000005000.0000005000.0000005000.0000005000.0000005000.000000
mean2008.1908003.4872001.9726001985.485400224290.7945300.3378000.3416000.320600
std576.2063661.1047530.81607720.96004976778.0056580.4730070.4742940.466754
min1000.0000002.0000001.0000001950.000000-18159.6856760.0000000.0000000.000000
25%1510.7500003.0000001.0000001967.000000169103.1517680.0000000.0000000.000000
50%2007.0000003.0000002.0000001985.000000223614.9246250.0000000.0000000.000000
75%2503.0000004.0000003.0000002004.000000279651.5486441.0000001.0000001.000000
max2999.0000005.0000003.0000002021.000000467492.8278231.0000001.0000001.000000
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "summary": "{\n \"name\": \"hp_dev\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1373.0060319958575,\n \"min\": 576.2063661142855,\n \"max\": 5000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 2008.1908,\n 2007.0,\n 5000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1766.6767900146253,\n \"min\": 1.1047534820271943,\n \"max\": 5000.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 5000.0,\n 3.4872,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1767.1212371098363,\n \"min\": 0.8160774696603855,\n \"max\": 5000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 5000.0,\n 1.9726,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1352.889275943266,\n \"min\": 20.9600489400744,\n \"max\": 5000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1985.4854,\n 1985.0,\n 5000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 159056.8559402586,\n \"min\": -18159.685676249966,\n \"max\": 467492.8278233021,\n \"num_unique_values\": 8,\n \"samples\": [\n 224290.7945297919,\n 223614.92462488014,\n 5000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1767.6250346212444,\n \"min\": 0.0,\n \"max\": 5000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3378,\n 1.0,\n 0.4730073014039385\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
\"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1767.6247777120989,\n \"min\": 0.0,\n \"max\": 5000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3416,\n 1.0,\n 0.474293612529388\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1767.626219259249,\n \"min\": 0.0,\n \"max\": 5000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3206,\n 1.0,\n 0.4667539092952179\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 18 } ] }, { "cell_type": "code", "source": [ "hp_test.describe()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 300 }, "id": "T7edA8gVjBfU", "outputId": "99be05f6-e25f-45ae-9e4f-7f293d1ac14c" }, "execution_count": 19, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n", "count 1000.000000 1000.000000 1000.000000 1000.000000 1000.000000 \n", "mean 2002.288000 3.515000 2.002000 1984.457000 223037.016061 \n", "std 581.670136 1.130953 0.817719 20.330949 74475.155327 \n", "min 1000.000000 2.000000 1.000000 1950.000000 -7550.504574 \n", "25% 1507.250000 2.000000 1.000000 1967.000000 168905.529102 \n", "50% 2021.500000 4.000000 2.000000 1983.000000 220416.485632 \n", "75% 2524.000000 5.000000 3.000000 2002.000000 279628.697596 \n", "max 2999.000000 5.000000 3.000000 2021.000000 437047.713441 \n", "\n", " Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n", "count 1000.000000 1000.000000 1000.000000 \n", "mean 0.342000 0.333000 0.325000 \n", "std 0.474617 0.471522 0.468609 \n", "min 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.000000 0.000000 \n", "50% 0.000000 0.000000 0.000000 \n", "75% 1.000000 1.000000 1.000000 \n", "max 1.000000 1.000000 1.000000 " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SquareFeetBedroomsBathroomsYearBuiltPriceNeighborhood_RuralNeighborhood_SuburbNeighborhood_Urban
count1000.0000001000.0000001000.0000001000.0000001000.0000001000.0000001000.0000001000.000000
mean2002.2880003.5150002.0020001984.457000223037.0160610.3420000.3330000.325000
std581.6701361.1309530.81771920.33094974475.1553270.4746170.4715220.468609
min1000.0000002.0000001.0000001950.000000-7550.5045740.0000000.0000000.000000
25%1507.2500002.0000001.0000001967.000000168905.5291020.0000000.0000000.000000
50%2021.5000004.0000002.0000001983.000000220416.4856320.0000000.0000000.000000
75%2524.0000005.0000003.0000002002.000000279628.6975961.0000001.0000001.000000
max2999.0000005.0000003.0000002021.000000437047.7134411.0000001.0000001.000000
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "summary": "{\n \"name\": \"hp_test\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 830.5567967260185,\n \"min\": 581.6701360764563,\n \"max\": 2999.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 1000.0,\n 2002.288,\n 2524.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 352.4125101562337,\n \"min\": 1.1309527196368794,\n \"max\": 1000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 1000.0,\n 3.515,\n 5.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 352.9069492337987,\n \"min\": 0.8177191844787945,\n \"max\": 1000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 1000.0,\n 2.002,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 731.3239730266098,\n \"min\": 20.330949276866008,\n \"max\": 2021.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1984.457,\n 1983.0,\n 1000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 150108.67450064773,\n \"min\": -7550.50457435759,\n \"max\": 437047.71344105,\n \"num_unique_values\": 8,\n \"samples\": [\n 223037.01606120248,\n 220416.4856317892,\n 1000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 353.41137428502344,\n \"min\": 0.0,\n \"max\": 1000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.342,\n 1.0,\n 0.4746169626775482\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
# Columns totalled by default; in the describe() output of the splits these
# only take values between 0 and 1 (presumably 0/1 indicator columns).
NEIGHBORHOOD_COLUMNS = ('Neighborhood_Rural', 'Neighborhood_Suburb', 'Neighborhood_Urban')


def print_sum(df_name, df, columns=NEIGHBORHOOD_COLUMNS):
    """Print per-column totals of ``df`` under a heading.

    Writes ``df_name`` on its own line, then one ``<column> <sum>`` line for
    every entry of ``columns`` (in the given order), then an empty line that
    separates this report from the next one.

    Args:
        df_name: Label printed before the totals.
        df: Object indexable by column name whose columns provide ``.sum()``;
            this cell passes the train/dev/test DataFrames.
        columns: Iterable of column names to total. Defaults to the three
            ``Neighborhood_*`` columns, so existing two-argument calls print
            exactly what they did before.

    Returns:
        None. The report is only printed.
    """
    print(df_name)
    for col in columns:
        print(col, df[col].sum())
    print()


# Compare the neighborhood totals across the three splits.
print_sum("hp_train", hp_train)
print_sum("hp_dev", hp_dev)
print_sum("hp_test", hp_test)