From 5ebf5d7bba424540247c0a43ed4ffc3ecd160c05 Mon Sep 17 00:00:00 2001 From: s495719 Date: Tue, 19 Mar 2024 23:33:31 +0100 Subject: [PATCH] Upload files to "/" --- IUM_02_Dane.ipynb | 3765 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3765 insertions(+) create mode 100644 IUM_02_Dane.ipynb diff --git a/IUM_02_Dane.ipynb b/IUM_02_Dane.ipynb new file mode 100644 index 0000000..4b78e0a --- /dev/null +++ b/IUM_02_Dane.ipynb @@ -0,0 +1,3765 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Import bibliotek" + ], + "metadata": { + "id": "fbReA72OlQ_Q" + } + }, + { + "cell_type": "code", + "source": [ + "import sklearn\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.model_selection import train_test_split\n", + "from google.colab import files\n", + "import pandas as pd" + ], + "metadata": { + "id": "lIs7iUiKlVvA" + }, + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Pobranie danych" + ], + "metadata": { + "id": "PFLEmQ76IauU" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "plw8exY_D-2b", + "outputId": "6cd21e52-fbfc-432e-e7f3-019e2ad2416c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: kaggle in /usr/local/lib/python3.10/dist-packages (1.5.16)\n", + "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.10/dist-packages (from kaggle) (1.16.0)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from kaggle) (2024.2.2)\n", + "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.8.2)\n", + 
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.31.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kaggle) (4.66.2)\n", + "Requirement already satisfied: python-slugify in /usr/local/lib/python3.10/dist-packages (from kaggle) (8.0.4)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.0.7)\n", + "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from kaggle) (6.1.0)\n", + "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->kaggle) (0.5.1)\n", + "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.10/dist-packages (from python-slugify->kaggle) (1.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.6)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n", + "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.25.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n" + ] + } + ], + "source": [ + "# Install the required libraries\n", + "!pip install --user kaggle # Kaggle API, used to download the dataset\n", + "!pip install --user pandas" + ] + }, + { + "cell_type": "code", + "source": [ + "files.upload()\n", + "! mkdir -p ~/.kaggle\n", + "! cp kaggle.json ~/.kaggle/\n", + "! 
chmod 600 ~/.kaggle/kaggle.json" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 88 + }, + "id": "vKwe6YuNFV0K", + "outputId": "23d34751-9086-4508-bf1b-162d8b770e28" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. Please rerun this cell to enable.\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving kaggle.json to kaggle (4).json\n", + "mkdir: cannot create directory ‘/root/.kaggle’: File exists\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!kaggle datasets download -d muhammadbinimran/housing-price-prediction-data" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "V0tjpXGnHprW", + "outputId": "8ab72502-fd6f-4e12-966e-4bd135225b92" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "housing-price-prediction-data.zip: Skipping, found more recently modified local copy (use --force to force download)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!unzip -o housing-price-prediction-data.zip" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KFdbSDSGH5hK", + "outputId": "fe5639b9-9ff8-4c0c-c9f3-d0fd86f09c39" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Archive: housing-price-prediction-data.zip\n", + " inflating: housing_price_dataset.csv \n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Wczytanie zbioru" + ], + "metadata": { + "id": "tH7ufJQWI2bT" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install --user pandas" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "D_XnqsLfI1ki", + "outputId": "c9983630-5453-42cf-e5a4-e32b47c5b8ee" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n", + "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.25.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "housing_price_dataset = pd.read_csv('housing_price_dataset.csv')\n", + "housing_price_dataset" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "TKu6XCn2I5KF", + "outputId": "006ac90f-d56e-4bc9-8495-af9450376102" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " SquareFeet Bedrooms Bathrooms Neighborhood YearBuilt Price\n", + "0 2126 4 1 Rural 1969 215355.283618\n", + "1 2459 3 2 Rural 1980 195014.221626\n", + "2 1860 2 1 Suburb 1970 306891.012076\n", + "3 2294 2 1 Urban 1996 206786.787153\n", + "4 2130 5 2 Suburb 2001 272436.239065\n", + "... ... ... ... ... ... ...\n", + "49995 1282 5 3 Rural 1975 100080.865895\n", + "49996 2854 2 2 Suburb 1988 374507.656727\n", + "49997 2979 5 3 Suburb 1962 384110.555590\n", + "49998 2596 5 2 Rural 1984 380512.685957\n", + "49999 1572 5 3 Rural 2011 221618.583218\n", + "\n", + "[50000 rows x 6 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SquareFeetBedroomsBathroomsNeighborhoodYearBuiltPrice
0212641Rural1969215355.283618
1245932Rural1980195014.221626
2186021Suburb1970306891.012076
3229421Urban1996206786.787153
4213052Suburb2001272436.239065
.....................
49995128253Rural1975100080.865895
49996285422Suburb1988374507.656727
49997297953Suburb1962384110.555590
49998259652Rural1984380512.685957
49999157253Rural2011221618.583218
\n", + "

50000 rows × 6 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "housing_price_dataset", + "summary": "{\n \"name\": \"housing_price_dataset\",\n \"rows\": 50000,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 575,\n \"min\": 1000,\n \"max\": 2999,\n \"num_unique_values\": 2000,\n \"samples\": [\n 2578,\n 2250,\n 1585\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 3,\n 5,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 1,\n 2,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Rural\",\n \"Suburb\",\n \"Urban\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 1950,\n \"max\": 2021,\n \"num_unique_values\": 72,\n \"samples\": [\n 2001,\n 1967,\n 1962\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 76141.84296604691,\n \"min\": -36588.16539749279,\n \"max\": 492195.2599720151,\n \"num_unique_values\": 50000,\n \"samples\": [\n 170835.03571295898,\n 126913.4699981214,\n 246611.88309182983\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Podział zbioru" + ], + "metadata": { + "id": "2PIqECUhIvcd" + } + 
}, + { + "cell_type": "code", + "source": [ + "hp_train_test, hp_dev = sklearn.model_selection.train_test_split(housing_price_dataset, test_size=0.1, random_state=42)  # fixed seed so the dev split is reproducible\n", + "hp_train, hp_test = sklearn.model_selection.train_test_split(hp_train_test, test_size=1000, random_state=42)  # hold out exactly 1000 rows for test" + ], + "metadata": { + "id": "Rb5GTCQGIUzE" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Normalizacja danych" + ], + "metadata": { + "id": "v9X6AQHYjLA2" + } + }, + { + "cell_type": "code", + "source": [ + "housing_price_dataset[\"Neighborhood\"].unique()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "iU0adUbpjdSS", + "outputId": "d8bb2852-9017-40b6-bf1b-1619c869c8de" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['Rural', 'Suburb', 'Urban'], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "hp_train = pd.get_dummies(hp_train, columns=['Neighborhood'])  # the train split defines the one-hot column set\n", + "hp_dev = pd.get_dummies(hp_dev, columns=['Neighborhood']).reindex(columns=hp_train.columns, fill_value=0)  # align columns with train\n", + "hp_test = pd.get_dummies(hp_test, columns=['Neighborhood']).reindex(columns=hp_train.columns, fill_value=0)  # align columns with train" + ], + "metadata": { + "id": "oLibzeZ5kivR" + }, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "hp_train" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "1Pjm-8iKsMH-", + "outputId": "6bdf19b2-ac29-4f7e-a479-5217df193eba" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n", + "7616 2027 3 3 2013 237960.032012 \n", + "47787 1292 5 1 2021 86121.435887 \n", + "35285 1964 2 3 1970 208054.904277 \n", + "8718 2581 4 2 1990 230475.439055 \n", + "36680 2020 5 2 2011 278860.337033 \n", + "... ... ... ... ... ... 
\n", + "22830 1245 5 1 1975 167679.728402 \n", + "43699 2065 4 2 2021 257521.317661 \n", + "21160 1967 3 1 1951 262332.423882 \n", + "30915 2867 2 3 1990 311233.596471 \n", + "19117 1631 3 1 1967 200594.974438 \n", + "\n", + " Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n", + "7616 0 0 1 \n", + "47787 0 1 0 \n", + "35285 0 0 1 \n", + "8718 1 0 0 \n", + "36680 0 0 1 \n", + "... ... ... ... \n", + "22830 1 0 0 \n", + "43699 0 1 0 \n", + "21160 0 1 0 \n", + "30915 0 0 1 \n", + "19117 1 0 0 \n", + "\n", + "[44000 rows x 8 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SquareFeetBedroomsBathroomsYearBuiltPriceNeighborhood_RuralNeighborhood_SuburbNeighborhood_Urban
76162027332013237960.032012001
47787129251202186121.435887010
352851964231970208054.904277001
87182581421990230475.439055100
366802020522011278860.337033001
...........................
228301245511975167679.728402100
436992065422021257521.317661010
211601967311951262332.423882010
309152867231990311233.596471001
191171631311967200594.974438100
\n", + "

44000 rows × 8 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "hp_train", + "summary": "{\n \"name\": \"hp_train\",\n \"rows\": 44000,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 575,\n \"min\": 1000,\n \"max\": 2999,\n \"num_unique_values\": 2000,\n \"samples\": [\n 2015,\n 2776,\n 1529\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 5,\n 4,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 1950,\n \"max\": 2021,\n \"num_unique_values\": 72,\n \"samples\": [\n 2011,\n 1950,\n 1966\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 76107.65251634463,\n \"min\": -36588.16539749279,\n \"max\": 492195.2599720151,\n \"num_unique_values\": 44000,\n \"samples\": [\n 127869.24389754632,\n 331602.267141956,\n 149546.59653504143\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
\"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "hp_dev" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "ab4RCTHUt9Vt", + "outputId": "6ccc34ad-8a8c-4677-c521-c6d821776e11" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n", + "46301 2845 4 3 1954 354875.353057 \n", + "10023 2362 4 3 2010 292371.871755 \n", + "37044 1058 3 2 2007 155277.040755 \n", + "17462 2891 5 1 2005 239120.147027 \n", + "13804 2244 5 2 1966 254005.280471 \n", + "... ... ... ... ... ... \n", + "35925 1684 4 1 1950 212224.505489 \n", + "21799 1021 5 3 1995 139005.940982 \n", + "4318 2741 4 2 1962 339074.548520 \n", + "31492 2053 3 3 2014 239382.414641 \n", + "26727 2963 3 1 2004 321585.613385 \n", + "\n", + " Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n", + "46301 0 0 1 \n", + "10023 1 0 0 \n", + "37044 0 1 0 \n", + "17462 1 0 0 \n", + "13804 1 0 0 \n", + "... ... ... ... \n", + "35925 1 0 0 \n", + "21799 1 0 0 \n", + "4318 1 0 0 \n", + "31492 0 0 1 \n", + "26727 0 1 0 \n", + "\n", + "[5000 rows x 8 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SquareFeetBedroomsBathroomsYearBuiltPriceNeighborhood_RuralNeighborhood_SuburbNeighborhood_Urban
463012845431954354875.353057001
100232362432010292371.871755100
370441058322007155277.040755010
174622891512005239120.147027100
138042244521966254005.280471100
...........................
359251684411950212224.505489100
217991021531995139005.940982100
43182741421962339074.548520100
314922053332014239382.414641001
267272963312004321585.613385010
\n", + "

5000 rows × 8 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "hp_dev", + "summary": "{\n \"name\": \"hp_dev\",\n \"rows\": 5000,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 576,\n \"min\": 1000,\n \"max\": 2999,\n \"num_unique_values\": 1829,\n \"samples\": [\n 2667,\n 2963,\n 2213\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 3,\n 2,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 2,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 1950,\n \"max\": 2021,\n \"num_unique_values\": 72,\n \"samples\": [\n 1966,\n 1986,\n 2021\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 76778.00565792067,\n \"min\": -18159.685676249966,\n \"max\": 467492.8278233021,\n \"num_unique_values\": 5000,\n \"samples\": [\n 186133.49424564492,\n 217865.6155495013,\n 194238.86404489263\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
\"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "source": [ + "hp_test" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "zjOYohYCt-md", + "outputId": "723811f9-e6b4-4878-f949-0cfdced5ca3d" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n", + "49356 1174 5 3 1996 143866.306649 \n", + "18656 1776 3 1 1964 125553.381347 \n", + "27368 2524 2 2 2010 327261.077660 \n", + "27243 1633 2 1 1953 241231.423110 \n", + "24653 2811 4 2 1982 315724.479288 \n", + "... ... ... ... ... ... \n", + "20015 2106 2 2 2014 216406.701646 \n", + "40921 1704 3 3 1986 153770.810572 \n", + "30027 1150 5 3 1973 138938.157678 \n", + "16008 2822 2 2 1982 296193.916437 \n", + "23919 1348 2 2 1983 133497.577808 \n", + "\n", + " Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n", + "49356 0 1 0 \n", + "18656 0 0 1 \n", + "27368 1 0 0 \n", + "27243 1 0 0 \n", + "24653 1 0 0 \n", + "... ... ... ... \n", + "20015 0 0 1 \n", + "40921 1 0 0 \n", + "30027 0 0 1 \n", + "16008 0 1 0 \n", + "23919 1 0 0 \n", + "\n", + "[1000 rows x 8 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SquareFeetBedroomsBathroomsYearBuiltPriceNeighborhood_RuralNeighborhood_SuburbNeighborhood_Urban
493561174531996143866.306649010
186561776311964125553.381347001
273682524222010327261.077660100
272431633211953241231.423110100
246532811421982315724.479288100
...........................
200152106222014216406.701646001
409211704331986153770.810572100
300271150531973138938.157678001
160082822221982296193.916437010
239191348221983133497.577808100
\n", + "

1000 rows × 8 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "hp_test", + "summary": "{\n \"name\": \"hp_test\",\n \"rows\": 1000,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 581,\n \"min\": 1000,\n \"max\": 2999,\n \"num_unique_values\": 792,\n \"samples\": [\n 2084,\n 2990,\n 1245\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 3,\n 4,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 1950,\n \"max\": 2021,\n \"num_unique_values\": 72,\n \"samples\": [\n 1982,\n 2016,\n 1960\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 74475.15532686812,\n \"min\": -7550.50457435759,\n \"max\": 437047.71344105,\n \"num_unique_values\": 1000,\n \"samples\": [\n 230653.38480715267,\n 204995.43595068945,\n 231582.08580545988\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
\"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Statystyki" + ], + "metadata": { + "id": "NOERGp9pYt2R" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Wielkość podzbiorów" + ], + "metadata": { + "id": "8qLEM0Ahis-X" + } + }, + { + "cell_type": "code", + "source": [ + "housing_price_dataset.describe()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "Cp-IN7cc2Dgr", + "outputId": "d75f9cad-e097-4858-cd49-db618dcd42a3" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " SquareFeet Bedrooms Bathrooms YearBuilt Price\n", + "count 50000.000000 50000.000000 50000.000000 50000.000000 50000.000000\n", + "mean 2006.374680 3.498700 1.995420 1985.404420 224827.325151\n", + "std 575.513241 1.116326 0.815851 20.719377 76141.842966\n", + "min 1000.000000 2.000000 1.000000 1950.000000 -36588.165397\n", + "25% 1513.000000 3.000000 1.000000 1967.000000 169955.860225\n", + "50% 2007.000000 3.000000 2.000000 1985.000000 225052.141166\n", + "75% 2506.000000 4.000000 3.000000 2003.000000 279373.630052\n", + "max 2999.000000 5.000000 3.000000 2021.000000 492195.259972" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SquareFeetBedroomsBathroomsYearBuiltPrice
count50000.00000050000.00000050000.00000050000.00000050000.000000
mean2006.3746803.4987001.9954201985.404420224827.325151
std575.5132411.1163260.81585120.71937776141.842966
min1000.0000002.0000001.0000001950.000000-36588.165397
25%1513.0000003.0000001.0000001967.000000169955.860225
50%2007.0000003.0000002.0000001985.000000225052.141166
75%2506.0000004.0000003.0000002003.000000279373.630052
max2999.0000005.0000003.0000002021.000000492195.259972
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"housing_price_dataset\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17058.702043862784,\n \"min\": 575.513241276615,\n \"max\": 50000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 2006.37468,\n 2007.0,\n 50000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17676.577845369047,\n \"min\": 1.1163257739856558,\n \"max\": 50000.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 50000.0,\n 3.4987,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17677.02248379316,\n \"min\": 0.8158506823228849,\n \"max\": 50000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 50000.0,\n 1.99542,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17088.851960342447,\n \"min\": 20.71937668741524,\n \"max\": 50000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1985.40442,\n 1985.0,\n 50000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 162949.740052687,\n \"min\": -36588.16539749279,\n \"max\": 492195.2599720151,\n \"num_unique_values\": 8,\n \"samples\": [\n 224827.32515099045,\n 225052.14116600397,\n 50000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "hp_train.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Icm98vi1X6Pe", + "outputId": "207de571-34f3-4044-d970-9680ee895643" + }, + 
"execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(44000, 8)" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "source": [ + "hp_dev.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LlqC13x0Ymm6", + "outputId": "890f1281-0073-48ea-93d5-f86b03bf4564" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(5000, 8)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "hp_test.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8iwbOv4AYpk4", + "outputId": "54f3ca4c-033d-47e9-f285-2a1d2c07538b" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(1000, 8)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Statystyki kolumn" + ], + "metadata": { + "id": "Y2HnsCXxiypY" + } + }, + { + "cell_type": "code", + "source": [ + "hp_train.describe()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "wAUskqnzi8Cl", + "outputId": "9f558980-671c-4916-9877-604fa2537e5c" + }, + "execution_count": 17, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n", + "count 44000.000000 44000.000000 44000.000000 44000.000000 44000.000000 \n", + "mean 2006.261182 3.499636 1.997864 1985.416750 224928.983383 \n", + "std 575.306280 1.117315 0.815760 20.700559 76107.652516 \n", + "min 1000.000000 2.000000 1.000000 1950.000000 -36588.165397 \n", + "25% 1513.000000 3.000000 1.000000 1967.000000 170088.571867 \n", + "50% 2007.000000 3.000000 2.000000 1985.000000 225246.904135 \n", + "75% 2505.000000 5.000000 3.000000 2003.000000 
279365.119289 \n", + "max 2999.000000 5.000000 3.000000 2021.000000 492195.259972 \n", + "\n", + " Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n", + "count 44000.000000 44000.000000 44000.000000 \n", + "mean 0.332841 0.333636 0.333523 \n", + "std 0.471235 0.471517 0.471477 \n", + "min 0.000000 0.000000 0.000000 \n", + "25% 0.000000 0.000000 0.000000 \n", + "50% 0.000000 0.000000 0.000000 \n", + "75% 1.000000 1.000000 1.000000 \n", + "max 1.000000 1.000000 1.000000 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SquareFeetBedroomsBathroomsYearBuiltPriceNeighborhood_RuralNeighborhood_SuburbNeighborhood_Urban
count44000.00000044000.00000044000.00000044000.00000044000.00000044000.00000044000.00000044000.000000
mean2006.2611823.4996361.9978641985.416750224928.9833830.3328410.3336360.333523
std575.3062801.1173150.81576020.70055976107.6525160.4712350.4715170.471477
min1000.0000002.0000001.0000001950.000000-36588.1653970.0000000.0000000.000000
25%1513.0000003.0000001.0000001967.000000170088.5718670.0000000.0000000.000000
50%2007.0000003.0000002.0000001985.000000225246.9041350.0000000.0000000.000000
75%2505.0000005.0000003.0000002003.000000279365.1192891.0000001.0000001.000000
max2999.0000005.0000003.0000002021.000000492195.2599721.0000001.0000001.000000
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"hp_train\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14939.968071261836,\n \"min\": 575.3062795316038,\n \"max\": 44000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 2006.2611818181817,\n 2007.0,\n 44000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15555.206914611437,\n \"min\": 1.1173145826824615,\n \"max\": 44000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 44000.0,\n 3.4996363636363634,\n 5.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15555.70202423901,\n \"min\": 0.8157604462168441,\n \"max\": 44000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 44000.0,\n 1.9978636363636364,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14969.494474985573,\n \"min\": 20.70055860487858,\n \"max\": 44000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1985.41675,\n 1985.0,\n 44000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 163684.07731051612,\n \"min\": -36588.16539749279,\n \"max\": 492195.2599720151,\n \"num_unique_values\": 8,\n \"samples\": [\n 224928.9833827127,\n 225246.9041353957,\n 44000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15556.207564412634,\n \"min\": 0.0,\n \"max\": 44000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3328409090909091,\n 1.0,\n 0.47123548806324117\n ],\n \"semantic_type\": 
\"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15556.207510022008,\n \"min\": 0.0,\n \"max\": 44000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3336363636363636,\n 1.0,\n 0.4715169068117858\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15556.207517787427,\n \"min\": 0.0,\n \"max\": 44000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.33352272727272725,\n 1.0,\n 0.47147679658615904\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "hp_dev.describe()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "BmM4_vWsjBK3", + "outputId": "b0a1906f-9eac-46a5-84b6-0cdbea344d69" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n", + "count 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 \n", + "mean 2008.190800 3.487200 1.972600 1985.485400 224290.794530 \n", + "std 576.206366 1.104753 0.816077 20.960049 76778.005658 \n", + "min 1000.000000 2.000000 1.000000 1950.000000 -18159.685676 \n", + "25% 1510.750000 3.000000 1.000000 1967.000000 169103.151768 \n", + "50% 2007.000000 3.000000 2.000000 1985.000000 223614.924625 \n", + "75% 2503.000000 4.000000 3.000000 2004.000000 279651.548644 \n", + "max 2999.000000 5.000000 3.000000 2021.000000 467492.827823 \n", + "\n", + " Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n", + "count 5000.000000 5000.000000 5000.000000 \n", + "mean 0.337800 0.341600 0.320600 \n", + "std 0.473007 0.474294 0.466754 \n", + "min 0.000000 0.000000 0.000000 \n", + "25% 0.000000 0.000000 0.000000 \n", + "50% 0.000000 
0.000000 0.000000 \n", + "75% 1.000000 1.000000 1.000000 \n", + "max 1.000000 1.000000 1.000000 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SquareFeetBedroomsBathroomsYearBuiltPriceNeighborhood_RuralNeighborhood_SuburbNeighborhood_Urban
count5000.0000005000.0000005000.0000005000.0000005000.0000005000.0000005000.0000005000.000000
mean2008.1908003.4872001.9726001985.485400224290.7945300.3378000.3416000.320600
std576.2063661.1047530.81607720.96004976778.0056580.4730070.4742940.466754
min1000.0000002.0000001.0000001950.000000-18159.6856760.0000000.0000000.000000
25%1510.7500003.0000001.0000001967.000000169103.1517680.0000000.0000000.000000
50%2007.0000003.0000002.0000001985.000000223614.9246250.0000000.0000000.000000
75%2503.0000004.0000003.0000002004.000000279651.5486441.0000001.0000001.000000
max2999.0000005.0000003.0000002021.000000467492.8278231.0000001.0000001.000000
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"hp_dev\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1373.0060319958575,\n \"min\": 576.2063661142855,\n \"max\": 5000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 2008.1908,\n 2007.0,\n 5000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1766.6767900146253,\n \"min\": 1.1047534820271943,\n \"max\": 5000.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 5000.0,\n 3.4872,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1767.1212371098363,\n \"min\": 0.8160774696603855,\n \"max\": 5000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 5000.0,\n 1.9726,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1352.889275943266,\n \"min\": 20.9600489400744,\n \"max\": 5000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1985.4854,\n 1985.0,\n 5000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 159056.8559402586,\n \"min\": -18159.685676249966,\n \"max\": 467492.8278233021,\n \"num_unique_values\": 8,\n \"samples\": [\n 224290.7945297919,\n 223614.92462488014,\n 5000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1767.6250346212444,\n \"min\": 0.0,\n \"max\": 5000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3378,\n 1.0,\n 0.4730073014039385\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
\"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1767.6247777120989,\n \"min\": 0.0,\n \"max\": 5000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3416,\n 1.0,\n 0.474293612529388\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1767.626219259249,\n \"min\": 0.0,\n \"max\": 5000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3206,\n 1.0,\n 0.4667539092952179\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "hp_test.describe()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "T7edA8gVjBfU", + "outputId": "99be05f6-e25f-45ae-9e4f-7f293d1ac14c" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n", + "count 1000.000000 1000.000000 1000.000000 1000.000000 1000.000000 \n", + "mean 2002.288000 3.515000 2.002000 1984.457000 223037.016061 \n", + "std 581.670136 1.130953 0.817719 20.330949 74475.155327 \n", + "min 1000.000000 2.000000 1.000000 1950.000000 -7550.504574 \n", + "25% 1507.250000 2.000000 1.000000 1967.000000 168905.529102 \n", + "50% 2021.500000 4.000000 2.000000 1983.000000 220416.485632 \n", + "75% 2524.000000 5.000000 3.000000 2002.000000 279628.697596 \n", + "max 2999.000000 5.000000 3.000000 2021.000000 437047.713441 \n", + "\n", + " Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n", + "count 1000.000000 1000.000000 1000.000000 \n", + "mean 0.342000 0.333000 0.325000 \n", + "std 0.474617 0.471522 0.468609 \n", + "min 0.000000 0.000000 0.000000 \n", + "25% 0.000000 0.000000 0.000000 \n", + "50% 0.000000 0.000000 0.000000 \n", + "75% 1.000000 1.000000 1.000000 \n", + "max 1.000000 1.000000 
1.000000 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SquareFeetBedroomsBathroomsYearBuiltPriceNeighborhood_RuralNeighborhood_SuburbNeighborhood_Urban
count1000.0000001000.0000001000.0000001000.0000001000.0000001000.0000001000.0000001000.000000
mean2002.2880003.5150002.0020001984.457000223037.0160610.3420000.3330000.325000
std581.6701361.1309530.81771920.33094974475.1553270.4746170.4715220.468609
min1000.0000002.0000001.0000001950.000000-7550.5045740.0000000.0000000.000000
25%1507.2500002.0000001.0000001967.000000168905.5291020.0000000.0000000.000000
50%2021.5000004.0000002.0000001983.000000220416.4856320.0000000.0000000.000000
75%2524.0000005.0000003.0000002002.000000279628.6975961.0000001.0000001.000000
max2999.0000005.0000003.0000002021.000000437047.7134411.0000001.0000001.000000
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"hp_test\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 830.5567967260185,\n \"min\": 581.6701360764563,\n \"max\": 2999.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 1000.0,\n 2002.288,\n 2524.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 352.4125101562337,\n \"min\": 1.1309527196368794,\n \"max\": 1000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 1000.0,\n 3.515,\n 5.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 352.9069492337987,\n \"min\": 0.8177191844787945,\n \"max\": 1000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 1000.0,\n 2.002,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 731.3239730266098,\n \"min\": 20.330949276866008,\n \"max\": 2021.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1984.457,\n 1983.0,\n 1000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 150108.67450064773,\n \"min\": -7550.50457435759,\n \"max\": 437047.71344105,\n \"num_unique_values\": 8,\n \"samples\": [\n 223037.01606120248,\n 220416.4856317892,\n 1000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 353.41137428502344,\n \"min\": 0.0,\n \"max\": 1000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.342,\n 1.0,\n 0.4746169626775482\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
# Columns summed by default: the three Neighborhood_* columns of the splits.
NEIGHBORHOOD_COLUMNS = ('Neighborhood_Rural', 'Neighborhood_Suburb', 'Neighborhood_Urban')


def print_sum(df_name, df, columns=NEIGHBORHOOD_COLUMNS):
    """Print the sum of each selected column of ``df`` under a heading.

    Output format: ``df_name`` on its own line, then one ``<column> <sum>``
    line per entry of ``columns`` (in the given order), then an empty line.

    Args:
        df_name: Label printed as the heading of the report.
        df: Object indexable by column name whose columns expose ``.sum()``
            (the notebook passes pandas DataFrames).
        columns: Iterable of column names to sum. Defaults to the three
            ``Neighborhood_*`` columns, which keeps existing two-argument
            calls working exactly as before.

    Returns:
        None. The report is written to standard output.
    """
    print(df_name)
    for col in columns:
        print(col, df[col].sum())
    print()


# Same report for every split, in the original order (train, dev, test).
for split_name, split_df in (("hp_train", hp_train), ("hp_dev", hp_dev), ("hp_test", hp_test)):
    print_sum(split_name, split_df)