ium_495719/IUM_02_Dane.ipynb

3765 lines
184 KiB
Plaintext
Raw Normal View History

2024-03-19 23:33:31 +01:00
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Import bibliotek"
],
"metadata": {
"id": "fbReA72OlQ_Q"
}
},
{
"cell_type": "code",
"source": [
"import sklearn\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"from sklearn.model_selection import train_test_split\n",
"from google.colab import files\n",
"import pandas as pd"
],
"metadata": {
"id": "lIs7iUiKlVvA"
},
"execution_count": 1,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Pobranie danych"
],
"metadata": {
"id": "PFLEmQ76IauU"
}
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "plw8exY_D-2b",
"outputId": "6cd21e52-fbfc-432e-e7f3-019e2ad2416c"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: kaggle in /usr/local/lib/python3.10/dist-packages (1.5.16)\n",
"Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.10/dist-packages (from kaggle) (1.16.0)\n",
"Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from kaggle) (2024.2.2)\n",
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.8.2)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.31.0)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kaggle) (4.66.2)\n",
"Requirement already satisfied: python-slugify in /usr/local/lib/python3.10/dist-packages (from kaggle) (8.0.4)\n",
"Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.0.7)\n",
"Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from kaggle) (6.1.0)\n",
"Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->kaggle) (0.5.1)\n",
"Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.10/dist-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.6)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n",
"Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.25.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n"
]
}
],
"source": [
"#Zainstalujmy potrzebne biblioteki\n",
"!pip install --user kaggle #API Kaggle, do pobrania zbioru\n",
"!pip install --user pandas"
]
},
{
"cell_type": "code",
"source": [
"files.upload()\n",
"! mkdir ~/.kaggle\n",
"! cp kaggle.json ~/.kaggle/\n",
"! chmod 600 ~/.kaggle/kaggle.json"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 88
},
"id": "vKwe6YuNFV0K",
"outputId": "23d34751-9086-4508-bf1b-162d8b770e28"
},
"execution_count": 3,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <input type=\"file\" id=\"files-fe6c8433-f772-471c-b5fe-47a4b4599e3c\" name=\"files[]\" multiple disabled\n",
" style=\"border:none\" />\n",
" <output id=\"result-fe6c8433-f772-471c-b5fe-47a4b4599e3c\">\n",
" Upload widget is only available when the cell has been executed in the\n",
" current browser session. Please rerun this cell to enable.\n",
" </output>\n",
" <script>// Copyright 2017 Google LLC\n",
"//\n",
"// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"// you may not use this file except in compliance with the License.\n",
"// You may obtain a copy of the License at\n",
"//\n",
"// http://www.apache.org/licenses/LICENSE-2.0\n",
"//\n",
"// Unless required by applicable law or agreed to in writing, software\n",
"// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"// See the License for the specific language governing permissions and\n",
"// limitations under the License.\n",
"\n",
"/**\n",
" * @fileoverview Helpers for google.colab Python module.\n",
" */\n",
"(function(scope) {\n",
"function span(text, styleAttributes = {}) {\n",
" const element = document.createElement('span');\n",
" element.textContent = text;\n",
" for (const key of Object.keys(styleAttributes)) {\n",
" element.style[key] = styleAttributes[key];\n",
" }\n",
" return element;\n",
"}\n",
"\n",
"// Max number of bytes which will be uploaded at a time.\n",
"const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
"\n",
"function _uploadFiles(inputId, outputId) {\n",
" const steps = uploadFilesStep(inputId, outputId);\n",
" const outputElement = document.getElementById(outputId);\n",
" // Cache steps on the outputElement to make it available for the next call\n",
" // to uploadFilesContinue from Python.\n",
" outputElement.steps = steps;\n",
"\n",
" return _uploadFilesContinue(outputId);\n",
"}\n",
"\n",
"// This is roughly an async generator (not supported in the browser yet),\n",
"// where there are multiple asynchronous steps and the Python side is going\n",
"// to poll for completion of each step.\n",
"// This uses a Promise to block the python side on completion of each step,\n",
"// then passes the result of the previous step as the input to the next step.\n",
"function _uploadFilesContinue(outputId) {\n",
" const outputElement = document.getElementById(outputId);\n",
" const steps = outputElement.steps;\n",
"\n",
" const next = steps.next(outputElement.lastPromiseValue);\n",
" return Promise.resolve(next.value.promise).then((value) => {\n",
" // Cache the last promise value to make it available to the next\n",
" // step of the generator.\n",
" outputElement.lastPromiseValue = value;\n",
" return next.value.response;\n",
" });\n",
"}\n",
"\n",
"/**\n",
" * Generator function which is called between each async step of the upload\n",
" * process.\n",
" * @param {string} inputId Element ID of the input file picker element.\n",
" * @param {string} outputId Element ID of the output display.\n",
" * @return {!Iterable<!Object>} Iterable of next steps.\n",
" */\n",
"function* uploadFilesStep(inputId, outputId) {\n",
" const inputElement = document.getElementById(inputId);\n",
" inputElement.disabled = false;\n",
"\n",
" const outputElement = document.getElementById(outputId);\n",
" outputElement.innerHTML = '';\n",
"\n",
" const pickedPromise = new Promise((resolve) => {\n",
" inputElement.addEventListener('change', (e) => {\n",
" resolve(e.target.files);\n",
" });\n",
" });\n",
"\n",
" const cancel = document.createElement('button');\n",
" inputElement.parentElement.appendChild(cancel);\n",
" cancel.textContent = 'Cancel upload';\n",
" const cancelPromise = new Promise((resolve) => {\n",
" cancel.onclick = () => {\n",
" resolve(null);\n",
" };\n",
" });\n",
"\n",
" // Wait for the user to pick the files.\n",
" const files = yield {\n",
" promise: Promise.race([pickedPromise, cancelPromise]),\n",
" response: {\n",
" action: 'starting',\n",
" }\n",
" };\n",
"\n",
" cancel.remove();\n",
"\n",
" // Disable the input element since further picks are not allowed.\n",
" inputElement.disabled = true;\n",
"\n",
" if (!files) {\n",
" return {\n",
" response: {\n",
" action: 'complete',\n",
" }\n",
" };\n",
" }\n",
"\n",
" for (const file of files) {\n",
" const li = document.createElement('li');\n",
" li.append(span(file.name, {fontWeight: 'bold'}));\n",
" li.append(span(\n",
" `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
" `last modified: ${\n",
" file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
" 'n/a'} - `));\n",
" const percent = span('0% done');\n",
" li.appendChild(percent);\n",
"\n",
" outputElement.appendChild(li);\n",
"\n",
" const fileDataPromise = new Promise((resolve) => {\n",
" const reader = new FileReader();\n",
" reader.onload = (e) => {\n",
" resolve(e.target.result);\n",
" };\n",
" reader.readAsArrayBuffer(file);\n",
" });\n",
" // Wait for the data to be ready.\n",
" let fileData = yield {\n",
" promise: fileDataPromise,\n",
" response: {\n",
" action: 'continue',\n",
" }\n",
" };\n",
"\n",
" // Use a chunked sending to avoid message size limits. See b/62115660.\n",
" let position = 0;\n",
" do {\n",
" const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
" const chunk = new Uint8Array(fileData, position, length);\n",
" position += length;\n",
"\n",
" const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
" yield {\n",
" response: {\n",
" action: 'append',\n",
" file: file.name,\n",
" data: base64,\n",
" },\n",
" };\n",
"\n",
" let percentDone = fileData.byteLength === 0 ?\n",
" 100 :\n",
" Math.round((position / fileData.byteLength) * 100);\n",
" percent.textContent = `${percentDone}% done`;\n",
"\n",
" } while (position < fileData.byteLength);\n",
" }\n",
"\n",
" // All done.\n",
" yield {\n",
" response: {\n",
" action: 'complete',\n",
" }\n",
" };\n",
"}\n",
"\n",
"scope.google = scope.google || {};\n",
"scope.google.colab = scope.google.colab || {};\n",
"scope.google.colab._files = {\n",
" _uploadFiles,\n",
" _uploadFilesContinue,\n",
"};\n",
"})(self);\n",
"</script> "
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Saving kaggle.json to kaggle (4).json\n",
"mkdir: cannot create directory /root/.kaggle: File exists\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!kaggle datasets download -d muhammadbinimran/housing-price-prediction-data"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "V0tjpXGnHprW",
"outputId": "8ab72502-fd6f-4e12-966e-4bd135225b92"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"housing-price-prediction-data.zip: Skipping, found more recently modified local copy (use --force to force download)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!unzip -o housing-price-prediction-data.zip"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "KFdbSDSGH5hK",
"outputId": "fe5639b9-9ff8-4c0c-c9f3-d0fd86f09c39"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Archive: housing-price-prediction-data.zip\n",
" inflating: housing_price_dataset.csv \n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# Wczytanie zbioru"
],
"metadata": {
"id": "tH7ufJQWI2bT"
}
},
{
"cell_type": "code",
"source": [
"!pip install --user pandas"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "D_XnqsLfI1ki",
"outputId": "c9983630-5453-42cf-e5a4-e32b47c5b8ee"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n",
"Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.25.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"housing_price_dataset = pd.read_csv('housing_price_dataset.csv')\n",
"housing_price_dataset"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "TKu6XCn2I5KF",
"outputId": "006ac90f-d56e-4bc9-8495-af9450376102"
},
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" SquareFeet Bedrooms Bathrooms Neighborhood YearBuilt Price\n",
"0 2126 4 1 Rural 1969 215355.283618\n",
"1 2459 3 2 Rural 1980 195014.221626\n",
"2 1860 2 1 Suburb 1970 306891.012076\n",
"3 2294 2 1 Urban 1996 206786.787153\n",
"4 2130 5 2 Suburb 2001 272436.239065\n",
"... ... ... ... ... ... ...\n",
"49995 1282 5 3 Rural 1975 100080.865895\n",
"49996 2854 2 2 Suburb 1988 374507.656727\n",
"49997 2979 5 3 Suburb 1962 384110.555590\n",
"49998 2596 5 2 Rural 1984 380512.685957\n",
"49999 1572 5 3 Rural 2011 221618.583218\n",
"\n",
"[50000 rows x 6 columns]"
],
"text/html": [
"\n",
" <div id=\"df-edad94fe-b9cc-4eb7-8eb9-112894937818\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SquareFeet</th>\n",
" <th>Bedrooms</th>\n",
" <th>Bathrooms</th>\n",
" <th>Neighborhood</th>\n",
" <th>YearBuilt</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2126</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>Rural</td>\n",
" <td>1969</td>\n",
" <td>215355.283618</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2459</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>Rural</td>\n",
" <td>1980</td>\n",
" <td>195014.221626</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1860</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>Suburb</td>\n",
" <td>1970</td>\n",
" <td>306891.012076</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2294</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>Urban</td>\n",
" <td>1996</td>\n",
" <td>206786.787153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2130</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>Suburb</td>\n",
" <td>2001</td>\n",
" <td>272436.239065</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49995</th>\n",
" <td>1282</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>Rural</td>\n",
" <td>1975</td>\n",
" <td>100080.865895</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49996</th>\n",
" <td>2854</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>Suburb</td>\n",
" <td>1988</td>\n",
" <td>374507.656727</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49997</th>\n",
" <td>2979</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>Suburb</td>\n",
" <td>1962</td>\n",
" <td>384110.555590</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49998</th>\n",
" <td>2596</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>Rural</td>\n",
" <td>1984</td>\n",
" <td>380512.685957</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49999</th>\n",
" <td>1572</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>Rural</td>\n",
" <td>2011</td>\n",
" <td>221618.583218</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>50000 rows × 6 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-edad94fe-b9cc-4eb7-8eb9-112894937818')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-edad94fe-b9cc-4eb7-8eb9-112894937818 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-edad94fe-b9cc-4eb7-8eb9-112894937818');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-e281a0ab-be36-4cd9-99c8-0c33c244ff16\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-e281a0ab-be36-4cd9-99c8-0c33c244ff16')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-e281a0ab-be36-4cd9-99c8-0c33c244ff16 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "housing_price_dataset",
"summary": "{\n \"name\": \"housing_price_dataset\",\n \"rows\": 50000,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 575,\n \"min\": 1000,\n \"max\": 2999,\n \"num_unique_values\": 2000,\n \"samples\": [\n 2578,\n 2250,\n 1585\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 3,\n 5,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 1,\n 2,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Rural\",\n \"Suburb\",\n \"Urban\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 1950,\n \"max\": 2021,\n \"num_unique_values\": 72,\n \"samples\": [\n 2001,\n 1967,\n 1962\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 76141.84296604691,\n \"min\": -36588.16539749279,\n \"max\": 492195.2599720151,\n \"num_unique_values\": 50000,\n \"samples\": [\n 170835.03571295898,\n 126913.4699981214,\n 246611.88309182983\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "markdown",
"source": [
"# Podział zbioru"
],
"metadata": {
"id": "2PIqECUhIvcd"
}
},
{
"cell_type": "code",
"source": [
"hp_train_test, hp_dev = sklearn.model_selection.train_test_split(housing_price_dataset, test_size=0.1)\n",
"hp_train, hp_test = sklearn.model_selection.train_test_split(hp_train_test, test_size=1000)"
],
"metadata": {
"id": "Rb5GTCQGIUzE"
},
"execution_count": 8,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Normalizacja danych"
],
"metadata": {
"id": "v9X6AQHYjLA2"
}
},
{
"cell_type": "code",
"source": [
"housing_price_dataset[\"Neighborhood\"].unique()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iU0adUbpjdSS",
"outputId": "d8bb2852-9017-40b6-bf1b-1619c869c8de"
},
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array(['Rural', 'Suburb', 'Urban'], dtype=object)"
]
},
"metadata": {},
"execution_count": 9
}
]
},
{
"cell_type": "code",
"source": [
"hp_train = pd.get_dummies(hp_train, columns=['Neighborhood'])\n",
"hp_dev = pd.get_dummies(hp_dev, columns=['Neighborhood'])\n",
"hp_test = pd.get_dummies(hp_test, columns=['Neighborhood'])"
],
"metadata": {
"id": "oLibzeZ5kivR"
},
"execution_count": 10,
"outputs": []
},
{
"cell_type": "code",
"source": [
"hp_train"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "1Pjm-8iKsMH-",
"outputId": "6bdf19b2-ac29-4f7e-a479-5217df193eba"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n",
"7616 2027 3 3 2013 237960.032012 \n",
"47787 1292 5 1 2021 86121.435887 \n",
"35285 1964 2 3 1970 208054.904277 \n",
"8718 2581 4 2 1990 230475.439055 \n",
"36680 2020 5 2 2011 278860.337033 \n",
"... ... ... ... ... ... \n",
"22830 1245 5 1 1975 167679.728402 \n",
"43699 2065 4 2 2021 257521.317661 \n",
"21160 1967 3 1 1951 262332.423882 \n",
"30915 2867 2 3 1990 311233.596471 \n",
"19117 1631 3 1 1967 200594.974438 \n",
"\n",
" Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n",
"7616 0 0 1 \n",
"47787 0 1 0 \n",
"35285 0 0 1 \n",
"8718 1 0 0 \n",
"36680 0 0 1 \n",
"... ... ... ... \n",
"22830 1 0 0 \n",
"43699 0 1 0 \n",
"21160 0 1 0 \n",
"30915 0 0 1 \n",
"19117 1 0 0 \n",
"\n",
"[44000 rows x 8 columns]"
],
"text/html": [
"\n",
" <div id=\"df-601e59b6-c587-4e32-955e-084019ad4fa2\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SquareFeet</th>\n",
" <th>Bedrooms</th>\n",
" <th>Bathrooms</th>\n",
" <th>YearBuilt</th>\n",
" <th>Price</th>\n",
" <th>Neighborhood_Rural</th>\n",
" <th>Neighborhood_Suburb</th>\n",
" <th>Neighborhood_Urban</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7616</th>\n",
" <td>2027</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>2013</td>\n",
" <td>237960.032012</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47787</th>\n",
" <td>1292</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2021</td>\n",
" <td>86121.435887</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35285</th>\n",
" <td>1964</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1970</td>\n",
" <td>208054.904277</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8718</th>\n",
" <td>2581</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>1990</td>\n",
" <td>230475.439055</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36680</th>\n",
" <td>2020</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>2011</td>\n",
" <td>278860.337033</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22830</th>\n",
" <td>1245</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1975</td>\n",
" <td>167679.728402</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43699</th>\n",
" <td>2065</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2021</td>\n",
" <td>257521.317661</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21160</th>\n",
" <td>1967</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1951</td>\n",
" <td>262332.423882</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30915</th>\n",
" <td>2867</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1990</td>\n",
" <td>311233.596471</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19117</th>\n",
" <td>1631</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1967</td>\n",
" <td>200594.974438</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>44000 rows × 8 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-601e59b6-c587-4e32-955e-084019ad4fa2')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-601e59b6-c587-4e32-955e-084019ad4fa2 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-601e59b6-c587-4e32-955e-084019ad4fa2');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-e5dfecf4-d4d2-4c5d-8938-cf2f49863177\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-e5dfecf4-d4d2-4c5d-8938-cf2f49863177')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-e5dfecf4-d4d2-4c5d-8938-cf2f49863177 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "hp_train",
"summary": "{\n \"name\": \"hp_train\",\n \"rows\": 44000,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 575,\n \"min\": 1000,\n \"max\": 2999,\n \"num_unique_values\": 2000,\n \"samples\": [\n 2015,\n 2776,\n 1529\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 5,\n 4,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 1950,\n \"max\": 2021,\n \"num_unique_values\": 72,\n \"samples\": [\n 2011,\n 1950,\n 1966\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 76107.65251634463,\n \"min\": -36588.16539749279,\n \"max\": 492195.2599720151,\n \"num_unique_values\": 44000,\n \"samples\": [\n 127869.24389754632,\n 331602.267141956,\n 149546.59653504143\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
"hp_dev"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "ab4RCTHUt9Vt",
"outputId": "6ccc34ad-8a8c-4677-c521-c6d821776e11"
},
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n",
"46301 2845 4 3 1954 354875.353057 \n",
"10023 2362 4 3 2010 292371.871755 \n",
"37044 1058 3 2 2007 155277.040755 \n",
"17462 2891 5 1 2005 239120.147027 \n",
"13804 2244 5 2 1966 254005.280471 \n",
"... ... ... ... ... ... \n",
"35925 1684 4 1 1950 212224.505489 \n",
"21799 1021 5 3 1995 139005.940982 \n",
"4318 2741 4 2 1962 339074.548520 \n",
"31492 2053 3 3 2014 239382.414641 \n",
"26727 2963 3 1 2004 321585.613385 \n",
"\n",
" Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n",
"46301 0 0 1 \n",
"10023 1 0 0 \n",
"37044 0 1 0 \n",
"17462 1 0 0 \n",
"13804 1 0 0 \n",
"... ... ... ... \n",
"35925 1 0 0 \n",
"21799 1 0 0 \n",
"4318 1 0 0 \n",
"31492 0 0 1 \n",
"26727 0 1 0 \n",
"\n",
"[5000 rows x 8 columns]"
],
"text/html": [
"\n",
" <div id=\"df-e8450f4b-c328-4c04-9a1f-3e67a17788ad\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SquareFeet</th>\n",
" <th>Bedrooms</th>\n",
" <th>Bathrooms</th>\n",
" <th>YearBuilt</th>\n",
" <th>Price</th>\n",
" <th>Neighborhood_Rural</th>\n",
" <th>Neighborhood_Suburb</th>\n",
" <th>Neighborhood_Urban</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>46301</th>\n",
" <td>2845</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>1954</td>\n",
" <td>354875.353057</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10023</th>\n",
" <td>2362</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2010</td>\n",
" <td>292371.871755</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37044</th>\n",
" <td>1058</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2007</td>\n",
" <td>155277.040755</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17462</th>\n",
" <td>2891</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2005</td>\n",
" <td>239120.147027</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13804</th>\n",
" <td>2244</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>1966</td>\n",
" <td>254005.280471</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35925</th>\n",
" <td>1684</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1950</td>\n",
" <td>212224.505489</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21799</th>\n",
" <td>1021</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>1995</td>\n",
" <td>139005.940982</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4318</th>\n",
" <td>2741</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>1962</td>\n",
" <td>339074.548520</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31492</th>\n",
" <td>2053</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>2014</td>\n",
" <td>239382.414641</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26727</th>\n",
" <td>2963</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2004</td>\n",
" <td>321585.613385</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5000 rows × 8 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e8450f4b-c328-4c04-9a1f-3e67a17788ad')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-e8450f4b-c328-4c04-9a1f-3e67a17788ad button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-e8450f4b-c328-4c04-9a1f-3e67a17788ad');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-93811741-2640-42ae-b1db-3d88d736a520\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-93811741-2640-42ae-b1db-3d88d736a520')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-93811741-2640-42ae-b1db-3d88d736a520 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "hp_dev",
"summary": "{\n \"name\": \"hp_dev\",\n \"rows\": 5000,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 576,\n \"min\": 1000,\n \"max\": 2999,\n \"num_unique_values\": 1829,\n \"samples\": [\n 2667,\n 2963,\n 2213\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 3,\n 2,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 2,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 1950,\n \"max\": 2021,\n \"num_unique_values\": 72,\n \"samples\": [\n 1966,\n 1986,\n 2021\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 76778.00565792067,\n \"min\": -18159.685676249966,\n \"max\": 467492.8278233021,\n \"num_unique_values\": 5000,\n \"samples\": [\n 186133.49424564492,\n 217865.6155495013,\n 194238.86404489263\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"source": [
"hp_test"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "zjOYohYCt-md",
"outputId": "723811f9-e6b4-4878-f949-0cfdced5ca3d"
},
"execution_count": 13,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n",
"49356 1174 5 3 1996 143866.306649 \n",
"18656 1776 3 1 1964 125553.381347 \n",
"27368 2524 2 2 2010 327261.077660 \n",
"27243 1633 2 1 1953 241231.423110 \n",
"24653 2811 4 2 1982 315724.479288 \n",
"... ... ... ... ... ... \n",
"20015 2106 2 2 2014 216406.701646 \n",
"40921 1704 3 3 1986 153770.810572 \n",
"30027 1150 5 3 1973 138938.157678 \n",
"16008 2822 2 2 1982 296193.916437 \n",
"23919 1348 2 2 1983 133497.577808 \n",
"\n",
" Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n",
"49356 0 1 0 \n",
"18656 0 0 1 \n",
"27368 1 0 0 \n",
"27243 1 0 0 \n",
"24653 1 0 0 \n",
"... ... ... ... \n",
"20015 0 0 1 \n",
"40921 1 0 0 \n",
"30027 0 0 1 \n",
"16008 0 1 0 \n",
"23919 1 0 0 \n",
"\n",
"[1000 rows x 8 columns]"
],
"text/html": [
"\n",
" <div id=\"df-ac9089e6-45f0-4578-97d1-ff8843413135\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SquareFeet</th>\n",
" <th>Bedrooms</th>\n",
" <th>Bathrooms</th>\n",
" <th>YearBuilt</th>\n",
" <th>Price</th>\n",
" <th>Neighborhood_Rural</th>\n",
" <th>Neighborhood_Suburb</th>\n",
" <th>Neighborhood_Urban</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>49356</th>\n",
" <td>1174</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>1996</td>\n",
" <td>143866.306649</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18656</th>\n",
" <td>1776</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1964</td>\n",
" <td>125553.381347</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27368</th>\n",
" <td>2524</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2010</td>\n",
" <td>327261.077660</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27243</th>\n",
" <td>1633</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1953</td>\n",
" <td>241231.423110</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24653</th>\n",
" <td>2811</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>1982</td>\n",
" <td>315724.479288</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20015</th>\n",
" <td>2106</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2014</td>\n",
" <td>216406.701646</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40921</th>\n",
" <td>1704</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1986</td>\n",
" <td>153770.810572</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30027</th>\n",
" <td>1150</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>1973</td>\n",
" <td>138938.157678</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16008</th>\n",
" <td>2822</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1982</td>\n",
" <td>296193.916437</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23919</th>\n",
" <td>1348</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1983</td>\n",
" <td>133497.577808</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000 rows × 8 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ac9089e6-45f0-4578-97d1-ff8843413135')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-ac9089e6-45f0-4578-97d1-ff8843413135 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-ac9089e6-45f0-4578-97d1-ff8843413135');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-cf201a7d-0bc3-4ad7-982e-4424ba424285\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-cf201a7d-0bc3-4ad7-982e-4424ba424285')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-cf201a7d-0bc3-4ad7-982e-4424ba424285 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "hp_test",
"summary": "{\n \"name\": \"hp_test\",\n \"rows\": 1000,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 581,\n \"min\": 1000,\n \"max\": 2999,\n \"num_unique_values\": 792,\n \"samples\": [\n 2084,\n 2990,\n 1245\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 3,\n 4,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 1950,\n \"max\": 2021,\n \"num_unique_values\": 72,\n \"samples\": [\n 1982,\n 2016,\n 1960\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 74475.15532686812,\n \"min\": -7550.50457435759,\n \"max\": 437047.71344105,\n \"num_unique_values\": 1000,\n \"samples\": [\n 230653.38480715267,\n 204995.43595068945,\n 231582.08580545988\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"uint8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 13
}
]
},
{
"cell_type": "markdown",
"source": [
"# Statystyki"
],
"metadata": {
"id": "NOERGp9pYt2R"
}
},
{
"cell_type": "markdown",
"source": [
"### Wielkość podzbiorów"
],
"metadata": {
"id": "8qLEM0Ahis-X"
}
},
{
"cell_type": "code",
"source": [
"housing_price_dataset.describe()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 300
},
"id": "Cp-IN7cc2Dgr",
"outputId": "d75f9cad-e097-4858-cd49-db618dcd42a3"
},
"execution_count": 21,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" SquareFeet Bedrooms Bathrooms YearBuilt Price\n",
"count 50000.000000 50000.000000 50000.000000 50000.000000 50000.000000\n",
"mean 2006.374680 3.498700 1.995420 1985.404420 224827.325151\n",
"std 575.513241 1.116326 0.815851 20.719377 76141.842966\n",
"min 1000.000000 2.000000 1.000000 1950.000000 -36588.165397\n",
"25% 1513.000000 3.000000 1.000000 1967.000000 169955.860225\n",
"50% 2007.000000 3.000000 2.000000 1985.000000 225052.141166\n",
"75% 2506.000000 4.000000 3.000000 2003.000000 279373.630052\n",
"max 2999.000000 5.000000 3.000000 2021.000000 492195.259972"
],
"text/html": [
"\n",
" <div id=\"df-7aae1c56-9f56-4494-a4ac-b562730ae374\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SquareFeet</th>\n",
" <th>Bedrooms</th>\n",
" <th>Bathrooms</th>\n",
" <th>YearBuilt</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>50000.000000</td>\n",
" <td>50000.000000</td>\n",
" <td>50000.000000</td>\n",
" <td>50000.000000</td>\n",
" <td>50000.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2006.374680</td>\n",
" <td>3.498700</td>\n",
" <td>1.995420</td>\n",
" <td>1985.404420</td>\n",
" <td>224827.325151</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>575.513241</td>\n",
" <td>1.116326</td>\n",
" <td>0.815851</td>\n",
" <td>20.719377</td>\n",
" <td>76141.842966</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1000.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1950.000000</td>\n",
" <td>-36588.165397</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1513.000000</td>\n",
" <td>3.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1967.000000</td>\n",
" <td>169955.860225</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2007.000000</td>\n",
" <td>3.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1985.000000</td>\n",
" <td>225052.141166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2506.000000</td>\n",
" <td>4.000000</td>\n",
" <td>3.000000</td>\n",
" <td>2003.000000</td>\n",
" <td>279373.630052</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2999.000000</td>\n",
" <td>5.000000</td>\n",
" <td>3.000000</td>\n",
" <td>2021.000000</td>\n",
" <td>492195.259972</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7aae1c56-9f56-4494-a4ac-b562730ae374')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-7aae1c56-9f56-4494-a4ac-b562730ae374 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-7aae1c56-9f56-4494-a4ac-b562730ae374');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-d68153f8-bf7b-47f9-8c6e-765901ebe973\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d68153f8-bf7b-47f9-8c6e-765901ebe973')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-d68153f8-bf7b-47f9-8c6e-765901ebe973 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"summary": "{\n \"name\": \"housing_price_dataset\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17058.702043862784,\n \"min\": 575.513241276615,\n \"max\": 50000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 2006.37468,\n 2007.0,\n 50000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17676.577845369047,\n \"min\": 1.1163257739856558,\n \"max\": 50000.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 50000.0,\n 3.4987,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17677.02248379316,\n \"min\": 0.8158506823228849,\n \"max\": 50000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 50000.0,\n 1.99542,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17088.851960342447,\n \"min\": 20.71937668741524,\n \"max\": 50000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1985.40442,\n 1985.0,\n 50000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 162949.740052687,\n \"min\": -36588.16539749279,\n \"max\": 492195.2599720151,\n \"num_unique_values\": 8,\n \"samples\": [\n 224827.32515099045,\n 225052.14116600397,\n 50000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 21
}
]
},
{
"cell_type": "code",
"source": [
"hp_train.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Icm98vi1X6Pe",
"outputId": "207de571-34f3-4044-d970-9680ee895643"
},
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(44000, 8)"
]
},
"metadata": {},
"execution_count": 14
}
]
},
{
"cell_type": "code",
"source": [
"hp_dev.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LlqC13x0Ymm6",
"outputId": "890f1281-0073-48ea-93d5-f86b03bf4564"
},
"execution_count": 15,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(5000, 8)"
]
},
"metadata": {},
"execution_count": 15
}
]
},
{
"cell_type": "code",
"source": [
"hp_test.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8iwbOv4AYpk4",
"outputId": "54f3ca4c-033d-47e9-f285-2a1d2c07538b"
},
"execution_count": 16,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(1000, 8)"
]
},
"metadata": {},
"execution_count": 16
}
]
},
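{
"cell_type": "markdown",
"source": [
"### Statystyki kolumn\n",
"\n",
"Uwaga: minimum kolumny `Price` jest ujemne w każdym podzbiorze (train, dev i test). Ujemna cena nie ma sensu, więc takie rekordy warto zweryfikować przed trenowaniem modelu."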
],
"metadata": {
"id": "Y2HnsCXxiypY"
}
},
{
"cell_type": "code",
"source": [
"hp_train.describe()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 300
},
"id": "wAUskqnzi8Cl",
"outputId": "9f558980-671c-4916-9877-604fa2537e5c"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n",
"count 44000.000000 44000.000000 44000.000000 44000.000000 44000.000000 \n",
"mean 2006.261182 3.499636 1.997864 1985.416750 224928.983383 \n",
"std 575.306280 1.117315 0.815760 20.700559 76107.652516 \n",
"min 1000.000000 2.000000 1.000000 1950.000000 -36588.165397 \n",
"25% 1513.000000 3.000000 1.000000 1967.000000 170088.571867 \n",
"50% 2007.000000 3.000000 2.000000 1985.000000 225246.904135 \n",
"75% 2505.000000 5.000000 3.000000 2003.000000 279365.119289 \n",
"max 2999.000000 5.000000 3.000000 2021.000000 492195.259972 \n",
"\n",
" Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n",
"count 44000.000000 44000.000000 44000.000000 \n",
"mean 0.332841 0.333636 0.333523 \n",
"std 0.471235 0.471517 0.471477 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 \n",
"50% 0.000000 0.000000 0.000000 \n",
"75% 1.000000 1.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 "
],
"text/html": [
"\n",
" <div id=\"df-2223dbc3-5627-46ea-be81-2587126788d7\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SquareFeet</th>\n",
" <th>Bedrooms</th>\n",
" <th>Bathrooms</th>\n",
" <th>YearBuilt</th>\n",
" <th>Price</th>\n",
" <th>Neighborhood_Rural</th>\n",
" <th>Neighborhood_Suburb</th>\n",
" <th>Neighborhood_Urban</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>44000.000000</td>\n",
" <td>44000.000000</td>\n",
" <td>44000.000000</td>\n",
" <td>44000.000000</td>\n",
" <td>44000.000000</td>\n",
" <td>44000.000000</td>\n",
" <td>44000.000000</td>\n",
" <td>44000.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2006.261182</td>\n",
" <td>3.499636</td>\n",
" <td>1.997864</td>\n",
" <td>1985.416750</td>\n",
" <td>224928.983383</td>\n",
" <td>0.332841</td>\n",
" <td>0.333636</td>\n",
" <td>0.333523</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>575.306280</td>\n",
" <td>1.117315</td>\n",
" <td>0.815760</td>\n",
" <td>20.700559</td>\n",
" <td>76107.652516</td>\n",
" <td>0.471235</td>\n",
" <td>0.471517</td>\n",
" <td>0.471477</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1000.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1950.000000</td>\n",
" <td>-36588.165397</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1513.000000</td>\n",
" <td>3.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1967.000000</td>\n",
" <td>170088.571867</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2007.000000</td>\n",
" <td>3.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1985.000000</td>\n",
" <td>225246.904135</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2505.000000</td>\n",
" <td>5.000000</td>\n",
" <td>3.000000</td>\n",
" <td>2003.000000</td>\n",
" <td>279365.119289</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2999.000000</td>\n",
" <td>5.000000</td>\n",
" <td>3.000000</td>\n",
" <td>2021.000000</td>\n",
" <td>492195.259972</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2223dbc3-5627-46ea-be81-2587126788d7')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-2223dbc3-5627-46ea-be81-2587126788d7 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-2223dbc3-5627-46ea-be81-2587126788d7');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-134a70fe-f3be-4778-aa13-c049d26c4190\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-134a70fe-f3be-4778-aa13-c049d26c4190')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-134a70fe-f3be-4778-aa13-c049d26c4190 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"summary": "{\n \"name\": \"hp_train\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14939.968071261836,\n \"min\": 575.3062795316038,\n \"max\": 44000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 2006.2611818181817,\n 2007.0,\n 44000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15555.206914611437,\n \"min\": 1.1173145826824615,\n \"max\": 44000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 44000.0,\n 3.4996363636363634,\n 5.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15555.70202423901,\n \"min\": 0.8157604462168441,\n \"max\": 44000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 44000.0,\n 1.9978636363636364,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14969.494474985573,\n \"min\": 20.70055860487858,\n \"max\": 44000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1985.41675,\n 1985.0,\n 44000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 163684.07731051612,\n \"min\": -36588.16539749279,\n \"max\": 492195.2599720151,\n \"num_unique_values\": 8,\n \"samples\": [\n 224928.9833827127,\n 225246.9041353957,\n 44000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15556.207564412634,\n \"min\": 0.0,\n \"max\": 44000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3328409090909091,\n 1.0,\n 0.47123548806324117\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n 
\"properties\": {\n \"dtype\": \"number\",\n \"std\": 15556.207510022008,\n \"min\": 0.0,\n \"max\": 44000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3336363636363636,\n 1.0,\n 0.4715169068117858\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15556.207517787427,\n \"min\": 0.0,\n \"max\": 44000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.33352272727272725,\n 1.0,\n 0.47147679658615904\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"hp_dev.describe()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 300
},
"id": "BmM4_vWsjBK3",
"outputId": "b0a1906f-9eac-46a5-84b6-0cdbea344d69"
},
"execution_count": 18,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n",
"count 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 \n",
"mean 2008.190800 3.487200 1.972600 1985.485400 224290.794530 \n",
"std 576.206366 1.104753 0.816077 20.960049 76778.005658 \n",
"min 1000.000000 2.000000 1.000000 1950.000000 -18159.685676 \n",
"25% 1510.750000 3.000000 1.000000 1967.000000 169103.151768 \n",
"50% 2007.000000 3.000000 2.000000 1985.000000 223614.924625 \n",
"75% 2503.000000 4.000000 3.000000 2004.000000 279651.548644 \n",
"max 2999.000000 5.000000 3.000000 2021.000000 467492.827823 \n",
"\n",
" Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n",
"count 5000.000000 5000.000000 5000.000000 \n",
"mean 0.337800 0.341600 0.320600 \n",
"std 0.473007 0.474294 0.466754 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 \n",
"50% 0.000000 0.000000 0.000000 \n",
"75% 1.000000 1.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 "
],
"text/html": [
"\n",
" <div id=\"df-3a88c1a6-aca2-4090-80ae-854db7e8fbba\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SquareFeet</th>\n",
" <th>Bedrooms</th>\n",
" <th>Bathrooms</th>\n",
" <th>YearBuilt</th>\n",
" <th>Price</th>\n",
" <th>Neighborhood_Rural</th>\n",
" <th>Neighborhood_Suburb</th>\n",
" <th>Neighborhood_Urban</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>5000.000000</td>\n",
" <td>5000.000000</td>\n",
" <td>5000.000000</td>\n",
" <td>5000.000000</td>\n",
" <td>5000.000000</td>\n",
" <td>5000.000000</td>\n",
" <td>5000.000000</td>\n",
" <td>5000.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2008.190800</td>\n",
" <td>3.487200</td>\n",
" <td>1.972600</td>\n",
" <td>1985.485400</td>\n",
" <td>224290.794530</td>\n",
" <td>0.337800</td>\n",
" <td>0.341600</td>\n",
" <td>0.320600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>576.206366</td>\n",
" <td>1.104753</td>\n",
" <td>0.816077</td>\n",
" <td>20.960049</td>\n",
" <td>76778.005658</td>\n",
" <td>0.473007</td>\n",
" <td>0.474294</td>\n",
" <td>0.466754</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1000.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1950.000000</td>\n",
" <td>-18159.685676</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1510.750000</td>\n",
" <td>3.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1967.000000</td>\n",
" <td>169103.151768</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2007.000000</td>\n",
" <td>3.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1985.000000</td>\n",
" <td>223614.924625</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2503.000000</td>\n",
" <td>4.000000</td>\n",
" <td>3.000000</td>\n",
" <td>2004.000000</td>\n",
" <td>279651.548644</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2999.000000</td>\n",
" <td>5.000000</td>\n",
" <td>3.000000</td>\n",
" <td>2021.000000</td>\n",
" <td>467492.827823</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3a88c1a6-aca2-4090-80ae-854db7e8fbba')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-3a88c1a6-aca2-4090-80ae-854db7e8fbba button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-3a88c1a6-aca2-4090-80ae-854db7e8fbba');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-0c4f4805-cbb1-4a98-a845-0615781e0eb2\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-0c4f4805-cbb1-4a98-a845-0615781e0eb2')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-0c4f4805-cbb1-4a98-a845-0615781e0eb2 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"summary": "{\n \"name\": \"hp_dev\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1373.0060319958575,\n \"min\": 576.2063661142855,\n \"max\": 5000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 2008.1908,\n 2007.0,\n 5000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1766.6767900146253,\n \"min\": 1.1047534820271943,\n \"max\": 5000.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 5000.0,\n 3.4872,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1767.1212371098363,\n \"min\": 0.8160774696603855,\n \"max\": 5000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 5000.0,\n 1.9726,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1352.889275943266,\n \"min\": 20.9600489400744,\n \"max\": 5000.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1985.4854,\n 1985.0,\n 5000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 159056.8559402586,\n \"min\": -18159.685676249966,\n \"max\": 467492.8278233021,\n \"num_unique_values\": 8,\n \"samples\": [\n 224290.7945297919,\n 223614.92462488014,\n 5000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1767.6250346212444,\n \"min\": 0.0,\n \"max\": 5000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3378,\n 1.0,\n 0.4730073014039385\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 
1767.6247777120989,\n \"min\": 0.0,\n \"max\": 5000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3416,\n 1.0,\n 0.474293612529388\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1767.626219259249,\n \"min\": 0.0,\n \"max\": 5000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.3206,\n 1.0,\n 0.4667539092952179\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 18
}
]
},
{
"cell_type": "code",
"source": [
"hp_test.describe()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 300
},
"id": "T7edA8gVjBfU",
"outputId": "99be05f6-e25f-45ae-9e4f-7f293d1ac14c"
},
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" SquareFeet Bedrooms Bathrooms YearBuilt Price \\\n",
"count 1000.000000 1000.000000 1000.000000 1000.000000 1000.000000 \n",
"mean 2002.288000 3.515000 2.002000 1984.457000 223037.016061 \n",
"std 581.670136 1.130953 0.817719 20.330949 74475.155327 \n",
"min 1000.000000 2.000000 1.000000 1950.000000 -7550.504574 \n",
"25% 1507.250000 2.000000 1.000000 1967.000000 168905.529102 \n",
"50% 2021.500000 4.000000 2.000000 1983.000000 220416.485632 \n",
"75% 2524.000000 5.000000 3.000000 2002.000000 279628.697596 \n",
"max 2999.000000 5.000000 3.000000 2021.000000 437047.713441 \n",
"\n",
" Neighborhood_Rural Neighborhood_Suburb Neighborhood_Urban \n",
"count 1000.000000 1000.000000 1000.000000 \n",
"mean 0.342000 0.333000 0.325000 \n",
"std 0.474617 0.471522 0.468609 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 \n",
"50% 0.000000 0.000000 0.000000 \n",
"75% 1.000000 1.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 "
],
"text/html": [
"\n",
" <div id=\"df-83a49760-99ab-4703-b53f-d2e8d4fc84c3\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SquareFeet</th>\n",
" <th>Bedrooms</th>\n",
" <th>Bathrooms</th>\n",
" <th>YearBuilt</th>\n",
" <th>Price</th>\n",
" <th>Neighborhood_Rural</th>\n",
" <th>Neighborhood_Suburb</th>\n",
" <th>Neighborhood_Urban</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1000.000000</td>\n",
" <td>1000.000000</td>\n",
" <td>1000.000000</td>\n",
" <td>1000.000000</td>\n",
" <td>1000.000000</td>\n",
" <td>1000.000000</td>\n",
" <td>1000.000000</td>\n",
" <td>1000.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2002.288000</td>\n",
" <td>3.515000</td>\n",
" <td>2.002000</td>\n",
" <td>1984.457000</td>\n",
" <td>223037.016061</td>\n",
" <td>0.342000</td>\n",
" <td>0.333000</td>\n",
" <td>0.325000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>581.670136</td>\n",
" <td>1.130953</td>\n",
" <td>0.817719</td>\n",
" <td>20.330949</td>\n",
" <td>74475.155327</td>\n",
" <td>0.474617</td>\n",
" <td>0.471522</td>\n",
" <td>0.468609</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1000.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1950.000000</td>\n",
" <td>-7550.504574</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1507.250000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1967.000000</td>\n",
" <td>168905.529102</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2021.500000</td>\n",
" <td>4.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1983.000000</td>\n",
" <td>220416.485632</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2524.000000</td>\n",
" <td>5.000000</td>\n",
" <td>3.000000</td>\n",
" <td>2002.000000</td>\n",
" <td>279628.697596</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2999.000000</td>\n",
" <td>5.000000</td>\n",
" <td>3.000000</td>\n",
" <td>2021.000000</td>\n",
" <td>437047.713441</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-83a49760-99ab-4703-b53f-d2e8d4fc84c3')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-83a49760-99ab-4703-b53f-d2e8d4fc84c3 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-83a49760-99ab-4703-b53f-d2e8d4fc84c3');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-64da87a7-4bfb-4db0-8cba-1e1d0ad9438a\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-64da87a7-4bfb-4db0-8cba-1e1d0ad9438a')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-64da87a7-4bfb-4db0-8cba-1e1d0ad9438a button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"summary": "{\n \"name\": \"hp_test\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"SquareFeet\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 830.5567967260185,\n \"min\": 581.6701360764563,\n \"max\": 2999.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 1000.0,\n 2002.288,\n 2524.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bedrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 352.4125101562337,\n \"min\": 1.1309527196368794,\n \"max\": 1000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 1000.0,\n 3.515,\n 5.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bathrooms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 352.9069492337987,\n \"min\": 0.8177191844787945,\n \"max\": 1000.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 1000.0,\n 2.002,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"YearBuilt\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 731.3239730266098,\n \"min\": 20.330949276866008,\n \"max\": 2021.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 1984.457,\n 1983.0,\n 1000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 150108.67450064773,\n \"min\": -7550.50457435759,\n \"max\": 437047.71344105,\n \"num_unique_values\": 8,\n \"samples\": [\n 223037.01606120248,\n 220416.4856317892,\n 1000.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Rural\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 353.41137428502344,\n \"min\": 0.0,\n \"max\": 1000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.342,\n 1.0,\n 0.4746169626775482\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Suburb\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 353.4119852977801,\n 
\"min\": 0.0,\n \"max\": 1000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.333,\n 1.0,\n 0.4715223571935199\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Neighborhood_Urban\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 353.4125366409159,\n \"min\": 0.0,\n \"max\": 1000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.325,\n 1.0,\n 0.46860921309188386\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"source": [
"def print_sum(df_name, df):\n",
" columns = ['Neighborhood_Rural', 'Neighborhood_Suburb', 'Neighborhood_Urban']\n",
" print(df_name)\n",
" for col in columns:\n",
" print(col, df[col].sum())\n",
" print()\n",
"\n",
"# Print the neighborhood column sums for each split, in train/dev/test order.\n",
"for split_name, split_df in [('hp_train', hp_train), ('hp_dev', hp_dev), ('hp_test', hp_test)]:\n",
"  print_sum(split_name, split_df)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7RBghGHvwEUe",
"outputId": "e472b811-18fe-4530-b28f-37a9a9f4ed70"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"hp_train\n",
"Neighborhood_Rural 14645\n",
"Neighborhood_Suburb 14680\n",
"Neighborhood_Urban 14675\n",
"\n",
"hp_dev\n",
"Neighborhood_Rural 1689\n",
"Neighborhood_Suburb 1708\n",
"Neighborhood_Urban 1603\n",
"\n",
"hp_test\n",
"Neighborhood_Rural 342\n",
"Neighborhood_Suburb 333\n",
"Neighborhood_Urban 325\n",
"\n"
]
}
]
}
]
}