{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "Ładowanie danych:"
      ],
      "metadata": {
        "id": "coWdAJZAPC1C"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "bozs99nnO2jv",
        "outputId": "4119ebc8-eccf-4574-866c-2502176e0fbd"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "fatal: destination path 'mieszkania5' already exists and is not an empty directory.\n"
          ]
        }
      ],
      "source": [
        "!git clone git://gonito.net/mieszkania5"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Importy:"
      ],
      "metadata": {
        "id": "OFaZTYDGQqLQ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import csv\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "\n",
        "data = pd.read_table(\"mieszkania5/train/train.tsv\", delimiter='\\t', header=None)\n",
        "data.rename(columns={0: 'cena', 1: 'stan', 2: 'czynsz', 3: 'x3', 4: 'cenazam', 5: 'link', 6: 'pietro', 7: 'x7', 8: 'metraz', 9: 'rynek', 10: 'liczba pokoi', 11: 'budynek', 12: 'x12', 13: 'x13', 14: 'x14', 15: 'x15', 16: 'x16', 17: 'x17', 18: 'x18', 19: 'x19', 20: 'x20', 21: 'x21', 22: 'x22', 23: 'x23', 24: 'x24', 25: 'x25'}, inplace=True)\n",
        "\n",
        "data.drop('x3', inplace=True, axis=1)\n",
        "data.drop('cenazam', inplace=True, axis=1)\n",
        "data.drop('link', inplace=True, axis=1)\n",
        "data.drop('pietro', inplace=True, axis=1)\n",
        "data.drop('budynek', inplace=True, axis=1)\n",
        "data.drop('x7', inplace=True, axis=1)\n",
        "data.drop('x12', inplace=True, axis=1)\n",
        "data.drop('x13', inplace=True, axis=1)\n",
        "data.drop('x14', inplace=True, axis=1)\n",
        "data.drop('x15', inplace=True, axis=1)\n",
        "data.drop('x16', inplace=True, axis=1)\n",
        "data.drop('x17', inplace=True, axis=1)\n",
        "data.drop('x18', inplace=True, axis=1)\n",
        "data.drop('x19', inplace=True, axis=1)\n",
        "data.drop('x20', inplace=True, axis=1)\n",
        "data.drop('x21', inplace=True, axis=1)\n",
        "data.drop('x22', inplace=True, axis=1)\n",
        "data.drop('x23', inplace=True, axis=1)\n",
        "data.drop('x24', inplace=True, axis=1)\n",
        "data.drop('x25', inplace=True, axis=1)\n",
        "\n",
        "data['czynsz'] = data['czynsz'].str.extract('(\\d+)')\n",
        "data['stan'] = data['stan'].map({'do zamieszkania': 2, 'do remontu': 1, 'do wykończenia': 2})\n",
        "data['rynek'] = data['rynek'].map({'wtórny': 0, 'pierwotny': 1})\n",
        "\n",
        "data.dropna(inplace=True)"
      ],
      "metadata": {
        "id": "K-TUB0pAPCp2"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": {
        "id": "57zFDlw7PDDb"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "cena = data['cena']\n",
        "parametry = data[['stan', 'czynsz', 'liczba pokoi', 'metraz', 'rynek']]"
      ],
      "metadata": {
        "id": "___F5VBeco6H"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.linear_model import LinearRegression"
      ],
      "metadata": {
        "id": "H1shMEsxTccr"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def train_model(cena, parametry):\n",
        "  model = LinearRegression()\n",
        "  model.fit(X=parametry, y=cena)\n",
        "  return model"
      ],
      "metadata": {
        "id": "vT9sCZ2XTjKy"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "model = train_model(cena, parametry)"
      ],
      "metadata": {
        "id": "-DZ-HNMtUBmr"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def predict(stan, czynsz, liczba_pokoi, metraz, rynek):\n",
        "  return model.predict(np.array([[stan, czynsz, liczba_pokoi, metraz, rynek]])).item()"
      ],
      "metadata": {
        "id": "oK_ZW9N9Wg2u"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "predict(1, 200, 2, 40.0, 0)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "bLmRBRBMgFTg",
        "outputId": "f94f3691-9a2a-4035-b3ad-dde097631e85"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "217119.72285625804"
            ]
          },
          "metadata": {},
          "execution_count": 60
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "K7eEdZFzgI3n"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}