mieszkania5/Mieszkania.ipynb

5.8 KiB

Ładowanie danych:

!git clone git://gonito.net/mieszkania5
fatal: destination path 'mieszkania5' already exists and is not an empty directory.

Importy:

import csv
import pandas as pd
import numpy as np

# Load the headerless, tab-separated training set; pandas numbers the columns
# 0, 1, 2, ... Ten positions get descriptive names, every other position up to
# 25 gets the placeholder "x<position>".
named_columns = {0: 'cena', 1: 'stan', 2: 'czynsz', 4: 'cenazam', 5: 'link', 6: 'pietro', 8: 'metraz', 9: 'rynek', 10: 'liczba pokoi', 11: 'budynek'}
column_names = {position: named_columns.get(position, f'x{position}') for position in range(26)}
data = pd.read_table("mieszkania5/train/train.tsv", delimiter='\t', header=None).rename(columns=column_names)

# Discard every column the model does not use in one non-mutating call
# (replaces twenty separate in-place drops of the same columns).
unused_columns = ['x3', 'cenazam', 'link', 'pietro', 'budynek', 'x7'] + [f'x{position}' for position in range(12, 26)]
data = data.drop(columns=unused_columns)

# Keep the first run of digits found in each 'czynsz' string and store it as a
# number. The pattern is a raw string so "\d" is not parsed as a string escape;
# expand=False yields a Series; values without any digits become NaN.
data['czynsz'] = pd.to_numeric(data['czynsz'].str.extract(r'(\d+)', expand=False), errors='coerce')
# Encode 'stan' as an integer; labels missing from the mapping become NaN.
# NOTE(review): 'do wykończenia' shares code 2 with 'do zamieszkania' - confirm this is intended.
data['stan'] = data['stan'].map({'do zamieszkania': 2, 'do remontu': 1, 'do wykończenia': 2})
# Encode 'rynek' as 0 ('wtórny') or 1 ('pierwotny'); any other label becomes NaN.
data['rynek'] = data['rynek'].map({'wtórny': 0, 'pierwotny': 1})

# Keep only rows without missing values, then split the frame into the
# regression target ('cena') and the five feature columns.
data = data.dropna()
cena = data.loc[:, 'cena']
parametry = data.loc[:, ['stan', 'czynsz', 'liczba pokoi', 'metraz', 'rynek']]
from sklearn.linear_model import LinearRegression
def train_model(cena, parametry):
  """Fit a linear regression of ``cena`` on ``parametry``.

  Args:
    cena: Target values, one per row of ``parametry``.
    parametry: Feature table the regression is fitted on.

  Returns:
    The fitted ``LinearRegression`` estimator.
  """
  # ``fit`` hands back the estimator itself, so it can be returned directly.
  return LinearRegression().fit(parametry, cena)
# Fit the regression once on the prepared target and feature columns.
model = train_model(cena=cena, parametry=parametry)
def predict(stan, czynsz, liczba_pokoi, metraz, rynek):
  """Return the value predicted by the module-level ``model`` for one row.

  The features are passed as a one-row DataFrame whose column names and order
  match the columns the model was fitted on. Passing a bare array instead
  makes scikit-learn emit "X does not have valid feature names".

  Args:
    stan: Value for the 'stan' feature column.
    czynsz: Value for the 'czynsz' feature column.
    liczba_pokoi: Value for the 'liczba pokoi' feature column.
    metraz: Value for the 'metraz' feature column.
    rynek: Value for the 'rynek' feature column.

  Returns:
    The single prediction as a plain Python scalar.
  """
  # Dict insertion order fixes the column order; it must stay the same as the
  # feature list used for training.
  features = pd.DataFrame([{
    'stan': stan,
    'czynsz': czynsz,
    'liczba pokoi': liczba_pokoi,
    'metraz': metraz,
    'rynek': rynek,
  }])
  return model.predict(features).item()
# Example query; keyword arguments make clear which number is which feature.
predict(stan=1, czynsz=200, liczba_pokoi=2, metraz=40.0, rynek=0)
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
  warnings.warn(
217119.72285625804