forked from tdwojak/Python2018
128 lines
3.5 KiB
Python
Executable File
128 lines
3.5 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import pandas as pd
|
|
from matplotlib import pyplot as plt
|
|
import sklearn as skl
|
|
from sklearn.linear_model import LinearRegression
|
|
import numpy as np
|
|
from sklearn.metrics import mean_squared_error
|
|
|
|
def wczytaj_dane():
    """Load the flat listings from ``mieszkania.csv`` in the working directory.

    Returns:
        A DataFrame restricted to (and ordered by) the columns this script
        uses: Id, Expected, Rooms, SqrMeters, Floor, Location, Description.
    """
    kolumny = ['Id', 'Expected', 'Rooms', 'SqrMeters', 'Floor', 'Location', 'Description']
    surowe = pd.read_csv("mieszkania.csv")
    # Re-wrapping with an explicit column list drops any extra CSV columns.
    return pd.DataFrame(surowe, columns=kolumny)
|
|
|
|
def most_common_room_number(dane):
    """Return the room count that occurs most often in ``dane.Rooms``.

    Args:
        dane: DataFrame with a ``Rooms`` column.

    Returns:
        The most frequent value of the ``Rooms`` column.

    Raises:
        ValueError: If ``dane`` has no non-null ``Rooms`` values.
    """
    # value_counts() maps each room number to its frequency; idxmax() returns
    # the room number (the index label) with the highest frequency. Using
    # .max() here would return the frequency itself, not the room number.
    return dane.Rooms.value_counts().idxmax()
|
|
|
|
def cheapest_flats(dane, n):
    """Return the ``n`` listings with the lowest ``Expected`` price.

    Args:
        dane: DataFrame with an ``Expected`` column.
        n: Number of rows to keep from the cheap end.

    Returns:
        A new DataFrame sorted ascending by ``Expected``, cut to ``n`` rows.
    """
    posortowane = dane.sort_values(by='Expected')
    return posortowane.iloc[:n]
|
|
|
|
def find_borough(desc):
    """Return the known borough mentioned in ``desc``, or ``"Inne"``.

    The text is searched for each known borough name as a substring, so
    multi-word names ("Stare Miasto") and names followed by punctuation
    ("Jeżyce,") are recognised. When several boroughs appear, the one that
    occurs earliest in the text wins.

    Args:
        desc: Free-text location/description. Non-string values (for
            example NaN from a missing CSV cell) are treated as no match.

    Returns:
        One of the known borough names, or ``"Inne"`` when none is found.
    """
    dzielnice = ['Stare Miasto',
                 'Wilda',
                 'Jeżyce',
                 'Rataje',
                 'Piątkowo',
                 'Winogrady',
                 'Miłostowo',
                 'Dębiec']

    if not isinstance(desc, str):
        return "Inne"

    znaleziona = "Inne"
    najwczesniej = len(desc)
    for nazwa in dzielnice:
        pozycja = desc.find(nazwa)
        # Strict '<' keeps the first-listed borough on the (impossible in
        # practice) tie and ignores misses, where find() returns -1.
        if pozycja != -1 and pozycja < najwczesniej:
            najwczesniej = pozycja
            znaleziona = nazwa
    return znaleziona
|
|
|
|
|
|
|
|
def add_borough(dane):
    """Add a ``Borough`` column derived from each row's ``Location``.

    The frame is modified in place; the same object is also returned so the
    call can be chained.

    Args:
        dane: DataFrame with a ``Location`` column.

    Returns:
        ``dane`` itself, now carrying the ``Borough`` column.
    """
    dane['Borough'] = [find_borough(lokalizacja) for lokalizacja in dane['Location']]
    return dane
|
|
|
|
def write_plot(dane, filename):
    """Save a bar chart of listing counts per borough to ``<filename>.png``.

    Args:
        dane: DataFrame with a ``Borough`` column.
        filename: Output path without extension; ``'.png'`` is appended.
    """
    liczebnosci = dane.Borough.value_counts()
    osie = liczebnosci.plot.bar(figsize=(14, 14))
    osie.set_title('Liczba ogłoszeń mieszkań z podziałem na dzielnice')
    osie.set_xlabel('Dzielnice')
    osie.set_ylabel('Liczba')
    osie.get_figure().savefig(filename + '.png')
|
|
|
|
|
|
def mean_price(dane, room_number):
    """Return the rounded mean ``Expected`` price of flats with ``room_number`` rooms.

    Args:
        dane: DataFrame with ``Rooms`` and ``Expected`` columns.
        room_number: Room count to filter on.

    Returns:
        The mean price rounded to the nearest integer.
    """
    pasujace = dane.Rooms == room_number
    ceny = dane.Expected[pasujace]
    return round(ceny.mean())
|
|
|
|
def find_13(dane):
    """Return the boroughs of all listings located on the 13th floor.

    Args:
        dane: DataFrame with ``Floor`` and ``Borough`` columns.

    Returns:
        A NumPy array of ``Borough`` values, one per matching row (duplicates
        are kept).
    """
    na_trzynastym = dane.Floor == 13
    return dane.loc[na_trzynastym, 'Borough'].values
|
|
|
|
def find_best_flats(dane):
    """Return listings in Winogrady that are on floor 1 and have 3 rooms.

    Args:
        dane: DataFrame with ``Borough``, ``Floor`` and ``Rooms`` columns.

    Returns:
        The subset of rows meeting all three conditions.
    """
    w_winogradach = dane.Borough == 'Winogrady'
    pierwsze_pietro = dane.Floor == 1
    trzy_pokoje = dane.Rooms == 3
    return dane[w_winogradach & pierwsze_pietro & trzy_pokoje]
|
|
|
|
def main():
    """Run the interactive demo: summary statistics, plot, and a price model.

    Loads the listings, prints several summaries, asks on stdin how many of
    the cheapest offers to show, saves the borough bar chart to
    ``wykres.png``, then fits a linear regression of price on room count and
    area and reports its RMSE on the last 1000 rows.

    Raises:
        ValueError: If the value typed at the prompt is not an integer.
    """
    dane = wczytaj_dane()
    print(dane[:5])
    print('-' * 100)
    print("Najpopularniejsza liczba pokoi w mieszkaniu to: {}"
          .format(most_common_room_number(dane)))
    n = int(input('Podaj liczbę najtańszych ofert, które mamy wyświetlić -->'))
    print('Dane', n, 'najtańszych ofert:\n', cheapest_flats(dane, n))
    print('-' * 100)
    add_borough(dane)
    filename = 'wykres'
    write_plot(dane, filename)
    print('Zapisano wykres w pliku')
    print('-' * 100)
    print("{} to najładniejsza dzielnica w Poznaniu.".format(find_borough("Grunwald i Jeżyce")))
    print('-' * 100)
    print('Lista dzielnic, które zawierają ofertę mieszkań na 13 piętrze:')
    print(find_13(dane))
    print('-' * 100)
    print('Ogłoszenia mieszkań, które znajdują się na Winogradach, mają 3 pokoje i są położone na 1 piętrze:')
    print(find_best_flats(dane))
    print('-' * 100)
    print("Średnia cena mieszkania 3-pokojowego, to: {}"
          .format(mean_price(dane, 3)))

    # LINEAR REGRESSION

    print('Budowanie modelu regresji liniowej...')

    # The last `dev_size` rows are held out for evaluation; everything before
    # them is used for fitting. With `dev_size` rows or fewer in total the
    # training slice is empty.
    dev_size = 1000
    train = pd.DataFrame(dane[:-dev_size])
    dev = pd.DataFrame(dane[-dev_size:])

    X_train = pd.DataFrame(train, columns=['Rooms', 'SqrMeters'])
    y_train = pd.DataFrame(train, columns=['Expected'])
    lm = LinearRegression()
    lm.fit(X_train, y_train)

    X_dev = pd.DataFrame(dev, columns=['Rooms', 'SqrMeters'])
    predicted = lm.predict(X_dev)

    # mean_squared_error takes (y_true, y_pred) in that order.
    rmse = np.sqrt(mean_squared_error(dev.Expected, predicted))
    print("RMSE:", round(rmse, 2))
|
|
|
|
|
|
|
|
|
|
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|