forked from tdwojak/Python2018
Rozwiazanie zadania z labs06
This commit is contained in:
parent
edcc7fa76d
commit
164d110319
101
labs06/task02.py
101
labs06/task02.py
@ -1,14 +1,25 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
import sklearn as skl
|
||||||
|
from sklearn.linear_model import LinearRegression
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
|
||||||
def wczytaj_dane(path="mieszkania.csv"):
    """Load the flat listings from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to ``"mieszkania.csv"``
            (relative to the current working directory), so existing
            zero-argument calls keep working.

    Returns:
        A ``pandas.DataFrame`` holding exactly the columns ``Id``,
        ``Expected``, ``Rooms``, ``SqrMeters``, ``Floor``, ``Location`` and
        ``Description``, in that order. Extra columns in the file are
        dropped; a column missing from the file is filled with NaN.
    """
    columns = [
        'Id',
        'Expected',
        'Rooms',
        'SqrMeters',
        'Floor',
        'Location',
        'Description',
    ]
    raw = pd.read_csv(path)
    # reindex() selects and orders the wanted columns in one step instead of
    # re-wrapping the already-parsed frame in a second DataFrame.
    return raw.reindex(columns=columns)
|
||||||
|
|
||||||
def most_common_room_number(dane):
    """Return the room count that appears most often in the listings.

    Args:
        dane: DataFrame with a ``Rooms`` column.

    Returns:
        The most frequent value of ``Rooms``. When several values are tied
        for the highest frequency the smallest of them is returned. Returns
        ``None`` when the column has no non-missing values.
    """
    # mode() yields the most frequent value(s) sorted ascending. The previous
    # code used the highest *count* as an index label into the column, which
    # returned an unrelated row's room number.
    modes = dane["Rooms"].mode()
    if modes.empty:
        return None
    return modes.iloc[0]
|
||||||
|
|
||||||
def cheapest_flats(dane, n):
    """Return the ``n`` listings with the lowest expected price.

    Args:
        dane: DataFrame with an ``Expected`` (price) column.
        n: Number of rows to return.

    Returns:
        A DataFrame with the first ``n`` rows of ``dane`` ordered by
        ascending ``Expected``. Listings with equal prices keep their
        original relative order.
    """
    # mergesort is stable, so ties are resolved deterministically by the
    # original row order (the default quicksort gives no such guarantee).
    ordered = dane.sort_values(by='Expected', kind='mergesort')
    return ordered.head(n)
|
||||||
|
|
||||||
def find_borough(desc):
|
def find_borough(desc):
|
||||||
dzielnice = ['Stare Miasto',
|
dzielnice = ['Stare Miasto',
|
||||||
@ -19,36 +30,98 @@ def find_borough(desc):
|
|||||||
'Winogrady',
|
'Winogrady',
|
||||||
'Miłostowo',
|
'Miłostowo',
|
||||||
'Dębiec']
|
'Dębiec']
|
||||||
pass
|
|
||||||
|
lista = desc.split()
|
||||||
|
for i in lista:
|
||||||
|
if i == "Stare": ## do poprawienia
|
||||||
|
return "Stare Miasto"
|
||||||
|
|
||||||
|
else:
|
||||||
|
for l in dzielnice:
|
||||||
|
if i == l:
|
||||||
|
return l
|
||||||
|
else:
|
||||||
|
return "Inne"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def add_borough(dane):
    """Add a ``Borough`` column derived from each row's ``Location``.

    Every value of ``dane['Location']`` is passed to ``find_borough`` and the
    results are stored, in row order, in a new ``Borough`` column.

    Args:
        dane: DataFrame with a ``Location`` column. It is modified in place.

    Returns:
        The same DataFrame object, now carrying the ``Borough`` column.
    """
    dane['Borough'] = [find_borough(location) for location in dane['Location']]
    return dane
|
||||||
|
|
||||||
def write_plot(dane, filename):
    """Save a bar chart of the number of listings per borough as a PNG.

    Args:
        dane: DataFrame with a ``Borough`` column.
        filename: Output path without extension; ``'.png'`` is appended.
    """
    counts = dane.Borough.value_counts()
    axes = counts.plot.bar(figsize=(14, 14))
    axes.set_title('Liczba ogłoszeń mieszkań z podziałem na dzielnice')
    axes.set_xlabel('Dzielnice')
    axes.set_ylabel('Liczba')

    figure = axes.get_figure()
    try:
        figure.savefig(filename + '.png')
    finally:
        # Release the figure once it is on disk so repeated calls do not
        # accumulate open figures or draw on top of an earlier chart.
        plt.close(figure)
|
||||||
|
|
||||||
|
|
||||||
def mean_price(dane, room_number):
    """Return the rounded mean expected price of flats with a given room count.

    Args:
        dane: DataFrame with ``Rooms`` and ``Expected`` columns.
        room_number: Room count to filter on.

    Returns:
        The mean of ``Expected`` over the matching rows, rounded to the
        nearest integer, or ``None`` when no priced flat matches.
    """
    prices = dane.loc[dane["Rooms"] == room_number, "Expected"]
    srednia = prices.mean()
    # The mean of an empty (or all-missing) selection is NaN, and round(NaN)
    # raises ValueError, so report "no data" explicitly instead.
    if pd.isna(srednia):
        return None
    return round(srednia)
|
||||||
|
|
||||||
def find_13(dane, floor=13):
    """Return the boroughs of all listings located on a given floor.

    Args:
        dane: DataFrame with ``Floor`` and ``Borough`` columns.
        floor: Floor number to look for. Defaults to 13, so existing
            one-argument calls behave as before.

    Returns:
        A NumPy array with the ``Borough`` value of every matching row, in
        row order. Boroughs with several matching listings appear once per
        listing.
    """
    on_floor = dane["Floor"] == floor
    return dane.loc[on_floor, "Borough"].values
|
||||||
|
|
||||||
def find_best_flats(dane, borough='Winogrady', floor=1, rooms=3):
    """Return the listings matching a borough, floor and room count.

    Args:
        dane: DataFrame with ``Borough``, ``Floor`` and ``Rooms`` columns.
        borough: Required value of ``Borough``. Defaults to ``'Winogrady'``.
        floor: Required value of ``Floor``. Defaults to 1.
        rooms: Required value of ``Rooms``. Defaults to 3.

    Returns:
        A DataFrame with the rows of ``dane`` that satisfy all three
        conditions. With the defaults this is the original fixed query.
    """
    wanted = (
        (dane["Borough"] == borough)
        & (dane["Floor"] == floor)
        & (dane["Rooms"] == rooms)
    )
    return dane[wanted]
|
||||||
|
|
||||||
def _evaluate_linear_regression(dane, dev_size=1000):
    """Fit a price model on all but the last rows and score it on the rest.

    A linear regression of ``Expected`` on ``Rooms`` and ``SqrMeters`` is
    trained on every row except the final ``dev_size`` ones, which are used
    as the held-out set.

    Args:
        dane: DataFrame with ``Rooms``, ``SqrMeters`` and ``Expected`` columns.
        dev_size: Number of trailing rows held out for evaluation.

    Returns:
        The root-mean-square error of the predictions on the held-out rows.

    Raises:
        ValueError: If ``dane`` does not have more than ``dev_size`` rows,
            which would leave nothing to train on.
    """
    if len(dane) <= dev_size:
        raise ValueError(
            "need more than {} rows to train, got {}".format(dev_size, len(dane))
        )

    features = ['Rooms', 'SqrMeters']
    train = dane[:-dev_size]
    dev = dane[-dev_size:]

    lm = LinearRegression()
    lm.fit(train[features], train['Expected'])
    predicted = lm.predict(dev[features])

    # mean_squared_error expects (y_true, y_pred) in that order.
    return np.sqrt(mean_squared_error(dev['Expected'], predicted))


def main():
    """Run the whole listing report interactively.

    Loads the listings, prints summary statistics, asks on standard input how
    many of the cheapest offers to show, writes the per-borough bar chart to
    ``wykres.png`` and finally reports the RMSE of a simple linear price model.
    """
    separator = '-' * 100

    dane = wczytaj_dane()
    print(dane[:5])
    print(separator)

    print("Najpopularniejsza liczba pokoi w mieszkaniu to: {}"
          .format(most_common_room_number(dane)))

    n = int(input('Podaj liczbę najtańszych ofert, które mamy wyświetlić -->'))
    print ('Dane', n, 'najtańszych ofert:\n', cheapest_flats(dane, n))
    print(separator)

    # The Borough column is required by the plot and by the queries below.
    add_borough(dane)
    filename = 'wykres'
    write_plot(dane, filename)
    print('Zapisano wykres w pliku')
    print(separator)

    print("{} to najładniejsza dzielnica w Poznaniu.".format(find_borough("Grunwald i Jeżyce")))
    print(separator)

    print('Lista dzielnic, które zawierają ofertę mieszkań na 13 piętrze:')
    print(find_13(dane))
    print(separator)

    print('Ogłoszenia mieszkań, które znajdują się na Winogradach, mają 3 pokoje i są położone na 1 piętrze:')
    print(find_best_flats(dane))
    print(separator)

    print("Średnia cena mieszkania 3-pokojowego, to: {}"
          .format(mean_price(dane, 3)))

    # Linear regression
    print('Budowanie modelu regresji liniowej...')
    rmse = _evaluate_linear_regression(dane)
    print("RMSE:", round(rmse,2))


if __name__ == "__main__":
    main()
|
||||||
|
Loading…
Reference in New Issue
Block a user