forked from tdwojak/Python2017
task02 **dodatkowe (labs06) done
This commit is contained in:
parent
8424251b3c
commit
c440fa5c25
87
labs06/model.py
Normal file
87
labs06/model.py
Normal file
@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""*(dodatkowe)*: Korzystając z pakietu *sklearn* zbuduj model regresji liniowej,
|
||||
która będzie wyznaczać cenę mieszkania na podstawie wielkości mieszkania i liczby pokoi."""
|
||||
|
||||
#import bibliotek
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import sklearn
|
||||
from sklearn import linear_model
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import mean_squared_error
|
||||
import math
|
||||
|
||||
# Load the apartment listings. pd.read_csv already returns a DataFrame,
# so no additional pd.DataFrame(...) wrapping is needed.
dane = pd.read_csv('mieszkania.csv', sep=',', encoding='utf-8')
|
||||
|
||||
# Correlation analysis.
# Restrict to numeric/boolean columns before calling corr(): pandas >= 2.0 no
# longer drops non-numeric columns silently and raises if any are present.
# NOTE(review): assumes mieszkania.csv may contain text columns - confirm.
print(dane.select_dtypes(include=['number', 'bool']).corr())
# Values recorded from the author's run:
#   SqrMeters vs Expected: weak positive correlation, 0.109640
#   Rooms vs Expected:     weak positive correlation, 0.081177
# The scatter plot below illustrates how weak the correlation is.
dane.plot.scatter(x='SqrMeters', y='Expected')
plt.show()
|
||||
|
||||
# Data preparation.
# X - independent variables (apartment size and number of rooms)
# Y - dependent variable (the 'Expected' price column)
# Double-bracket selection already yields a DataFrame, so it is used directly.
X = dane[['SqrMeters', 'Rooms']]
Y = dane['Expected']
|
||||
|
||||
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=1,
)

# Fit a LinearRegression model on the training portion only
# (fit() returns the fitted estimator itself).
lm = linear_model.LinearRegression().fit(X_train, Y_train)
|
||||
|
||||
# Coefficients of the model: one per feature, in the column order of X_train.
for col_name, coef in zip(X_train.columns, lm.coef_):
    print("The coefficient for {} is {}".format(col_name, coef))

# Intercept of the model.
intercept = lm.intercept_
print("The intercept for our model is {}".format(intercept))

# Fitted model from the recorded run (term assignment is consistent with the
# sample prediction for SqrMeters=45, Rooms=3 at the end of the script):
#   Expected = 123969.05 + 5299.47 * SqrMeters - 34261.86 * Rooms
|
||||
|
||||
# R^2: the proportion of the variability in Y that is explained by X in the
# model, used here as the goodness-of-fit measure on the held-out test set.
# In the recorded run about 0.78% of the variability in Y was explained by X.
print(lm.score(X_test, Y_test))

# Prediction: compare the predictions for the test set (data not used for
# training) with the ground truth for the same rows.
y_predict = lm.predict(X_test)

# scikit-learn's documented argument order is (y_true, y_pred).
lm_mse = mean_squared_error(Y_test, y_predict)

# Print the MSE first, then its square root (RMSE), which is expressed in the
# same units as Y. In the recorded run the predictions were on average about
# 1148825.67 away from the ground truth on the test set
# (presumably the RMSE figure - confirm against an actual run).
print(lm_mse)
print(math.sqrt(lm_mse))
|
||||
|
||||
print('--------')
# Plot the model against the test data: ground-truth values as black points
# and the model's predictions as a blue line, both over SqrMeters.
# NOTE(review): predictions also depend on Rooms and X_test is not sorted by
# SqrMeters, so the blue line is not expected to be a single straight line.
plt.scatter(X_test['SqrMeters'], Y_test, color='black')
plt.plot(X_test['SqrMeters'], y_predict, color='blue', linewidth=2)

# Hide the tick marks on both axes.
plt.xticks(())
plt.yticks(())

plt.show()
|
||||
|
||||
# Prediction for a made-up apartment: SqrMeters = 45, Rooms = 3.
# The sample is wrapped in a one-row DataFrame carrying the same column names
# the model was fitted with; newer scikit-learn versions warn when a model
# fitted on named columns is asked to predict on unnamed input.
sample = pd.DataFrame([[45, 3]], columns=['SqrMeters', 'Rooms'])
print(lm.predict(sample))
# Price predicted in the recorded run: 259659.47
|
Loading…
Reference in New Issue
Block a user