add home_pricing application
This commit is contained in:
parent
fde621ab57
commit
762b979ca9
39
home_pricing/DataCollectingScraper/DataCollectingScraper.py
Normal file
39
home_pricing/DataCollectingScraper/DataCollectingScraper.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
from typing import Any
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
from selenium.webdriver.chrome.service import Service
|
||||||
|
from webdriver_manager.chrome import ChromeDriverManager
|
||||||
|
|
||||||
|
from DataCollectingScraper.DataScrapers.IDataScraper import IDataScraper, IDataScraperState
|
||||||
|
|
||||||
|
class DataCollectingScraper:
    """Run an ``IDataScraper`` implementation against a Selenium Chrome driver.

    The scraper implementation is called repeatedly with the state it returned
    from the previous attempt until that state reports ``finished()``. Every
    unfinished attempt is followed by a brand new Chrome driver.
    """

    def __init__(self, scraper_impl: "IDataScraper") -> None:
        """Store the scraper implementation and start the first Chrome driver."""
        self.scraper_impl: "IDataScraper" = scraper_impl
        self.driver: "webdriver.Chrome" = None
        self.instantiate_driver()

    @staticmethod
    def get_driver_options() -> list:
        """Return the command line switches passed to Chrome."""
        return [
            # "--headless",
            "--no-sandbox",
            "--disable-dev-shm-usage",
        ]

    def instantiate_driver(self) -> None:
        """Create a new Chrome driver and store it in ``self.driver``."""
        options = Options()
        for opt in DataCollectingScraper.get_driver_options():
            options.add_argument(opt)
        self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    def _dispose_driver(self) -> None:
        """Quit the current driver, if any, so its browser does not linger."""
        stale_driver = self.driver
        self.driver = None
        if stale_driver is None:
            return
        try:
            stale_driver.quit()
        except Exception as exc:
            # Best effort: the driver may already be dead, which is often the
            # very reason the previous attempt did not finish.
            print(f"Could not quit the previous driver: {exc!r}")

    def __call__(self, *args: Any, **kwds: Any) -> None:
        """Call the scraper until its state is finished.

        Does nothing when no driver is available. After an unfinished attempt
        the old driver is quit before a replacement is created; previously the
        old browser was simply abandoned, leaking one Chrome per retry.
        """
        if self.driver is None:
            return

        state = None
        while True:
            print(f"Calling DataCollectingScraper for {str(self.scraper_impl)} and page {self.scraper_impl.get_home_page()}")
            state = self.scraper_impl.scrap_data(self.driver, state)
            if state.finished():
                break
            self._dispose_driver()
            self.instantiate_driver()
|
@ -0,0 +1,20 @@
|
|||||||
|
class IDataScraperState:
    """Base progress record for a scraping run; it only tracks completion."""

    def __init__(self) -> None:
        """Start in the unfinished state."""
        # Read back through finished(); subclasses set it once they are done.
        self._finished = False

    def finished(self) -> bool:
        """Return the current value of the completion flag."""
        return self._finished
|
||||||
|
|
||||||
|
class IDataScraper:
    """Base class for site-specific scrapers.

    Subclasses set ``name``, override ``get_home_page`` and implement
    ``scrap_data``.
    """

    def __init__(self) -> None:
        """Initialise the scraper with an empty display name."""
        self.name = ""

    def __str__(self) -> str:
        """Return the scraper's display name."""
        return self.name

    @staticmethod
    def get_home_page() -> str:
        """Return the base URL of the scraped site (empty in the base class)."""
        return ""

    def scrap_data(self, driver=None, state=None) -> "IDataScraperState":
        """Scrape data with ``driver``, resuming from ``state`` when given.

        The signature now matches how the method is invoked on scraper
        instances (``scrap_data(driver, state)``); the previous zero-argument
        definition could not be called on an instance at all.

        Raises:
            NotImplementedError: always; subclasses must override this method.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement scrap_data(driver, state)"
        )
|
@ -0,0 +1,94 @@
|
|||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
|
||||||
|
from DataCollectingScraper.DataScrapers.IDataScraper import IDataScraper, IDataScraperState
|
||||||
|
from DataCollectingScraper.helpers.WebDriverWaiter import WebDriverWaiter
|
||||||
|
from DataCollectingScraper.helpers.SingleOfferCSVWriter import SingleOfferCSVWriter
|
||||||
|
|
||||||
|
from DataCollectingScraper.models.SingleOffer import SingleOffer
|
||||||
|
|
||||||
|
class OtoDomDataScraperState(IDataScraperState):
    """Resumable progress of an OtoDom scraping run."""

    def __init__(self) -> None:
        """Create an empty state positioned at listing page 1."""
        super().__init__()

        # Stage 1: collecting offer links from the listing pages.
        self._offers_links = list()
        self._offers_page_number = 1
        self._offers_pages_scrapped = False

        # Stage 2: links of the offers that were already attempted.
        self._scrapped_offers_links = list()

        # Final state flag, reported through finished().
        self._finished = False
|
||||||
|
|
||||||
|
|
||||||
|
class OtoDomDataScraperImpl(IDataScraper):
    """Scraper for otodom.pl listings.

    Works in two stages: first the offer links are collected from the listing
    pages, then every offer page is opened and appended to a CSV file. Progress
    lives in an ``OtoDomDataScraperState`` so an interrupted run can resume.
    """

    def __init__(self, offers_sublink = "") -> None:
        """Configure the output file, the page count and the page selectors.

        Args:
            offers_sublink: Listing path (with query string) appended to the
                home page URL; ``&page=N`` is added to it for every page.
        """
        super().__init__()
        self.name = "OtoDomDataScraperImpl"
        self.output_csv_file = "output.csv"
        self.offers_sublink = offers_sublink
        self.pages_amount = 275

        # Listing page selectors.
        self.offers_list_presence_selector = "//*[contains(text(), 'Wszystkie ogłoszenia')]"
        self.offers_list_selector = "a[data-cy='listing-item-link']"

        # Offer detail page selectors.
        self.area_detail_selector = "div[aria-label='Powierzchnia'] > div:nth-child(3) > div"
        self.rooms_number_detail_selector = "div[aria-label='Liczba pokoi'] > div:nth-child(3) > div"
        self.floor_number_detail_selector = "div[aria-label='Piętro'] > div:nth-child(3) > div"
        self.property_form_detail_selector = "div[aria-label='Forma własności'] > div:nth-child(3) > div"
        self.state_detail_selector = "div[aria-label='Stan wykończenia'] > div:nth-child(3) > div"
        self.location_detail_selector = "a[aria-label='Adres']"
        self.construction_year_detail_selector = "div[aria-label='Rok budowy'] > div:nth-child(2) > div"
        self.price_detail_selector = "strong[aria-label='Cena']"

    @staticmethod
    def get_home_page() -> str:
        """Return the base URL of the site."""
        return "https://www.otodom.pl/pl/"

    def scrap_one_offer(self, driver: webdriver.Chrome, offer_link : str) -> SingleOffer:
        """Open ``offer_link`` and read its details into a ``SingleOffer``.

        Any exception raised while locating an element or parsing its text
        propagates to the caller.
        """
        driver.get(offer_link)
        WebDriverWaiter.wait_for(driver, (By.CSS_SELECTOR, self.area_detail_selector))

        def text_of(selector: str) -> str:
            return driver.find_element(By.CSS_SELECTOR, selector).text

        single_offer = SingleOffer()
        single_offer.area = text_of(self.area_detail_selector)
        single_offer.rooms_number = text_of(self.rooms_number_detail_selector)
        single_offer.floor = text_of(self.floor_number_detail_selector)
        single_offer.property_form = text_of(self.property_form_detail_selector)
        single_offer.state = text_of(self.state_detail_selector)
        single_offer.location = text_of(self.location_detail_selector)
        single_offer.construction_year = text_of(self.construction_year_detail_selector)
        single_offer.price = text_of(self.price_detail_selector)
        return single_offer

    def _collect_offer_links(self, driver: webdriver.Chrome, state: OtoDomDataScraperState) -> None:
        """Append the offer links of every remaining listing page to ``state``."""
        # pages_amount is treated as the number of pages, so the last page is
        # included; a page without offers simply contributes no links.
        for page_number in range(state._offers_page_number, self.pages_amount + 1):
            # Remember the page first so a retry resumes from this page.
            state._offers_page_number = page_number
            print(f"Get page: {self.offers_sublink}&page={page_number}")
            driver.get(self.get_home_page() + self.offers_sublink + f"&page={page_number}")
            WebDriverWaiter.wait_for(driver, (By.CSS_SELECTOR, self.offers_list_selector))
            for offer in driver.find_elements(By.CSS_SELECTOR, self.offers_list_selector):
                state._offers_links.append(offer.get_attribute("href"))

    def _scrap_pending_offers(self, driver: webdriver.Chrome, state: OtoDomDataScraperState) -> None:
        """Scrape every collected offer that has not been attempted yet."""
        for offer_link in state._offers_links:
            if offer_link in state._scrapped_offers_links:
                continue
            print(f"Scrapping offer: {offer_link}")
            # The link is recorded before scraping, so an offer that raises is
            # skipped on the next attempt instead of failing again.
            state._scrapped_offers_links.append(offer_link)
            single_offer = self.scrap_one_offer(driver, offer_link)
            SingleOfferCSVWriter.save_to_file(self.output_csv_file, single_offer)

    def scrap_data(self, driver: webdriver.Chrome, state: OtoDomDataScraperState = None) -> OtoDomDataScraperState:
        """Run, or resume, the scraping and return the updated state.

        Args:
            driver: Chrome driver used for all page loads. It is quit once the
                run completes.
            state: State returned by a previous unfinished call, or ``None``
                to start from scratch.

        Returns:
            The state; ``finished()`` is true only when every stage completed.
            An ``Exception`` raised on the way is reported and leaves the
            state unfinished so the caller can retry. ``KeyboardInterrupt``
            and ``SystemExit`` are no longer swallowed, so the run can be
            stopped.
        """
        if state is None:
            state = OtoDomDataScraperState()

        try:
            if not state._offers_pages_scrapped:
                self._collect_offer_links(driver, state)
                state._offers_pages_scrapped = True

            self._scrap_pending_offers(driver, state)

            # quit() ends the whole driver session; close() only closed the window.
            driver.quit()
            state._finished = True
        except Exception as exc:
            print(f"Scraping attempt interrupted, progress kept for a retry: {exc!r}")

        return state
|
@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
from DataCollectingScraper.models.SingleOffer import SingleOffer
|
||||||
|
|
||||||
|
import csv
|
||||||
|
|
||||||
|
class SingleOfferCSVWriter:
    """Append offers to a CSV file, one row per call."""

    @staticmethod
    def save_to_file(file_name: str, offer: "SingleOffer"):
        """Append ``offer`` to ``file_name``, writing the header to an empty file.

        Args:
            file_name: Path of the CSV file; it is created when missing.
            offer: Object providing ``get_columns_names()`` and
                ``get_dict_repr()``.
        """
        # The encoding is fixed to UTF-8 so the output does not depend on the
        # platform default.
        with open(file_name, mode="a", newline="", encoding="utf-8") as file:
            writer = csv.DictWriter(file, fieldnames=offer.get_columns_names())
            # In append mode the position is the current file size, so 0 means
            # the file is empty and still needs its header row.
            if file.tell() == 0:
                writer.writeheader()
            writer.writerow(offer.get_dict_repr())
|
@ -0,0 +1,16 @@
|
|||||||
|
from typing import Tuple
|
||||||
|
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.common.exceptions import TimeoutException
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
|
||||||
|
|
||||||
|
class WebDriverWaiter:
    """Helper for waiting until an element is present on the page."""

    @staticmethod
    def wait_for(driver: webdriver.Chrome, locator: Tuple[str, str], delay: float = 10) -> bool:
        """Wait until ``locator`` is present, for at most ``delay`` seconds.

        Args:
            driver: Driver whose current page is inspected.
            locator: ``(by, value)`` pair handed to
                ``presence_of_element_located``.
            delay: Maximum waiting time in seconds (10 by default, as before).

        Returns:
            ``True`` when the element appeared in time, ``False`` after a
            timeout. The timeout is reported with a message, never raised.
        """
        try:
            WebDriverWait(driver, delay).until(EC.presence_of_element_located(locator))
        except TimeoutException:
            print("Timeout")
            return False
        return True
|
118
home_pricing/DataCollectingScraper/models/SingleOffer.py
Normal file
118
home_pricing/DataCollectingScraper/models/SingleOffer.py
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
|
||||||
|
class SingleOffer():
    """One real-estate offer; the setters parse the raw page text.

    Numeric fields are assigned as text (for example ``"56,5 m²"`` or
    ``"1 250 000 zł"``) and stored as numbers. A text that cannot be parsed
    raises ``ValueError`` from the corresponding setter.
    """

    def __init__(self):
        """Create an offer with every field unset (``None``)."""
        self._area : float = None
        self._rooms_number : int = None
        self._floor : int = None
        self._property_form : str = None
        self._state : str = None
        self._location : str = None
        self._construction_year : int = None
        self._price : float = None

    def __str__(self) -> str:
        """Return all fields as one space-separated line of labelled values."""
        parts = (
            f"Area: {self.area} ",
            f"rooms number: {self.rooms_number} ",
            f"floor: {self.floor} ",
            f"property form: {self.property_form} ",
            f"state: {self.state} ",
            f"location: {self.location} ",
            f"construction year: {self.construction_year} ",
            f"price: {self.price} ",
        )
        return "".join(parts)

    @staticmethod
    def get_columns_names() -> list:
        """Return the CSV column names, in output order."""
        return ["Area", "Rooms", "Floor", "Property form", "State", "Location", "Construction year", "Price"]

    def get_dict_repr(self) -> dict:
        """Return the offer as a dict keyed by the CSV column names."""
        return {
            "Area": self.area,
            "Rooms": self.rooms_number,
            "Floor": self.floor,
            "Property form": self.property_form,
            "State": self.state,
            "Location": self.location,
            "Construction year": self.construction_year,
            "Price": self.price
        }

    @staticmethod
    def _numeric_text(raw: str, unit: str) -> str:
        """Strip ``unit`` and all whitespace from ``raw``; use ``.`` as decimal mark.

        ``str.split()`` without arguments also splits on non-breaking and other
        Unicode spaces, which are common as thousands separators on web pages.
        """
        compact = "".join(raw.split())
        return compact.replace(unit, "").replace(",", ".")

    # Area
    @property
    def area(self):
        return self._area

    @area.setter
    def area(self, area : str):
        self._area = float(self._numeric_text(area, "m²"))

    # Rooms number
    @property
    def rooms_number(self):
        return self._rooms_number

    @rooms_number.setter
    def rooms_number(self, rooms_number : str):
        self._rooms_number = int(rooms_number)

    # Floor
    @property
    def floor(self):
        return self._floor

    @floor.setter
    def floor(self, floor : str):
        # Only the part before the first "/" is used; "parter" is stored as 0.
        floor_str = floor.split("/", 1)[0].strip()
        if floor_str.lower() == "parter":
            floor_str = "0"
        self._floor = int(floor_str)

    # Property form
    @property
    def property_form(self):
        return self._property_form

    @property_form.setter
    def property_form(self, property_form : str):
        self._property_form = property_form

    # State
    @property
    def state(self):
        return self._state

    @state.setter
    def state(self, state : str):
        self._state = state

    # Location
    @property
    def location(self):
        return self._location

    @location.setter
    def location(self, location : str):
        self._location = location

    # Construction year
    @property
    def construction_year(self):
        return self._construction_year

    @construction_year.setter
    def construction_year(self, construction_year : str):
        self._construction_year = int(construction_year)

    # Price
    @property
    def price(self):
        return self._price

    @price.setter
    def price(self, price : str):
        self._price = float(self._numeric_text(price, "zł"))
|
53
home_pricing/DataPreprocessor/DataPreprocessor.py
Normal file
53
home_pricing/DataPreprocessor/DataPreprocessor.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
from pandas.core.frame import DataFrame
|
||||||
|
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
from sklearn.preprocessing import LabelEncoder
|
||||||
|
|
||||||
|
class DataPreprocessor:
    """Prepare the scraped offers table for model training and prediction.

    The fitted scaler or encoder of every transformed column is kept, so the
    same transformation can later be applied to new values via ``get_value``.
    """

    def __init__(self, data : DataFrame) -> None:
        """Keep a private copy of ``data`` so the caller's frame is never modified."""
        self._data = data.copy()
        self._transformers = {}

    def remove_outliers_by(self, property : str, lower : float, upper: float) -> None:
        """Keep only rows whose ``property`` value lies strictly between two quantiles.

        Args:
            property: Column name.
            lower: Lower quantile (0..1); rows at or below it are dropped.
            upper: Upper quantile (0..1); rows at or above it are dropped.
        """
        column = self._data[property]
        q_low = column.quantile(lower)
        q_hi = column.quantile(upper)
        # copy() detaches the result from the filtered frame, so that later
        # column assignments do not write into a slice.
        self._data = self._data[(column < q_hi) & (column > q_low)].copy()

    def transform_column_with_standard_scaler(self, column_name : str):
        """Standardise ``column_name`` in place and remember the fitted scaler."""
        scaler = StandardScaler()
        scaler.fit(self._data[[column_name]])
        self._data[[column_name]] = scaler.transform(self._data[[column_name]])
        self._transformers[column_name] = scaler

    def encode_labels(self, column_name: str) -> None:
        """Replace the labels of ``column_name`` with integer codes; keep the encoder."""
        encoder = LabelEncoder()
        encoder.fit(self._data[column_name])
        self._data[column_name] = encoder.transform(self._data[column_name])
        self._transformers[column_name] = encoder

    def get_value(self, column_name, original_data):
        """Apply the transformer fitted for ``column_name`` to ``original_data``.

        Raises:
            KeyError: if no transformer was fitted for ``column_name``.
        """
        return self._transformers[column_name].transform(original_data)

    def transorm_address_to_district(self, row):
        """Return the fourth comma-separated element from the end of an address.

        Returns ``""`` when the address has fewer than four elements or is not
        a string (for example a missing value), so such rows can be filtered
        out instead of raising.
        """
        if not isinstance(row, str):
            return ""
        elements = row.split(',')
        if len(elements) < 4:
            return ""
        return elements[-4].strip()

    def trasform_column(self, column_name, function):
        """Replace ``column_name`` with ``function`` applied to each of its values."""
        self._data[column_name] = self._data[column_name].apply(function)

    def preprocess_data(self) -> None:
        """Run the full pipeline: scale, encode, derive districts, drop outliers."""
        self.transform_column_with_standard_scaler("Area")
        self.transform_column_with_standard_scaler("Construction year")
        self.encode_labels("Property form")
        self.encode_labels("State")

        self.trasform_column("Location", self.transorm_address_to_district)
        # Rows whose district could not be derived are dropped before encoding.
        self._data = self._data[self._data['Location'] != ""].copy()
        self.encode_labels("Location")

        self.remove_outliers_by("Price", 0.1, 0.9)
        self.remove_outliers_by("Area", 0.1, 0.9)

    def get_preprocessed_data(self) -> DataFrame:
        """Return the current (possibly preprocessed) data frame."""
        return self._data
|
7
home_pricing/DataPreprocessor/helpers/OffersCSVReader.py
Normal file
7
home_pricing/DataPreprocessor/helpers/OffersCSVReader.py
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from pandas.core.frame import DataFrame
|
||||||
|
|
||||||
|
class OffersCSVReader:
    """Loader for the offers CSV file."""

    @staticmethod
    def read_from_file(file_name: str) -> DataFrame:
        """Read ``file_name`` with pandas' default CSV settings and return the frame."""
        offers = pd.read_csv(file_name)
        return offers
|
35
home_pricing/Prediction/Trainer/PredictionModelTrainer.py
Normal file
35
home_pricing/Prediction/Trainer/PredictionModelTrainer.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
from pandas.core.frame import DataFrame
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.neural_network import MLPRegressor
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
class PredictionModelTrainer:
    """Train an ``MLPRegressor`` that predicts the ``Price`` column."""

    def __init__(self, preprocessed_data : DataFrame) -> None:
        """Store the training data; no model exists until ``train`` is called."""
        self.data_ : DataFrame = preprocessed_data
        self.trained_model_ : MLPRegressor = None

    def train(self) -> None:
        """Fit the model on 80% of the rows and print its error on the other 20%."""
        print("Training home pricing model with MLPRegressor")

        features = self.data_.drop(columns=['Price'])
        target = self.data_['Price']
        # Fixed random_state values keep the split and the fit reproducible.
        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.2, random_state=5
        )

        regressor = MLPRegressor(
            activation="relu",
            hidden_layer_sizes=(10, 80, 200),
            max_iter=2000,
            random_state=5,
            alpha=0.01,
            solver="lbfgs"
        )
        self.trained_model_ = regressor
        regressor.fit(X_train, y_train)

        predicted = regressor.predict(X_test)
        mse = mean_squared_error(y_test, predicted)
        print("Mean Squared Error:", mse)
        rmse = math.sqrt(mse)
        print(f"Root mean squared error: {rmse}")
        print(f"Model on avaerage is wrong by {round(rmse, 2)} PLN")

    def get_trained_model(self) -> MLPRegressor:
        """Return the fitted model, or ``None`` when ``train`` was not called."""
        return self.trained_model_
|
216
home_pricing/home_pricing.html
Normal file
216
home_pricing/home_pricing.html
Normal file
@ -0,0 +1,216 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Home pricing tool</title>
|
||||||
|
<style>
|
||||||
|
body {
|
||||||
|
font-family: Arial, sans-serif;
|
||||||
|
background-color: #f2f2f2;
|
||||||
|
padding: 20px;
|
||||||
|
}
|
||||||
|
|
||||||
|
h2 {
|
||||||
|
color: #333;
|
||||||
|
}
|
||||||
|
|
||||||
|
form {
|
||||||
|
background-color: #fff;
|
||||||
|
padding: 20px;
|
||||||
|
border-radius: 10px;
|
||||||
|
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
|
||||||
|
max-width: 400px;
|
||||||
|
margin: 0 auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
label {
|
||||||
|
display: block;
|
||||||
|
margin-bottom: 5px;
|
||||||
|
color: #555;
|
||||||
|
}
|
||||||
|
|
||||||
|
input[type="text"],
|
||||||
|
select {
|
||||||
|
width: 100%;
|
||||||
|
padding: 10px;
|
||||||
|
margin-bottom: 15px;
|
||||||
|
border: 1px solid #ccc;
|
||||||
|
border-radius: 5px;
|
||||||
|
box-sizing: border-box; /* Ensure padding and border are included in element's total width and height */
|
||||||
|
}
|
||||||
|
|
||||||
|
input[type="button"] {
|
||||||
|
background-color: #4CAF50;
|
||||||
|
color: white;
|
||||||
|
padding: 15px 30px; /* Adjusted padding to make it larger */
|
||||||
|
border: none;
|
||||||
|
border-radius: 5px;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: background-color 0.3s;
|
||||||
|
display: block; /* Centering the button */
|
||||||
|
margin: 0 auto; /* Centering the button */
|
||||||
|
}
|
||||||
|
|
||||||
|
input[type="button"]:hover {
|
||||||
|
background-color: #45a049;
|
||||||
|
}
|
||||||
|
|
||||||
|
#response {
|
||||||
|
margin-top: 20px;
|
||||||
|
border: 1px solid #ccc;
|
||||||
|
padding: 20px;
|
||||||
|
border-radius: 5px;
|
||||||
|
}
|
||||||
|
|
||||||
|
#response h3 {
|
||||||
|
color: #333;
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#response pre {
|
||||||
|
background-color: #f9f9f9;
|
||||||
|
padding: 10px;
|
||||||
|
border-radius: 5px;
|
||||||
|
border: 1px solid #ccc;
|
||||||
|
font-size: 14px;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h2>Home pricing tool</h2>
|
||||||
|
|
||||||
|
<form id="propertyForm" action="#" method="post">
|
||||||
|
<label for="powierzchnia">Powierzchnia:</label>
|
||||||
|
<input type="text" id="powierzchnia" name="powierzchnia">
|
||||||
|
|
||||||
|
<label for="rok_budowy">Rok budowy:</label>
|
||||||
|
<input type="text" id="rok_budowy" name="rok_budowy">
|
||||||
|
|
||||||
|
<label for="ilosc_pokoi">Ilość pokoi:</label>
|
||||||
|
<input type="text" id="ilosc_pokoi" name="ilosc_pokoi">
|
||||||
|
|
||||||
|
<label for="numer_pietra">Numer piętra:</label>
|
||||||
|
<input type="text" id="numer_pietra" name="numer_pietra">
|
||||||
|
|
||||||
|
<label for="forma_wlasnosci">Forma własności:</label>
|
||||||
|
<select id="forma_wlasnosci" name="forma_wlasnosci">
|
||||||
|
<option value="pełna własność">pełna własność</option>
|
||||||
|
<option value="spółdzielcze wł. prawo do lokalu">spółdzielcze wł. prawo do lokalu</option>
|
||||||
|
</select>
|
||||||
|
|
||||||
|
<label for="dzielnica">Dzielnica:</label>
|
||||||
|
<select id="dzielnica" name="dzielnica">
|
||||||
|
<option value="Bartoszowice">Bartoszowice</option>
|
||||||
|
<option value="Bieńkowice">Bieńkowice</option>
|
||||||
|
<option value="Biskupin">Biskupin</option>
|
||||||
|
<option value="Borek">Borek</option>
|
||||||
|
<option value="Brochów">Brochów</option>
|
||||||
|
<option value="Dąbie">Dąbie</option>
|
||||||
|
<option value="Gaj">Gaj</option>
|
||||||
|
<option value="Gajowice">Gajowice</option>
|
||||||
|
<option value="Grabiszyn">Grabiszyn</option>
|
||||||
|
<option value="Grabiszynek">Grabiszynek</option>
|
||||||
|
<option value="Gądów">Gądów</option>
|
||||||
|
<option value="Huby">Huby</option>
|
||||||
|
<option value="Iwiny">Iwiny</option>
|
||||||
|
<option value="Jagodno">Jagodno</option>
|
||||||
|
<option value="Karłowice">Karłowice</option>
|
||||||
|
<option value="Klecina">Klecina</option>
|
||||||
|
<option value="Kleczków">Kleczków</option>
|
||||||
|
<option value="Kowale">Kowale</option>
|
||||||
|
<option value="Kozanów">Kozanów</option>
|
||||||
|
<option value="Krzyki">Krzyki</option>
|
||||||
|
<option value="Księże">Księże</option>
|
||||||
|
<option value="Kuźniki">Kuźniki</option>
|
||||||
|
<option value="Leśnica">Leśnica</option>
|
||||||
|
<option value="Ligota">Ligota</option>
|
||||||
|
<option value="Lipa Piotrowska">Lipa Piotrowska</option>
|
||||||
|
<option value="Maślice">Maślice</option>
|
||||||
|
<option value="Muchobór Mały">Muchobór Mały</option>
|
||||||
|
<option value="Muchobór Wielki">Muchobór Wielki</option>
|
||||||
|
<option value="Nadodrze">Nadodrze</option>
|
||||||
|
<option value="Nowy Dwór">Nowy Dwór</option>
|
||||||
|
<option value="Oporów">Oporów</option>
|
||||||
|
<option value="Osobowice">Osobowice</option>
|
||||||
|
<option value="Ołbin">Ołbin</option>
|
||||||
|
<option value="Ołtaszyn">Ołtaszyn</option>
|
||||||
|
<option value="Partynice">Partynice</option>
|
||||||
|
<option value="Pawłowice">Pawłowice</option>
|
||||||
|
<option value="Pilczyce">Pilczyce</option>
|
||||||
|
<option value="Plac Grunwaldzki">Plac Grunwaldzki</option>
|
||||||
|
<option value="Polanowice">Polanowice</option>
|
||||||
|
<option value="Popowice Północne">Popowice Północne</option>
|
||||||
|
<option value="Powstańców Śląskich">Powstańców Śląskich</option>
|
||||||
|
<option value="Poświętne">Poświętne</option>
|
||||||
|
<option value="Pracze Odrzańskie">Pracze Odrzańskie</option>
|
||||||
|
<option value="Przedmieście Oławskie">Przedmieście Oławskie</option>
|
||||||
|
<option value="Przedmieście Świdnickie">Przedmieście Świdnickie</option>
|
||||||
|
<option value="Psie Pole">Psie Pole</option>
|
||||||
|
<option value="Radomierzyce">Radomierzyce</option>
|
||||||
|
<option value="Radwanice">Radwanice</option>
|
||||||
|
<option value="Różanka">Różanka</option>
|
||||||
|
<option value="Sołtysowice">Sołtysowice</option>
|
||||||
|
<option value="Stare Miasto">Stare Miasto</option>
|
||||||
|
<option value="Strachocin">Strachocin</option>
|
||||||
|
<option value="Strachowice">Strachowice</option>
|
||||||
|
<option value="Swojczyce">Swojczyce</option>
|
||||||
|
<option value="Szczepin">Szczepin</option>
|
||||||
|
<option value="Szczytniki">Szczytniki</option>
|
||||||
|
<option value="Sępolno">Sępolno</option>
|
||||||
|
<option value="Tarnogaj">Tarnogaj</option>
|
||||||
|
<option value="Widawa">Widawa</option>
|
||||||
|
<option value="Wilczyce">Wilczyce</option>
|
||||||
|
<option value="Wojszyce">Wojszyce</option>
|
||||||
|
<option value="Wysoka">Wysoka</option>
|
||||||
|
<option value="Zawidawie">Zawidawie</option>
|
||||||
|
<option value="Żerniki">Żerniki</option>
|
||||||
|
</select>
|
||||||
|
|
||||||
|
<label for="stan_nieruchomosci">Stan nieruchomości:</label>
|
||||||
|
<select id="stan_nieruchomosci" name="stan_nieruchomosci">
|
||||||
|
<option value="do remontu">do remontu</option>
|
||||||
|
<option value="do wykończenia">do wykończenia</option>
|
||||||
|
<option value="do zamieszkania">do zamieszkania</option>
|
||||||
|
</select>
|
||||||
|
|
||||||
|
<input type="button" value="Wylicz cenę" onclick="submitForm()">
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<div id="response">
|
||||||
|
<h3>Spodziewana cena: </h3>
|
||||||
|
<div id="response_value"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// Sends the form values as JSON to the pricing endpoint and shows the
// estimated price, or an error message when the request fails.
function submitForm() {
    // Every payload key is identical to the id of its form control.
    var fieldIds = [
        "powierzchnia",
        "rok_budowy",
        "ilosc_pokoi",
        "numer_pietra",
        "forma_wlasnosci",
        "dzielnica",
        "stan_nieruchomosci"
    ];
    var formData = {};
    fieldIds.forEach(function (id) {
        formData[id] = document.getElementById(id).value;
    });

    const url = 'http://localhost:8081/calculate_price';
    const output = document.getElementById("response_value");

    fetch(url, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify(formData),
    })
    .then((response) => {
        // fetch() resolves for HTTP error statuses too, so check explicitly.
        if (!response.ok) {
            throw new Error('HTTP ' + response.status);
        }
        return response.json();
    })
    .then((data) => {
        output.textContent = data["estimated_price"] + " PLN";
        console.log(data["estimated_price"]);
    })
    .catch((error) => {
        console.error('Error:', error);
        // Show the failure on the page instead of only logging it.
        output.textContent = "Nie udało się wyliczyć ceny.";
    });
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
46
home_pricing/main.py
Normal file
46
home_pricing/main.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
from DataCollectingScraper.DataScrapers.OtoDomDataScraperImpl import OtoDomDataScraperImpl
|
||||||
|
from DataCollectingScraper.DataCollectingScraper import DataCollectingScraper
|
||||||
|
|
||||||
|
from DataPreprocessor.helpers.OffersCSVReader import OffersCSVReader
|
||||||
|
from DataPreprocessor.DataPreprocessor import DataPreprocessor
|
||||||
|
|
||||||
|
from Prediction.Trainer.PredictionModelTrainer import PredictionModelTrainer
|
||||||
|
|
||||||
|
from sklearn.neural_network import MLPRegressor
|
||||||
|
from pandas.core.frame import DataFrame
|
||||||
|
import pandas as pd
|
||||||
|
import joblib
|
||||||
|
|
||||||
|
# Switches for the optional, slow stages of the pipeline.
download_data = False
train_model = False

# Downloading raw data
if download_data:
    offers_sublink = "wyniki/sprzedaz/mieszkanie/dolnoslaskie/wroclaw/wroclaw/wroclaw?viewType=listing"
    scraper = DataCollectingScraper(OtoDomDataScraperImpl(offers_sublink))
    scraper()

# Reading downloaded data
data_frame : DataFrame = OffersCSVReader.read_from_file("output.csv")

# Prepare data for neural network (data preprocessing). This always runs,
# because the fitted transformers are needed to encode the sample below.
data_preprocessor = DataPreprocessor(data_frame)
data_preprocessor.preprocess_data()

if train_model:
    preprocessed_data : DataFrame = data_preprocessor.get_preprocessed_data()
    # Train neural network with preprocessed data
    trainer = PredictionModelTrainer(preprocessed_data)
    trainer.train()
    trained_model : MLPRegressor = trainer.get_trained_model()
    joblib.dump(trained_model, 'trained_model.pkl')

trained_model = joblib.load('trained_model.pkl')

# get_value() returns arrays (2-D for the scalers, 1-D for the label encoder);
# take the single scalar out of each so the sample frame holds plain numbers.
scaled_area = data_preprocessor.get_value('Area', pd.DataFrame({'Area': [56.0]}))[0][0]
scaled_construction_year = data_preprocessor.get_value('Construction year', pd.DataFrame({'Construction year': [1980]}))[0][0]
encoded_location = data_preprocessor.get_value("Location", ['Krzyki'])[0]

sample_data = [[scaled_area, 3, 8, 0, 2, encoded_location, scaled_construction_year]]
sample = pd.DataFrame(sample_data, columns=['Area', 'Rooms', 'Floor', 'Property form' , 'State', 'Location', 'Construction year'])

prediction = trained_model.predict(sample)
# predict() returns one value per sample row; index it instead of converting
# the whole array with float().
print('Predicted price: ', round(float(prediction[0]),0), 'zł')
|
1954
home_pricing/output.csv
Normal file
1954
home_pricing/output.csv
Normal file
File diff suppressed because it is too large
Load Diff
46
home_pricing/rest_api_server.py
Normal file
46
home_pricing/rest_api_server.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
from flask import Flask, request, jsonify
|
||||||
|
from flask_cors import CORS
|
||||||
|
import joblib
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from DataPreprocessor.helpers.OffersCSVReader import OffersCSVReader
|
||||||
|
from DataPreprocessor.DataPreprocessor import DataPreprocessor
|
||||||
|
from pandas.core.frame import DataFrame
|
||||||
|
|
||||||
|
# Flask application object; CORS is enabled for every route.
app = Flask(__name__)
CORS(app)

# Load the scraped offers once at import time.
data_frame : DataFrame = OffersCSVReader.read_from_file("output.csv")

# Preprocess them so the fitted scalers/encoders are available to the
# request handler for transforming incoming values.
data_preprocessor = DataPreprocessor(data_frame)
data_preprocessor.preprocess_data()

# NOTE(review): joblib.load unpickles the file; only load model files that
# come from a trusted source.
trained_model = joblib.load('trained_model.pkl')
|
||||||
|
|
||||||
|
def _parse_number(raw):
    """Convert a form value such as ``"56,5"`` or ``56`` to ``float``."""
    return float(str(raw).strip().replace(",", "."))


@app.route('/calculate_price', methods=['POST'])
def calculate_price():
    """Estimate a price for the property described in the JSON request body.

    Expected keys: ``powierzchnia``, ``rok_budowy``, ``ilosc_pokoi``,
    ``numer_pietra``, ``forma_wlasnosci``, ``dzielnica``,
    ``stan_nieruchomosci``.

    Returns:
        ``{"estimated_price": <float>}`` on success, or a 400 response with
        ``{"error": ...}`` when the body is not a JSON object, a key is
        missing, a number cannot be parsed, or a transformer rejects a value.
    """
    input_data = request.get_json(silent=True)
    if not isinstance(input_data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    try:
        area = _parse_number(input_data["powierzchnia"])
        construction_year = _parse_number(input_data["rok_budowy"])
        rooms = _parse_number(input_data['ilosc_pokoi'])
        floor = _parse_number(input_data['numer_pietra'])

        # get_value() returns arrays (2-D for the scalers, 1-D for the label
        # encoders); take the single scalar out of each.
        scaled_area = data_preprocessor.get_value('Area', pd.DataFrame({'Area': [area]}))[0][0]
        scaled_construction_year = data_preprocessor.get_value('Construction year', pd.DataFrame({'Construction year': [construction_year]}))[0][0]
        encoded_location = data_preprocessor.get_value("Location", [input_data["dzielnica"]])[0]
        encoded_state = data_preprocessor.get_value("State", [input_data["stan_nieruchomosci"]])[0]
        encoded_property_form = data_preprocessor.get_value("Property form", [input_data["forma_wlasnosci"]])[0]
    except (KeyError, TypeError, ValueError) as error:
        # Bad client input is reported as 400 instead of surfacing as a 500.
        return jsonify({'error': f'Invalid input: {error}'}), 400

    sample_data = [[scaled_area, rooms, floor, encoded_property_form, encoded_state, encoded_location, scaled_construction_year]]
    sample = pd.DataFrame(sample_data, columns=['Area', 'Rooms', 'Floor', 'Property form' , 'State', 'Location', 'Construction year'])

    prediction = trained_model.predict(sample)

    calculated_price = {
        # predict() returns one value per sample row.
        'estimated_price': round(float(prediction[0]),0)
    }
    return jsonify(calculated_price)
|
||||||
|
|
||||||
|
|
||||||
|
# Start the development server only when the file is executed as a script.
# NOTE(review): debug=True turns on Flask's interactive debugger; keep it for
# local development only.
if __name__ == '__main__':
    app.run(port=8081, debug=True)
|
BIN
home_pricing/trained_model.pkl
Normal file
BIN
home_pricing/trained_model.pkl
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user