Dodano nowe pliki do repozytorium
This commit is contained in:
commit
0c8713c83d
3
.idea/.gitignore
vendored
Normal file
3
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
10
.idea/ML9.iml
Normal file
10
.idea/ML9.iml
Normal file
@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
4
.idea/misc.xml
Normal file
4
.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (ML9)" project-jdk-type="Python SDK" />
|
||||
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/ML9.iml" filepath="$PROJECT_DIR$/.idea/ML9.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
BIN
Raport Kamil Szostak.docx
Normal file
BIN
Raport Kamil Szostak.docx
Normal file
Binary file not shown.
BIN
Raport Kamil Szostak.pdf
Normal file
BIN
Raport Kamil Szostak.pdf
Normal file
Binary file not shown.
18906
archive/apartments_pl_2023_08.csv
Normal file
18906
archive/apartments_pl_2023_08.csv
Normal file
File diff suppressed because it is too large
Load Diff
16998
archive/apartments_pl_2023_09.csv
Normal file
16998
archive/apartments_pl_2023_09.csv
Normal file
File diff suppressed because it is too large
Load Diff
16691
archive/apartments_pl_2023_10.csv
Normal file
16691
archive/apartments_pl_2023_10.csv
Normal file
File diff suppressed because it is too large
Load Diff
16303
archive/apartments_pl_2023_11.csv
Normal file
16303
archive/apartments_pl_2023_11.csv
Normal file
File diff suppressed because it is too large
Load Diff
16484
archive/apartments_pl_2023_12.csv
Normal file
16484
archive/apartments_pl_2023_12.csv
Normal file
File diff suppressed because it is too large
Load Diff
15522
archive/apartments_pl_2024_01.csv
Normal file
15522
archive/apartments_pl_2024_01.csv
Normal file
File diff suppressed because it is too large
Load Diff
16362
archive/apartments_pl_2024_02.csv
Normal file
16362
archive/apartments_pl_2024_02.csv
Normal file
File diff suppressed because it is too large
Load Diff
8534
archive/apartments_rent_pl_2023_11.csv
Normal file
8534
archive/apartments_rent_pl_2023_11.csv
Normal file
File diff suppressed because it is too large
Load Diff
8873
archive/apartments_rent_pl_2023_12.csv
Normal file
8873
archive/apartments_rent_pl_2023_12.csv
Normal file
File diff suppressed because it is too large
Load Diff
8431
archive/apartments_rent_pl_2024_01.csv
Normal file
8431
archive/apartments_rent_pl_2024_01.csv
Normal file
File diff suppressed because it is too large
Load Diff
8319
archive/apartments_rent_pl_2024_02.csv
Normal file
8319
archive/apartments_rent_pl_2024_02.csv
Normal file
File diff suppressed because it is too large
Load Diff
38
neural network.py
Normal file
38
neural network.py
Normal file
@ -0,0 +1,38 @@
|
||||
import pandas as pd
|
||||
import math
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense
|
||||
from sklearn import metrics
|
||||
|
||||
|
||||
# Train and evaluate a small fully connected Keras regressor on data.csv.
# Every column except the last is used as a feature; the last column is the
# regression target (reported below in zł).
df = pd.read_csv('data.csv')

X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/variance of the test rows leak into the training features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Podzielono zbiór na {} rekordów uczących i {} rekordów testowych".format(len(y_train), len(y_test)))

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Two hidden ReLU layers of 32 units and a single linear output unit.
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=(X_train.shape[1])))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))
print("Stworzono sieć neuronową: \n")
model.summary()
model.compile(optimizer='adam', loss='mean_squared_error', metrics=["mae"])

epochs = 1500
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch validation numbers are not an independent estimate - confirm
# this is intended.
model.fit(X_train, y_train, epochs=epochs, validation_data=(X_test, y_test))

print("Zakończono trenowanie sieci neuronowej z wykorzystaniem biblioteki Keras.")

predicted_prices = model.predict(X_test)

# Error metrics on the held-out rows.
rmse = math.sqrt(metrics.mean_squared_error(y_test, predicted_prices))
mae = metrics.mean_absolute_error(y_test, predicted_prices)

print('RMSE: {:.2f} zł'.format(rmse))
print('MAE: {:.2f} zł'.format(mae))
|
38
regression (linear and Random Forest Tree).py
Normal file
38
regression (linear and Random Forest Tree).py
Normal file
@ -0,0 +1,38 @@
|
||||
import pandas as pd
|
||||
import math
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn import metrics
|
||||
|
||||
def _report_errors(y_true, y_pred):
    """Print RMSE and MAE of *y_pred* against *y_true* and return them.

    Returns:
        A ``(rmse, mae)`` tuple of floats.
    """
    rmse = math.sqrt(metrics.mean_squared_error(y_true, y_pred))
    mae = metrics.mean_absolute_error(y_true, y_pred)
    print('RMSE: {:.2f} zł'.format(rmse))
    print('MAE: {:.2f} zł'.format(mae))
    return rmse, mae


# Fit and compare a linear regression and a random forest on data.csv.
# Every column except the last is used as a feature; the last column is the
# regression target (reported below in zł).
df = pd.read_csv('data.csv')

X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/variance of the test rows leak into the training features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Podzielono zbiór na {} rekordów uczących i {} rekordów testowych".format(len(y_train), len(y_test)))

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Linear regression baseline.
regressor_lin = LinearRegression()
regressor_lin.fit(X_train, y_train)
print("\nWyuczono model regresjii liniowej:")
predicted_prices_lin = regressor_lin.predict(X_test)
rmse_lin, mae_lin = _report_errors(y_test, predicted_prices_lin)

# Random forest with a fixed seed so repeated runs give the same model.
regressor_RF = RandomForestRegressor(random_state=8)
regressor_RF.fit(X_train, y_train)
print("\nWyuczono model regresji drzew Random Forest:")
predicted_prices_RF = regressor_RF.predict(X_test)
rmse_RF, mae_RF = _report_errors(y_test, predicted_prices_RF)
|
35
scraper.py
Normal file
35
scraper.py
Normal file
@ -0,0 +1,35 @@
|
||||
import pandas as pd
|
||||
|
||||
# Monthly sale-listing snapshots to load, oldest first.
_SNAPSHOT_PATHS = (
    'archive/apartments_pl_2023_11.csv',
    'archive/apartments_pl_2023_12.csv',
    'archive/apartments_pl_2024_01.csv',
    'archive/apartments_pl_2024_02.csv',
)

# One DataFrame per snapshot, bound to the names the rest of the script uses.
df_2311, df_2312, df_2401, df_2402 = map(pd.read_csv, _SNAPSHOT_PATHS)
|
||||
|
||||
|
||||
def pull_krakow(df):
    """Return only the rows of *df* whose ``city`` column equals ``'krakow'``.

    The original index labels and all columns are kept; *df* itself is not
    modified.
    """
    in_krakow = df["city"] == 'krakow'
    return df.loc[in_krakow]
|
||||
|
||||
|
||||
# Keep only the Kraków listings from every monthly snapshot.
df_2311 = pull_krakow(df_2311)
df_2312 = pull_krakow(df_2312)
df_2401 = pull_krakow(df_2401)
df_2402 = pull_krakow(df_2402)

monthly_frames = [df_2311, df_2312, df_2401, df_2402]
df_concatenated = pd.concat(monthly_frames, ignore_index=True)

# Row count before de-duplication. The result is deliberately NOT stored in
# a variable called `sum`, which would shadow the builtin used right here.
total_rows = sum(len(frame) for frame in monthly_frames)
print(total_rows)

# Rows that agree on all of these columns are treated as the same listing
# repeated across snapshots; only the first occurrence is kept.
df_no_duplicates = df_concatenated.drop_duplicates(
    subset=["squareMeters", "rooms", "floor", "centreDistance", "price"]
)
print(len(df_no_duplicates))

# Columns written to the output file; "price" stays last.
df_selected_columns = df_no_duplicates[
    ["squareMeters", "rooms", "floor", "buildYear", "centreDistance", "poiCount", "price"]
]

# Discard rows with a missing value in any of the selected columns.
df_na_dropped = df_selected_columns.dropna()

print(len(df_na_dropped))

df_na_dropped.to_csv('data.csv', index=False)

print("Dane zapisane do data.csv.")
|
Loading…
Reference in New Issue
Block a user