Dodano nowe pliki do repozytorium

This commit is contained in:
Kamil Szostak 2024-02-25 17:26:20 +01:00
commit 0c8713c83d
22 changed files with 155413 additions and 0 deletions

3
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml

10
.idea/ML9.iml Normal file
View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- IDE module descriptor; the module is declared with type PYTHON_MODULE. -->
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<!-- The module directory is the content root; its venv sub-folder is excluded from it. -->
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<!-- NOTE(review): "inheritedJdk" presumably means the module reuses the project-level interpreter; confirm in the IDE. -->
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View File

@ -0,0 +1,6 @@
<!-- Inspection profile settings: the USE_PROJECT_PROFILE option is set to false. -->
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

4
.idea/misc.xml Normal file
View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project-level SDK selection: an SDK of type "Python SDK" named "Python 3.10 (ML9)". -->
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (ML9)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Module registry for the project: a single module whose descriptor is .idea/ML9.iml. -->
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/ML9.iml" filepath="$PROJECT_DIR$/.idea/ML9.iml" />
</modules>
</component>
</project>

BIN
Raport Kamil Szostak.docx Normal file

Binary file not shown.

BIN
Raport Kamil Szostak.pdf Normal file

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

3848
data.csv Normal file

File diff suppressed because it is too large Load Diff

38
neural network.py Normal file
View File

@ -0,0 +1,38 @@
import pandas as pd
import math
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from sklearn import metrics
# Train and evaluate a small fully connected Keras regressor on data.csv.
# Every column except the last is used as a feature; the last column is the
# regression target.
df = pd.read_csv('data.csv')
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/variance of the test rows leak into the training features and make the
# reported test errors optimistic. The row partition itself is unchanged
# because it depends only on the number of rows and random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Podzielono zbiór na {} rekordów uczących i {} rekordów testowych".format(len(y_train), len(y_test)))

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Two hidden ReLU layers of 32 units and a single linear output unit.
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=(X_train.shape[1])))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))
print("Stworzono sieć neuronową: \n")
model.summary()

model.compile(optimizer='adam', loss='mean_squared_error', metrics=["mae"])
epochs = 1500
# NOTE(review): the test split doubles as validation data here. It is only
# used for per-epoch monitoring (no early stopping or checkpoint selection is
# configured), so it does not influence the fitted weights.
model.fit(X_train, y_train, epochs=epochs, validation_data=(X_test, y_test))
print("Zakończono trenowanie sieci neuronowej z wykorzystaniem biblioteki Keras.")

# Report RMSE and MAE on the held-out rows.
predicted_prices = model.predict(X_test)
rmse = math.sqrt(metrics.mean_squared_error(y_test, predicted_prices))
mae = metrics.mean_absolute_error(y_test, predicted_prices)
print('RMSE: {:.2f}'.format(rmse))
print('MAE: {:.2f}'.format(mae))

View File

@ -0,0 +1,38 @@
import pandas as pd
import math
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
# Fit and compare two scikit-learn regressors (linear regression and a random
# forest) on data.csv. Every column except the last is used as a feature; the
# last column is the regression target.
df = pd.read_csv('data.csv')
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# statistics of the test rows leak into the training features. The row
# partition itself is unchanged because it depends only on the number of rows
# and random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Podzielono zbiór na {} rekordów uczących i {} rekordów testowych".format(len(y_train), len(y_test)))

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# --- Linear regression -----------------------------------------------------
regressor_lin = LinearRegression()
regressor_lin.fit(X_train, y_train)
print("\nWyuczono model regresjii liniowej:")
predicted_prices_lin = regressor_lin.predict(X_test)
rmse_lin = math.sqrt(metrics.mean_squared_error(y_test, predicted_prices_lin))
mae_lin = metrics.mean_absolute_error(y_test, predicted_prices_lin)
print('RMSE: {:.2f}'.format(rmse_lin))
print('MAE: {:.2f}'.format(mae_lin))

# --- Random forest (seeded so repeated runs give the same model) -----------
regressor_RF = RandomForestRegressor(random_state=8)
regressor_RF.fit(X_train, y_train)
print("\nWyuczono model regresji drzew Random Forest:")
predicted_prices_RF = regressor_RF.predict(X_test)
rmse_RF = math.sqrt(metrics.mean_squared_error(y_test, predicted_prices_RF))
mae_RF = metrics.mean_absolute_error(y_test, predicted_prices_RF)
print('RMSE: {:.2f}'.format(rmse_RF))
print('MAE: {:.2f}'.format(mae_RF))

35
scraper.py Normal file
View File

@ -0,0 +1,35 @@
import pandas as pd
# Input CSV files, concatenated in this order.
SNAPSHOT_PATHS = [
    'archive/apartments_pl_2023_11.csv',
    'archive/apartments_pl_2023_12.csv',
    'archive/apartments_pl_2024_01.csv',
    'archive/apartments_pl_2024_02.csv',
]

# Rows that agree on all of these columns are dropped as duplicates.
DEDUP_COLUMNS = ["squareMeters", "rooms", "floor", "centreDistance", "price"]

# Columns written to the output file, in this order.
OUTPUT_COLUMNS = ["squareMeters", "rooms", "floor", "buildYear", "centreDistance", "poiCount", "price"]

OUTPUT_PATH = 'data.csv'


def pull_krakow(df):
    """Return the rows of *df* whose ``city`` column equals ``'krakow'``.

    The input frame is not modified; the original row index is preserved.
    """
    return df[df["city"] == 'krakow']


def main():
    """Build data.csv from the input CSV files.

    Steps: load every file, keep only the Krakow rows, concatenate,
    drop duplicate rows, keep the output columns, drop rows with missing
    values and write the result. The row count after each stage is printed.
    """
    krakow_frames = [pull_krakow(pd.read_csv(path)) for path in SNAPSHOT_PATHS]
    df_concatenated = pd.concat(krakow_frames, ignore_index=True)

    # Named total_rows (not ``sum``) so the builtin stays usable.
    total_rows = sum(len(frame) for frame in krakow_frames)
    print(total_rows)

    df_no_duplicates = df_concatenated.drop_duplicates(subset=DEDUP_COLUMNS)
    print(len(df_no_duplicates))

    df_na_dropped = df_no_duplicates[OUTPUT_COLUMNS].dropna()
    print(len(df_na_dropped))

    df_na_dropped.to_csv(OUTPUT_PATH, index=False)
    print("Dane zapisane do data.csv.")


# Run the pipeline only when executed as a script, so importing this module
# (e.g. to reuse pull_krakow) does not touch the filesystem.
if __name__ == "__main__":
    main()