Dodano nowe pliki do repozytorium
This commit is contained in:
commit
0c8713c83d
|
@ -0,0 +1,3 @@
|
|||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
|
@ -0,0 +1,10 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
|
@ -0,0 +1,6 @@
|
|||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
|
@ -0,0 +1,4 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (ML9)" project-jdk-type="Python SDK" />
|
||||
</project>
|
|
@ -0,0 +1,8 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/ML9.iml" filepath="$PROJECT_DIR$/.idea/ML9.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,38 @@
|
|||
import pandas as pd
|
||||
import math
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense
|
||||
from sklearn import metrics
|
||||
|
||||
|
||||
# Load the prepared dataset; every column but the last is used as a feature
# and the last column is the regression target.
df = pd.read_csv('data.csv')

X = df.iloc[:, :-1]  # raw (unscaled) features
y = df.iloc[:, -1]

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# mean/variance of the test rows leak into the preprocessing of the training
# rows and make the reported test error optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # statistics learned from training rows only
X_test = scaler.transform(X_test)        # test rows reuse the training statistics

print("Podzielono zbiór na {} rekordów uczących i {} rekordów testowych".format(len(y_train), len(y_test)))
|
||||
|
||||
# Fully connected regressor: two hidden ReLU layers of 32 units each,
# followed by a single linear output unit.
model = Sequential([
    Dense(32, activation='relu', input_dim=X_train.shape[1]),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
])
print("Stworzono sieć neuronową: \n")
model.summary()

# Optimise mean squared error with Adam; mean absolute error is tracked as
# an additional metric.
model.compile(loss='mean_squared_error', optimizer='adam', metrics=["mae"])

epochs = 1500
# NOTE(review): the test split is also passed as validation data here, so
# the per-epoch validation numbers are computed on the same rows that are
# used for the final evaluation.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs)

print("Zakończono trenowanie sieci neuronowej z wykorzystaniem biblioteki Keras.")
|
||||
|
||||
# Score the trained network on the test split.
predicted_prices = model.predict(X_test)

# RMSE is the square root of the mean squared error; MAE is reported next
# to it.
mse = metrics.mean_squared_error(y_test, predicted_prices)
rmse = math.sqrt(mse)
mae = metrics.mean_absolute_error(y_test, predicted_prices)

for report_line in ('RMSE: {:.2f} zł'.format(rmse), 'MAE: {:.2f} zł'.format(mae)):
    print(report_line)
|
|
@ -0,0 +1,38 @@
|
|||
import pandas as pd
|
||||
import math
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn import metrics
|
||||
|
||||
# Load the prepared dataset; every column but the last is used as a feature
# and the last column is the regression target.
df = pd.read_csv('data.csv')

X = df.iloc[:, :-1]  # raw (unscaled) features
y = df.iloc[:, -1]

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# mean/variance of the test rows leak into the preprocessing of the training
# rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # statistics learned from training rows only
X_test = scaler.transform(X_test)        # test rows reuse the training statistics

print("Podzielono zbiór na {} rekordów uczących i {} rekordów testowych".format(len(y_train), len(y_test)))
|
||||
|
||||
def _report_errors(y_true, y_pred):
    """Print RMSE and MAE for a set of predictions and return them.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, aligned with ``y_true``.

    Returns:
        A ``(rmse, mae)`` tuple.
    """
    rmse = math.sqrt(metrics.mean_squared_error(y_true, y_pred))
    mae = metrics.mean_absolute_error(y_true, y_pred)
    print('RMSE: {:.2f} zł'.format(rmse))
    print('MAE: {:.2f} zł'.format(mae))
    return rmse, mae


# Baseline 1: ordinary linear regression.
regressor_lin = LinearRegression()
regressor_lin.fit(X_train, y_train)
print("\nWyuczono model regresji liniowej:")
predicted_prices_lin = regressor_lin.predict(X_test)
rmse_lin, mae_lin = _report_errors(y_test, predicted_prices_lin)

# Baseline 2: random forest with a fixed seed so repeated runs build the
# same ensemble.
regressor_RF = RandomForestRegressor(random_state=8)
regressor_RF.fit(X_train, y_train)
print("\nWyuczono model regresji drzew Random Forest:")
predicted_prices_RF = regressor_RF.predict(X_test)
rmse_RF, mae_RF = _report_errors(y_test, predicted_prices_RF)
|
|
@ -0,0 +1,35 @@
|
|||
import pandas as pd
|
||||
|
||||
# Monthly CSV exports, read in chronological order (2023-11 .. 2024-02).
_MONTHLY_FILES = (
    'archive/apartments_pl_2023_11.csv',
    'archive/apartments_pl_2023_12.csv',
    'archive/apartments_pl_2024_01.csv',
    'archive/apartments_pl_2024_02.csv',
)
df_2311, df_2312, df_2401, df_2402 = map(pd.read_csv, _MONTHLY_FILES)
|
||||
|
||||
|
||||
def pull_krakow(df):
    """Return the rows of *df* whose ``city`` column equals ``'krakow'``.

    The comparison is an exact string match; the original row index is kept.
    """
    is_krakow = df["city"] == 'krakow'
    return df.loc[is_krakow]
|
||||
|
||||
|
||||
# Reduce every monthly frame to the rows selected by pull_krakow.
df_2311 = pull_krakow(df_2311)
df_2312 = pull_krakow(df_2312)
df_2401 = pull_krakow(df_2401)
df_2402 = pull_krakow(df_2402)

df_concatenated = pd.concat([df_2311, df_2312, df_2401, df_2402], ignore_index=True)

# Row count before de-duplication. Stored as `total_rows` rather than `sum`
# so the builtin sum() is not shadowed for the rest of the script.
total_rows = len(df_concatenated)
print(total_rows)

# Rows that agree on all of these columns are treated as duplicates of one
# another; only the first occurrence is kept.
df_no_duplicates = df_concatenated.drop_duplicates(
    subset=["squareMeters", "rooms", "floor", "centreDistance", "price"]
)
print(len(df_no_duplicates))

# Keep the feature columns, with the target ("price") as the last column.
df_selected_columns = df_no_duplicates[
    ["squareMeters", "rooms", "floor", "buildYear", "centreDistance", "poiCount", "price"]
]

# Discard rows with a missing value in any of the selected columns.
df_na_dropped = df_selected_columns.dropna()

print(len(df_na_dropped))

df_na_dropped.to_csv('data.csv', index=False)

print("Dane zapisane do data.csv.")
|
Loading…
Reference in New Issue