71 KiB
71 KiB
import pandas as pd
import numpy as np
import joblib
# Let pandas display up to 500 columns so the wide frame is not elided.
pd.set_option('display.max_columns', 500)
# Tab-separated training data; header=None makes pandas label the columns
# with integers (0, 1, 2, ...).
df_train = pd.read_csv('train.tsv', header=None, sep='\t')
df_train.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 309000.0 | do zamieszkania | 390 zł | spółdzielcze własnościowe | 7113 | https://www.otodom.pl/oferta/niezalezny-uklad-... | 2 | NaN | 43.44 | wtórny | 4.0 | blok | NaN | NaN | Niezależny Układ W Nowoczesnym Wydaniu | 1 | NaN | gazowe | plastikowe | NaN | NaN | NaN | cegła | Polecamy na sprzedaż dwupokojowe mieszkanie p... | NaN | telewizja kablowa, internet, meble, piwnica, g... |
1 | 314900.0 | do wykończenia | NaN | pełna własność | 7392 | https://www.otodom.pl/oferta/urokliwe-mieszkan... | 2 | NaN | 42.60 | pierwotny | 2.0 | blok | NaN | NaN | Urokliwe mieszkanie 2 pokojowe Strzeszyn | 1 | NaN | gazowe | plastikowe | NaN | NaN | NaN | cegła | Kameralne 2 pokojowe mieszkanie z aneksem kuc... | NaN | telewizja kablowa, internet, telefon, drzwi / ... |
2 | 249000.0 | do remontu | 300 zł | pełna własność | 5621 | https://www.otodom.pl/oferta/mieszkanie-do-rem... | 2 | NaN | 44.30 | wtórny | 4.0 | blok | NaN | NaN | Mieszkanie do remontu ul. Klonowa, blisko tramwaj | 2 | NaN | miejskie | plastikowe | 1960.0 | NaN | 2019-06-30 | cegła | Oferta bezpośrednio od właściciela - bez pośr... | NaN | telewizja kablowa, internet, telefon, domofon ... |
3 | 419000.0 | do zamieszkania | 490 zł | pełna własność | 4761 | https://www.otodom.pl/oferta/w-szeregowcu-4-po... | 4 | NaN | 88 | wtórny | 3.0 | szeregowiec | NaN | NaN | W szeregowcu 4 pokoje z garażem | 1 | NaN | gazowe | plastikowe | NaN | NaN | NaN | cegła | Drodzy Państwo Zapraszam do zapoznania się z ... | NaN | telewizja kablowa, internet, zmywarka, lodówka... |
4 | 499000.0 | NaN | 850 zł | NaN | 6481 | https://www.otodom.pl/oferta/komfortowe-przest... | 3 | NaN | 77 | wtórny | 16.0 | blok | NaN | NaN | Komfortowe,Przestronne,3Pokoje, Armii Krajowej!!! | 7 | NaN | NaN | plastikowe | NaN | NaN | NaN | NaN | Biuro Immohouse ma przyjemność proponować do ... | NaN | balkon, piwnica, winda |
Kolumny 1, 2, 6, 8, 9 – tych będziemy używać w aplikacji (kolumna 0 to cena, czyli zmienna docelowa).
# Positional columns kept for modelling: 0 becomes the target ("cena m"),
# the others are candidate features.
kept_positions = [0, 1, 2, 6, 8, 9]
df_train = df_train.iloc[:, kept_positions]
df_train.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2547 entries, 0 to 2546 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 0 2547 non-null float64 1 1 1519 non-null object 2 2 929 non-null object 3 6 2547 non-null object 4 8 2547 non-null object 5 9 2547 non-null object dtypes: float64(1), object(5) memory usage: 119.5+ KB
# Replace the integer column labels with readable names (roughly: price,
# condition, rent, number of rooms, floor area, market type).
train_column_names = {
    0: "cena m",
    1: "stan",
    2: "czynsz",
    6: "l pokoi",
    8: "metraż",
    9: "rynek",
}
df_train = df_train.rename(columns=train_column_names)
df_train.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2547 entries, 0 to 2546 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 cena m 2547 non-null float64 1 stan 1519 non-null object 2 czynsz 929 non-null object 3 l pokoi 2547 non-null object 4 metraż 2547 non-null object 5 rynek 2547 non-null object dtypes: float64(1), object(5) memory usage: 119.5+ KB
Usunięcie kolumn, w których jest dużo pustych wartości
# 'czynsz' has only 929 non-null values out of 2547 rows (see the info()
# output above), so the column is discarded.
df_train = df_train.drop(columns='czynsz')
df_train.head()
cena m | stan | l pokoi | metraż | rynek | |
---|---|---|---|---|---|
0 | 309000.0 | do zamieszkania | 2 | 43.44 | wtórny |
1 | 314900.0 | do wykończenia | 2 | 42.60 | pierwotny |
2 | 249000.0 | do remontu | 2 | 44.30 | wtórny |
3 | 419000.0 | do zamieszkania | 4 | 88 | wtórny |
4 | 499000.0 | NaN | 3 | 77 | wtórny |
Uzupełnianie braków w danych
# Inspect non-null counts and dtypes before filling in the missing values.
df_train.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2547 entries, 0 to 2546 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 cena m 2547 non-null float64 1 stan 1519 non-null object 2 l pokoi 2547 non-null object 3 metraż 2547 non-null object 4 rynek 2547 non-null object dtypes: float64(1), object(4) memory usage: 99.6+ KB
# Treat a missing condition ('stan') as its own category. The fill is done at
# frame level and assigned back: `df_train['stan'].fillna(..., inplace=True)`
# acts on an intermediate Series, which pandas deprecates and which leaves
# df_train untouched when Copy-on-Write is enabled.
df_train = df_train.fillna({'stan': 'brak info'})

# Remove the rows whose raw text pd.to_numeric cannot parse below:
# "6 909" (embedded space) and the phrase "więcej niż 10".
# .copy() makes the filtered frame independent, so the column assignments
# that follow do not trigger SettingWithCopyWarning.
parseable_rows = (df_train['metraż'] != "6 909") & (df_train['l pokoi'] != "więcej niż 10")
df_train = df_train[parseable_rows].copy()

# Both columns were read as object dtype; convert them to numbers.
df_train['l pokoi'] = pd.to_numeric(df_train['l pokoi'])
df_train['metraż'] = pd.to_numeric(df_train['metraż'])

# Features are every column except the price; the price is the target.
X_train = df_train.drop(columns=["cena m"])
y_train = df_train["cena m"]

# Column groups by dtype: non-object columns vs. object (string) columns.
number_cols = X_train.select_dtypes(exclude=[object]).columns
object_cols = X_train.select_dtypes(include=[object]).columns
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
# ColumnTransformer takes (name, transformer, columns) triples. The previous
# (columns, transformer) pairs were malformed and would fail as soon as `ct`
# was fitted.
# NOTE(review): `ct` is not used anywhere else in the visible notebook; the
# model is built from `preprocessor` instead.
ct = ColumnTransformer([
    ('num', StandardScaler(), number_cols),
    ('cat', OneHotEncoder(), object_cols),
], remainder='passthrough')
from sklearn.linear_model import Ridge
scaler = StandardScaler()
# handle_unknown='ignore': a category that was not present when the encoder
# was fitted is encoded as all zeros instead of raising an error at predict
# time (the default is 'error').
encoder = OneHotEncoder(handle_unknown='ignore')

# Numeric columns go through the scaler, categorical columns through the
# one-hot encoder.
num_transformer = make_pipeline(scaler)
cat_transformer = make_pipeline(encoder)

# Combine both branches into a single preprocessing step.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', num_transformer, number_cols),
        ('cat', cat_transformer, object_cols),
    ]
)

# Ridge regression on top of the preprocessed features.
clf = Ridge(alpha=1.0)
model1 = make_pipeline(preprocessor, clf)
X_train.head()
stan | l pokoi | metraż | rynek | |
---|---|---|---|---|
0 | do zamieszkania | 2 | 43.44 | wtórny |
1 | do wykończenia | 2 | 42.60 | pierwotny |
2 | do remontu | 2 | 44.30 | wtórny |
3 | do zamieszkania | 4 | 88.00 | wtórny |
4 | brak info | 3 | 77.00 | wtórny |
# Display the target series (the "cena m" column of df_train).
y_train
0 309000.0 1 314900.0 2 249000.0 3 419000.0 4 499000.0 ... 2541 383680.0 2542 507600.0 2543 342400.0 2544 335000.0 2545 260000.0 Name: cena m, Length: 2540, dtype: float64
Model Ridge - regresja liniowa
# Fit the whole pipeline (column preprocessing followed by Ridge) on the training data.
model1.fit(X_train, y_train)
Pipeline(steps=[('columntransformer', ColumnTransformer(transformers=[('num', Pipeline(steps=[('standardscaler', StandardScaler())]), Index(['l pokoi', 'metraż'], dtype='object')), ('cat', Pipeline(steps=[('onehotencoder', OneHotEncoder())]), Index(['stan', 'rynek'], dtype='object'))])), ('ridge', Ridge())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('columntransformer', ColumnTransformer(transformers=[('num', Pipeline(steps=[('standardscaler', StandardScaler())]), Index(['l pokoi', 'metraż'], dtype='object')), ('cat', Pipeline(steps=[('onehotencoder', OneHotEncoder())]), Index(['stan', 'rynek'], dtype='object'))])), ('ridge', Ridge())])
ColumnTransformer(transformers=[('num', Pipeline(steps=[('standardscaler', StandardScaler())]), Index(['l pokoi', 'metraż'], dtype='object')), ('cat', Pipeline(steps=[('onehotencoder', OneHotEncoder())]), Index(['stan', 'rynek'], dtype='object'))])
Index(['l pokoi', 'metraż'], dtype='object')
StandardScaler()
Index(['stan', 'rynek'], dtype='object')
OneHotEncoder()
Ridge()
# Tab-separated test features, read with integer column labels. This file has
# no leading price column, so positions are shifted by one relative to
# train.tsv (see the rename of columns 0, 1, 5, 7, 8 further down).
df_test = pd.read_csv('in.tsv', header=None, sep='\t')
df_test.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | do zamieszkania | 250 zł | pełna własność | 6311 | https://www.otodom.pl/oferta/idealne-mieszkani... | 3 | NaN | 59.10 | wtórny | 4.0 | blok | NaN | NaN | idealne mieszkanie do zamieszkania od zaraz | 2 | NaN | miejskie | plastikowe | NaN | NaN | NaN | wielka płyta | Zamieszkaj od zaraz – idealna lokalizacja OFE... | NaN | telewizja kablowa, internet, telefon, domofon ... |
1 | do zamieszkania | NaN | pełna własność | 7868 | https://www.otodom.pl/oferta/2-pokoje-38m2-po-... | 2 | NaN | 38.00 | wtórny | 12.0 | blok | NaN | NaN | 2 pokoje / 38m2 / po remoncie / Winogrady | 4 | NaN | inne | plastikowe | NaN | NaN | NaN | NaN | Na sprzedaż mieszkanie dwupokojowe na osiedlu... | NaN | balkon |
2 | do zamieszkania | 650 zł | pełna własność | 5717 | https://www.otodom.pl/oferta/3-pokoje-na-jezyc... | 3 | NaN | 63.84 | wtórny | 4.0 | blok | NaN | NaN | 3 pokoje na Jeżycach blisko Rusałki | 4 | NaN | miejskie | plastikowe | 1958.0 | NaN | NaN | cegła | !! Oferta dostępna tylko u nas !! Polecam ofe... | NaN | telewizja kablowa, internet, telefon, piwnica,... |
3 | do zamieszkania | 359 zł | spółdzielcze własnościowe | 7380 | https://www.otodom.pl/oferta/uniwersytet-przyr... | 4 | NaN | 50.00 | wtórny | 10.0 | blok | NaN | NaN | Uniwersytet Przyrodniczy - 4 pokoje - ROI 8-10% | 10 | NaN | miejskie | plastikowe | 1975.0 | NaN | 2019-06-30 | wielka płyta | Oferta bezpośrednio od właściciela - bez po... | NaN | telewizja kablowa, internet, telefon, drzwi / ... |
4 | NaN | NaN | NaN | 7373 | https://www.otodom.pl/oferta/mieszkanie-blisko... | 3 | NaN | 65.62 | pierwotny | 3.0 | NaN | NaN | NaN | Mieszkanie blisko centrum w kameralnej okolicy. | 1 | NaN | NaN | NaN | 2020.0 | NaN | 2020-01-01 | NaN | MIESZKANIE 3-POKOJOWE O POW.65,62M2 Mieszkani... | NaN | garaż/miejsce parkingowe, taras, pom. użytkowe |
# Presumably the reference prices for the rows of in.tsv (inferred from the
# file name - confirm).
# NOTE(review): y_test is not referenced again in the visible notebook, so the
# predictions are never scored against it.
y_test = pd.read_csv('expected.tsv', sep='\t', header=None)
# Display the training features again for comparison with the test frame.
X_train
stan | l pokoi | metraż | rynek | |
---|---|---|---|---|
0 | do zamieszkania | 2 | 43.44 | wtórny |
1 | do wykończenia | 2 | 42.60 | pierwotny |
2 | do remontu | 2 | 44.30 | wtórny |
3 | do zamieszkania | 4 | 88.00 | wtórny |
4 | brak info | 3 | 77.00 | wtórny |
... | ... | ... | ... | ... |
2541 | do wykończenia | 3 | 70.40 | pierwotny |
2542 | do wykończenia | 4 | 94.00 | wtórny |
2543 | brak info | 2 | 53.50 | wtórny |
2544 | brak info | 3 | 55.25 | wtórny |
2545 | brak info | 3 | 62.00 | wtórny |
2540 rows × 4 columns
# Give the test columns the same names the training features use; only the
# listed positions are renamed, the rest keep their integer labels.
test_column_names = {
    0: "stan",
    1: "czynsz",
    5: "l pokoi",
    7: "metraż",
    8: "rynek",
}
df_test = df_test.rename(columns=test_column_names)
df_test.columns
Index([ 'stan', 'czynsz', 2, 3, 4, 'l pokoi', 6, 'metraż', 'rynek', 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], dtype='object')
# Keep exactly the four feature columns, in the same order as X_train.
model_inputs = ['stan', 'l pokoi', 'metraż', 'rynek']
df_test = df_test[model_inputs]
df_test.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 462 entries, 0 to 461 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 stan 275 non-null object 1 l pokoi 462 non-null int64 2 metraż 462 non-null float64 3 rynek 462 non-null object dtypes: float64(1), int64(1), object(2) memory usage: 14.6+ KB
# Fill missing 'stan' values with the same placeholder category used for the
# training data. The fill is done at frame level and assigned back:
# `df_test['stan'].fillna(..., inplace=True)` acts on an intermediate Series,
# which pandas deprecates and which leaves df_test untouched when
# Copy-on-Write is enabled.
df_test = df_test.fillna({'stan': 'brak info'})
df_test.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 462 entries, 0 to 461 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 stan 462 non-null object 1 l pokoi 462 non-null int64 2 metraż 462 non-null float64 3 rynek 462 non-null object dtypes: float64(1), int64(1), object(2) memory usage: 14.6+ KB
# Display the 'metraż' column of the test frame (float64 per the info() output above).
df_test['metraż']
0 59.10 1 38.00 2 63.84 3 50.00 4 65.62 ... 457 72.78 458 51.23 459 54.16 460 90.10 461 71.90 Name: metraż, Length: 462, dtype: float64
# Renumber the rows from 0 and discard the old index instead of keeping it as
# a column.
df_test = df_test.reset_index(drop=True)
df_test.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 462 entries, 0 to 461 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 stan 462 non-null object 1 l pokoi 462 non-null int64 2 metraż 462 non-null float64 3 rynek 462 non-null object dtypes: float64(1), int64(1), object(2) memory usage: 14.6+ KB
# Display the fitted pipeline's structure.
model1
Pipeline(steps=[('columntransformer', ColumnTransformer(transformers=[('num', Pipeline(steps=[('standardscaler', StandardScaler())]), Index(['l pokoi', 'metraż'], dtype='object')), ('cat', Pipeline(steps=[('onehotencoder', OneHotEncoder())]), Index(['stan', 'rynek'], dtype='object'))])), ('ridge', Ridge())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('columntransformer', ColumnTransformer(transformers=[('num', Pipeline(steps=[('standardscaler', StandardScaler())]), Index(['l pokoi', 'metraż'], dtype='object')), ('cat', Pipeline(steps=[('onehotencoder', OneHotEncoder())]), Index(['stan', 'rynek'], dtype='object'))])), ('ridge', Ridge())])
ColumnTransformer(transformers=[('num', Pipeline(steps=[('standardscaler', StandardScaler())]), Index(['l pokoi', 'metraż'], dtype='object')), ('cat', Pipeline(steps=[('onehotencoder', OneHotEncoder())]), Index(['stan', 'rynek'], dtype='object'))])
Index(['l pokoi', 'metraż'], dtype='object')
StandardScaler()
Index(['stan', 'rynek'], dtype='object')
OneHotEncoder()
Ridge()
Przykład - przewidywanie cen na danych testowych
# Predict prices for the test rows with the fitted pipeline.
# NOTE(review): the printed output contains a negative value
# (-118069.19958188); the linear model does not constrain predictions to be
# non-negative - consider checking that row.
model1.predict(df_test)
array([ 407917.66889452, 285306.27863261, 444184.94702291, 299459.05926666, 459271.83408962, 614055.95902743, 311243.98561031, 361244.58944658, 329238.65906305, 369470.84812889, 285350.13919824, 329238.65906305, 420020.53940636, 399085.99304054, 368717.32379097, 339834.65864991, 335512.74515277, 304524.88092914, 367036.20209137, 286773.80808993, 354525.20168926, 282085.35025252, 386303.76202814, 477660.29347794, 407152.53644455, 490742.17175818, 529758.94244497, 335044.40697838, 399501.21194489, 431641.29394143, 331214.22563058, 334772.89329194, 404282.20491201, 369968.98599038, 392538.22357404, 354476.13872373, 531690.53563375, 428309.25045513, 298510.41733766, 551966.54555785, 332974.0302655 , 474293.99377424, 247049.65613431, 383931.85143325, 546789.20856336, 438766.28443053, 275556.44383868, 325403.7381088 , 319934.12593671, 277654.95413295, 294800.60980049, 285306.27863261, 283776.01373268, 282811.35937324, 511210.54963994, 327121.56879228, 335039.88788041, 638797.47051695, 521922.40393947, 235305.67479634, 238447.52001532, 465392.89368935, 272046.09327325, 848434.43816552, 512546.27689188, 382253.07914128, 781492.5027453 , 531716.09929903, 442847.05008064, 498968.43044048, 272108.82871587, 275556.44383868, 307110.38376632, 373296.51037872, 320469.71865169, 629040.94693472, 456886.14569235, 463535.64405524, 356361.80264533, 387288.89243253, 249184.65490398, 305443.0398691 , 312129.77494706, 323562.90113092, 286773.80808993, 338640.52949322, 265910.97279499, 414803.86094421, 262352.30513363, 273562.29729465, 959123.60494806, 476401.54226445, 463695.54142089, 474293.99377424, 245933.92683679, 276309.90323886, 95163.71086422, 410945.54601501, 377887.30507851, 332477.49594204, 374627.25336918, 354168.19912956, 369968.98599038, 311900.23521207, 275897.03611221, 272224.40035237, 522497.33812211, 317135.7885512 , 317671.38126618, 357993.86137939, 352637.93422963, 908358.15173798, 288551.8042999 , 398779.94006055, 279465.70834759, 310024.57586448, 367940.58322895, 
336039.07916333, 295296.8610478 , 395408.55510658, 289706.87506509, 461286.74212482, 959007.7502354 , 671918.16381992, 357151.93260828, 573837.44243868, 286071.41108258, 453635.41762516, 294033.30766019, 430106.50994353, 361819.52362922, 280070.72529348, 820481.6050142 , 349767.60269709, 364114.92097912, 262468.15984629, 301183.86181458, 297937.65284535, 285306.27863261, 393955.0865278 , 666659.89980777, 560179.02643777, 440551.36965458, 544315.22105819, 460711.80794218, 281978.75424428, 380105.90610726, 501187.31454538, 343001.78445802, 567830.35093743, 262544.39001514, 332442.95664849, 703770.99332145, 369470.84812889, 494101.3824478 , 273562.29729465, 180666.17730277, 452295.35099255, 331304.51667796, 453635.41762516, 323108.3407589 , 507194.68912278, 326474.92353875, 334772.89329194, 335044.40697838, 240054.29816025, 562052.51609502, 274404.22606576, 375397.37008062, 253585.25133645, 311900.23521207, 365811.99017143, 339834.65864991, 395408.55510658, 567728.27402715, 336238.53613507, 490526.69290172, 257086.66903124, 385858.46036054, 344082.22859239, 533220.80053368, 295941.33661099, 220577.95997966, 291229.76787478, 200262.6085879 , 281213.62179431, 338640.52949322, 267288.49428108, 439786.23720462, 400076.14612753, 301183.86181458, 479730.95326697, 319470.24429261, 470581.73347059, 336354.39084773, 491317.10594082, 354168.19912956, 369551.88047185, 435462.4370933 , 514271.0794398 , 331214.22563058, 234999.90489251, 240207.32465024, 395408.55510658, 305290.01337911, 265336.03861235, 349500.60810862, 315911.57663126, 448585.54345538, 310063.91733215, 414803.86094421, 521922.40393947, 768450.42872618, 285306.27863261, 414228.92676157, 408188.43671135, 262917.91567413, 530071.86630062, 284993.07170081, 354168.19912956, 350852.56592942, 341174.72528252, 174670.0129817 , 346185.25798463, 308617.25469129, 319206.16526408, 479496.61135786, 285306.27863261, 338832.89745087, 238677.05975031, 300193.70872759, 284350.94791532, 312129.77494706, 288864.94629397, 
447136.31089841, 468759.4764692 , 311243.98561031, 773841.17824564, 309069.24514719, 311818.91979295, 303300.6690092 , 249129.2915516 , 317026.62262684, 270578.56381593, 374826.77527865, 317632.03979852, 218540.23357609, 336303.15819187, 364613.05884061, 419438.51620964, 348842.0716669 , 300494.95953345, 245456.65579176, 632191.76778196, 342245.91071247, 321241.94011573, 345941.94044726, 654441.17458326, 594917.92628041, 403059.87960624, 295368.85519483, 287144.48312671, 504057.36300177, 259834.05683704, 356196.60189099, 671431.17127702, 221915.85692194, 266766.67936848, 862139.34958591, 253061.54980967, 278229.88831559, 360787.39659078, 819749.12524361, 225938.68930354, 337453.4893506 , 417419.08907647, 433477.61182135, 428415.84646337, 369470.84812889, 644948.32980378, 369778.78772305, 486728.1978552 , 220003.02579702, 372225.04187261, 533707.33028312, 268312.96615313, 351337.20906469, 266766.67936848, 433665.17760489, 291427.33823234, 681260.15179974, 386155.25463611, 361819.52362922, 378283.64910587, 312584.05224292, 262442.87925716, 347027.18675575, 388599.15937804, 330653.06925032, 331214.22563058, 392424.82162787, 581111.00288747, 510178.4226015 , 638681.61580429, 273562.29729465, 332934.68879784, 370402.78487122, 251183.25797831, 454520.92388575, 327814.99017136, 317446.36062915, 270745.3681083 , 568020.54920476, 1041637.53573378, 445984.0931255 , 246474.72195167, 353326.27035845, 456478.01580208, 268245.71161255, 476014.4569415 , 305276.23557673, 590784.32443641, 441316.50210455, 265910.97279499, 319470.24429261, 266451.36768408, 331214.22563058, 327784.90740811, 319470.24429261, 244403.66193686, 252780.99555724, 268449.97075841, 912139.95342218, 430604.64780502, 536459.92048882, 1156407.40322869, 395408.55510658, 221600.76337596, 327898.59243044, 237907.12512623, 498752.66850787, 468835.9897142 , 350852.56592942, 300033.9934493 , 232365.80188326, 533897.52855045, 232365.80188326, 369329.42975094, -118069.19958188, 317722.33084589, 323601.95952243, 
369203.85354041, 268053.34365489, 1051697.65952951, 254248.58995229, 303300.6690092 , 273638.81053964, 244461.9836068 , 258111.42397945, 388332.16478956, 429074.38290509, 536459.92048882, 800353.81940598, 298918.26415178, 407917.66889452, 388332.16478956, 414613.66267689, 1262607.50420783, 369329.42975094, 319129.65201908, 334197.9591093 , 286263.77904024, 307993.25754312, 741174.54173005, 491064.17223088, 298429.10191855, 292957.60313228, 315911.57663126, 327478.85442812, 451544.27942111, 317135.7885512 , 724682.50318048, 509137.53807322, 361833.3014316 , 693810.21059336, 582168.1274389 , 212609.37224369, 260216.62306202, 710680.29626995, 264840.07044119, 300877.52575844, 309268.70211893, 389929.9023685 , 464347.27192468, 347409.46990458, 693810.21059336, 468363.13244184, 427977.91688601, 251373.45624563, 427817.80138193, 352408.11141849, 728508.16543031, 749166.7415794 , 558648.76153784, 450512.15238267, 571124.93957025, 592161.2797702 , 418937.46278821, 319398.53322173, 415378.79512685, 600960.30294481, 263047.6131269 , 364648.06092761, 325259.97032322, 589476.22718124, 599471.54920288, 983410.6732152 , 405432.07327729, 442158.14779951, 491279.65108734, 636692.27143438, 311900.23521207, 186643.53405465, 454252.04268309, 348902.56302718, 335848.88089601, 377701.62590915, 377701.62590915, 353370.41400023, 376477.41398921, 560336.57202573, 556969.98924588, 402990.3382257 , 568449.1456857 , 448323.35104102, 517259.61509264, 688264.08517628, 606585.11129723, 1137012.09739106, 327010.51625373, 361028.82751397, 628428.55789859, 413554.08535898, 341098.21203752, 401415.92968398, 455318.42593893, 1619425.93740436, 451114.99963823, 267139.98688905, 342658.5597007 , 304976.87138504, 438573.91647287, 304976.87138504, 392508.02366116, 458652.63911557, 547637.54304662, 524873.76781497, 508805.98636568, 382751.50007893, 366338.32418199, 645108.72838401, 532942.19783316])
# Confirm the test frame has the four feature columns the pipeline was fitted on.
df_test.columns
Index(['stan', 'l pokoi', 'metraż', 'rynek'], dtype='object')
# List the distinct 'rynek' categories present in the training features.
X_train['rynek'].unique()
array(['wtórny', 'pierwotny'], dtype=object)
Saving the model to file
# Persist the fitted pipeline (preprocessing + Ridge) to disk with joblib.
filename = 'ridge_model.sav'
joblib.dump(value=model1, filename=filename)
['ridge_model.sav']