Adding pipeline.py

This commit is contained in:
Zofia Zientek 2023-10-24 18:38:02 +02:00
parent 631e775bbe
commit 353a056281
9 changed files with 1570 additions and 0 deletions

Binary file not shown.

Binary file not shown.

462
dev-0/expected.tsv Normal file
View File

@ -0,0 +1,462 @@
373000
299000
365000
369000
483791
430000
312000
397000
302672
325000
302800
300495
375038
437131
373761
337567
359320
326407
294819
266000
355720.5
288000
239000
385000
349000
804500
520000
315441
339000
429000
325000
269000
420000
400000
420000
385000
617232
399000
328338
596232
339000
293000
289000
330000
499000
419000
219598
341670
351259
319000
211426
309000
299000
349000
508000
270000
333750
724120
485000
274000
242775
415125
269000
535000
497000
357124
830000
675000
399000
550000
245000
219598
365000
435000
254231
359000
485000
469758
382740
499000
279000
283080
298200
339000
239900
341145
310000
369000
327000
259000
1308456
434358
449000
293000
203000
273000
175000
368938
299000
415000
333000
345000
350000
297987
290696.04
293571
459000
355000
269000
345000
300000
1156756
242943.1
486600
305370
318240
340000
316000
300500
335000
227000
399000
1100000
389000
370000
659000
249000
555000
315205
349000
385000
397005
469000
490000
316900
254500
280000
294588
275000
369000
857514
560000
310000
596232
459000
238000
440000
499000
327104
650000
235000
338200
412000
329000
609631
309000
250000
469000
311122
450000
299000
499000
330400
295000
315500
254500
467700
393750
530000
270000
297987
360400
337567
380000
568600
322010
490824
269548
361745
359371
579900
340000
199000
255000
255000
245000
303204
290000
349000
360000
350000
469064
280000
417000
318032
579000
320000
382060
379000
420000
250000
211500
254280
383000
355696
249000
359000
429000
605000
325949
340000
420000
579215
304900
380000
229000
325000
675000
409000
355000
388447.5
345015
199000
320292
300841
313000
566999
275000
359371
253000
262000
230000
299000
399000
453040
479000
312000
1100000
308196
355000
336842
248100
352894
242000
330000
315000
299900
454000
325000
499900
369000
429000
244500
661650
357475
321165
330000
404900
1240000
696000
295000
295000
417000
242300
285000
980000
229000
259786
447496.2
239617.2
260000
387000
799000
238638
354944
521683
506600
506363
299000
844990
213000
324900
261000
349000
480000
283000
430000
259786
339000
299000
530000
396683
329000
420863
299000
276458
350286
515000
341670
369000
322000
761976.07
389000
459999
429000
425000
286000
270000
561636.5
550625
324836
221976
699000
579000
514000
282000
345000
534508
299000
545000
250000
379000
269000
299000
329000
249976
430000
303086
303000
238400
339000
241500
1100000
399000
530000
972000
359000
250000
329460
239000
490110
468120
378157
285000
235850
499000
235850
365428
670000
320000
279000
388000
324990
570000
348796
287144
272000
267877
255062
650000
364000
530000
570000
284000
339000
435000
280000
1300000
365428
295000
360000
205000
319000
736450
453040
280500
299000
330000
337110
520000
395000
595000
375000
358575
588000
1025455
245000
238500
1167400
249000
441000
239000
455000
640000
344250
400000
485000
543617
276000
428400
360315
595000
595000
535000
534600
330000
627810
439000
304945
699000
712164
250000
449000
284859
595350
259000
590000
355000
389000
429000
649000
297987
159761
505505
336676
272764
343876
336290.5
308035
335226.5
532317
525937.5
397720
702150
592020
671488
593000
399000
1800000
288728
393211
789325
453040
345015
519745
400920
2318580
469728
247154
328828.5
260927.5
482729
257328.5
305923.5
529623
641395.58
669606.91
655544.02
471397.97
309958
699000
850000
1 373000
2 299000
3 365000
4 369000
5 483791
6 430000
7 312000
8 397000
9 302672
10 325000
11 302800
12 300495
13 375038
14 437131
15 373761
16 337567
17 359320
18 326407
19 294819
20 266000
21 355720.5
22 288000
23 239000
24 385000
25 349000
26 804500
27 520000
28 315441
29 339000
30 429000
31 325000
32 269000
33 420000
34 400000
35 420000
36 385000
37 617232
38 399000
39 328338
40 596232
41 339000
42 293000
43 289000
44 330000
45 499000
46 419000
47 219598
48 341670
49 351259
50 319000
51 211426
52 309000
53 299000
54 349000
55 508000
56 270000
57 333750
58 724120
59 485000
60 274000
61 242775
62 415125
63 269000
64 535000
65 497000
66 357124
67 830000
68 675000
69 399000
70 550000
71 245000
72 219598
73 365000
74 435000
75 254231
76 359000
77 485000
78 469758
79 382740
80 499000
81 279000
82 283080
83 298200
84 339000
85 239900
86 341145
87 310000
88 369000
89 327000
90 259000
91 1308456
92 434358
93 449000
94 293000
95 203000
96 273000
97 175000
98 368938
99 299000
100 415000
101 333000
102 345000
103 350000
104 297987
105 290696.04
106 293571
107 459000
108 355000
109 269000
110 345000
111 300000
112 1156756
113 242943.1
114 486600
115 305370
116 318240
117 340000
118 316000
119 300500
120 335000
121 227000
122 399000
123 1100000
124 389000
125 370000
126 659000
127 249000
128 555000
129 315205
130 349000
131 385000
132 397005
133 469000
134 490000
135 316900
136 254500
137 280000
138 294588
139 275000
140 369000
141 857514
142 560000
143 310000
144 596232
145 459000
146 238000
147 440000
148 499000
149 327104
150 650000
151 235000
152 338200
153 412000
154 329000
155 609631
156 309000
157 250000
158 469000
159 311122
160 450000
161 299000
162 499000
163 330400
164 295000
165 315500
166 254500
167 467700
168 393750
169 530000
170 270000
171 297987
172 360400
173 337567
174 380000
175 568600
176 322010
177 490824
178 269548
179 361745
180 359371
181 579900
182 340000
183 199000
184 255000
185 255000
186 245000
187 303204
188 290000
189 349000
190 360000
191 350000
192 469064
193 280000
194 417000
195 318032
196 579000
197 320000
198 382060
199 379000
200 420000
201 250000
202 211500
203 254280
204 383000
205 355696
206 249000
207 359000
208 429000
209 605000
210 325949
211 340000
212 420000
213 579215
214 304900
215 380000
216 229000
217 325000
218 675000
219 409000
220 355000
221 388447.5
222 345015
223 199000
224 320292
225 300841
226 313000
227 566999
228 275000
229 359371
230 253000
231 262000
232 230000
233 299000
234 399000
235 453040
236 479000
237 312000
238 1100000
239 308196
240 355000
241 336842
242 248100
243 352894
244 242000
245 330000
246 315000
247 299900
248 454000
249 325000
250 499900
251 369000
252 429000
253 244500
254 661650
255 357475
256 321165
257 330000
258 404900
259 1240000
260 696000
261 295000
262 295000
263 417000
264 242300
265 285000
266 980000
267 229000
268 259786
269 447496.2
270 239617.2
271 260000
272 387000
273 799000
274 238638
275 354944
276 521683
277 506600
278 506363
279 299000
280 844990
281 213000
282 324900
283 261000
284 349000
285 480000
286 283000
287 430000
288 259786
289 339000
290 299000
291 530000
292 396683
293 329000
294 420863
295 299000
296 276458
297 350286
298 515000
299 341670
300 369000
301 322000
302 761976.07
303 389000
304 459999
305 429000
306 425000
307 286000
308 270000
309 561636.5
310 550625
311 324836
312 221976
313 699000
314 579000
315 514000
316 282000
317 345000
318 534508
319 299000
320 545000
321 250000
322 379000
323 269000
324 299000
325 329000
326 249976
327 430000
328 303086
329 303000
330 238400
331 339000
332 241500
333 1100000
334 399000
335 530000
336 972000
337 359000
338 250000
339 329460
340 239000
341 490110
342 468120
343 378157
344 285000
345 235850
346 499000
347 235850
348 365428
349 670000
350 320000
351 279000
352 388000
353 324990
354 570000
355 348796
356 287144
357 272000
358 267877
359 255062
360 650000
361 364000
362 530000
363 570000
364 284000
365 339000
366 435000
367 280000
368 1300000
369 365428
370 295000
371 360000
372 205000
373 319000
374 736450
375 453040
376 280500
377 299000
378 330000
379 337110
380 520000
381 395000
382 595000
383 375000
384 358575
385 588000
386 1025455
387 245000
388 238500
389 1167400
390 249000
391 441000
392 239000
393 455000
394 640000
395 344250
396 400000
397 485000
398 543617
399 276000
400 428400
401 360315
402 595000
403 595000
404 535000
405 534600
406 330000
407 627810
408 439000
409 304945
410 699000
411 712164
412 250000
413 449000
414 284859
415 595350
416 259000
417 590000
418 355000
419 389000
420 429000
421 649000
422 297987
423 159761
424 505505
425 336676
426 272764
427 343876
428 336290.5
429 308035
430 335226.5
431 532317
432 525937.5
433 397720
434 702150
435 592020
436 671488
437 593000
438 399000
439 1800000
440 288728
441 393211
442 789325
443 453040
444 345015
445 519745
446 400920
447 2318580
448 469728
449 247154
450 328828.5
451 260927.5
452 482729
453 257328.5
454 305923.5
455 529623
456 641395.58
457 669606.91
458 655544.02
459 471397.97
460 309958
461 699000
462 850000

462
dev-0/in.tsv Normal file

File diff suppressed because one or more lines are too long

106
dev-0/out.tsv Normal file
View File

@ -0,0 +1,106 @@
438785.2913972244
439057.23099919874
386274.2118016632
386722.31769914605
366094.3344721196
516777.10211508814
468621.88667045534
349948.5366470157
444966.9443439897
349657.9314571674
281814.65768045397
380230.24610083
533761.0395582682
284298.6286792335
302760.82473539945
294494.6957395293
303626.5505932587
584528.6660046072
784700.0575206266
752671.6754240954
516071.1433928942
416107.9319851352
381330.4301660287
588211.6199853121
480981.67140597175
484859.78185138875
311068.7302960584
315949.9297491256
257801.58826774202
142550.63848562422
362860.87012622226
388171.5324259206
478092.2326986545
328405.01758869365
380134.098031612
381587.1579842053
288324.8145994118
432696.6726767714
318130.32826012623
435187.4588356118
424704.3103526968
354259.9454763391
390066.9031165759
352230.25542972225
262434.35465128557
286093.8046546493
278652.1534394977
305132.6744196697
379476.8952788627
523803.4038821715
472164.2783154694
557197.9049793513
385707.6208490075
249770.74994324776
447297.85237721645
473926.4254470614
551525.0612915668
455249.75394877745
277287.1035815221
377954.25547762506
265071.3019075517
390541.15477277193
340022.347779955
331980.58217403764
247821.5699980029
335310.7744164435
426980.41512499726
424797.4845629018
505361.1298279477
460097.1617031188
284709.2873732758
335647.83233635745
270052.19102661626
290966.45832854987
279396.1721889902
323414.4972144816
727959.0986630493
390603.4021630825
340741.933773114
264762.50193169154
403417.3623101625
392590.4807311653
387039.147268873
276373.95325510646
298303.8143283458
779346.0980894796
290662.80052611313
353715.6417229106
303551.4737648836
668522.8530199206
398220.6541869035
339215.5191208159
394305.8946513819
355238.0213504333
310163.81687300355
348995.790840848
65517.75407795762
306231.0218035679
325183.35496851837
652223.7740641477
251438.3009618289
488597.6679408548
653295.0728505378
704240.8981666869
387418.8114984069
649756.6035774458
1 438785.2913972244
2 439057.23099919874
3 386274.2118016632
4 386722.31769914605
5 366094.3344721196
6 516777.10211508814
7 468621.88667045534
8 349948.5366470157
9 444966.9443439897
10 349657.9314571674
11 281814.65768045397
12 380230.24610083
13 533761.0395582682
14 284298.6286792335
15 302760.82473539945
16 294494.6957395293
17 303626.5505932587
18 584528.6660046072
19 784700.0575206266
20 752671.6754240954
21 516071.1433928942
22 416107.9319851352
23 381330.4301660287
24 588211.6199853121
25 480981.67140597175
26 484859.78185138875
27 311068.7302960584
28 315949.9297491256
29 257801.58826774202
30 142550.63848562422
31 362860.87012622226
32 388171.5324259206
33 478092.2326986545
34 328405.01758869365
35 380134.098031612
36 381587.1579842053
37 288324.8145994118
38 432696.6726767714
39 318130.32826012623
40 435187.4588356118
41 424704.3103526968
42 354259.9454763391
43 390066.9031165759
44 352230.25542972225
45 262434.35465128557
46 286093.8046546493
47 278652.1534394977
48 305132.6744196697
49 379476.8952788627
50 523803.4038821715
51 472164.2783154694
52 557197.9049793513
53 385707.6208490075
54 249770.74994324776
55 447297.85237721645
56 473926.4254470614
57 551525.0612915668
58 455249.75394877745
59 277287.1035815221
60 377954.25547762506
61 265071.3019075517
62 390541.15477277193
63 340022.347779955
64 331980.58217403764
65 247821.5699980029
66 335310.7744164435
67 426980.41512499726
68 424797.4845629018
69 505361.1298279477
70 460097.1617031188
71 284709.2873732758
72 335647.83233635745
73 270052.19102661626
74 290966.45832854987
75 279396.1721889902
76 323414.4972144816
77 727959.0986630493
78 390603.4021630825
79 340741.933773114
80 264762.50193169154
81 403417.3623101625
82 392590.4807311653
83 387039.147268873
84 276373.95325510646
85 298303.8143283458
86 779346.0980894796
87 290662.80052611313
88 353715.6417229106
89 303551.4737648836
90 668522.8530199206
91 398220.6541869035
92 339215.5191208159
93 394305.8946513819
94 355238.0213504333
95 310163.81687300355
96 348995.790840848
97 65517.75407795762
98 306231.0218035679
99 325183.35496851837
100 652223.7740641477
101 251438.3009618289
102 488597.6679408548
103 653295.0728505378
104 704240.8981666869
105 387418.8114984069
106 649756.6035774458

BIN
flat_model.pkl Normal file

Binary file not shown.

21
pipeline.py Normal file
View File

@ -0,0 +1,21 @@
import csv
import model_training as mt
import pandas as pd
import price_evaluation as pe
def eval(filename_in, filename_out):
    """Predict a price for every row of a headerless TSV file and save the results.

    Note: the name shadows the built-in ``eval``; it is kept so existing
    callers keep working.

    Args:
        filename_in: path to a tab-separated file without a header row. Only
            the columns at positions 0 ('stan'), 1 ('czynsz'), 7 ('metraz'),
            8 ('rynek') and 9 ('liczba pokoi') are used.
        filename_out: path of the file to write; it receives one prediction
            per line, in the same order as the rows of ``filename_in``.

    Rows with missing or unparsable feature values are NOT dropped: the
    output must stay line-aligned with the input, so gaps are imputed with
    the column median (or 0 when a column has no usable value at all).
    """
    # Positions of the model's input features in the raw file.
    column_names = {0: 'stan', 1: 'czynsz', 7: 'metraz', 8: 'rynek', 9: 'liczba pokoi'}
    # Column order handed to the model (same order as before this change).
    feature_order = ['stan', 'czynsz', 'liczba pokoi', 'metraz', 'rynek']

    data_eval = pd.read_table(filename_in, delimiter='\t', header=None)
    # Select the needed columns directly instead of dropping a hard-coded
    # list of the others, so files with a different number of trailing
    # columns still work.
    data_eval = data_eval.rename(columns=column_names)[feature_order].copy()

    # Keep the first run of digits of the rent text. astype(str) guards
    # against the column being parsed as numeric/all-NaN, where the .str
    # accessor would raise; the raw string avoids the invalid '\d' escape.
    data_eval['czynsz'] = data_eval['czynsz'].astype(str).str.extract(r'(\d+)', expand=False)
    # NOTE(review): 'do wykończenia' and 'do zamieszkania' share the code 2;
    # left unchanged because it presumably mirrors the training encoding.
    data_eval['stan'] = data_eval['stan'].map({'do zamieszkania': 2, 'do remontu': 1, 'do wykończenia': 2})
    data_eval['rynek'] = data_eval['rynek'].map({'wtórny': 0, 'pierwotny': 1})

    # Make every feature numeric; anything unparsable becomes NaN.
    data_eval = data_eval.apply(pd.to_numeric, errors='coerce')
    # Impute instead of dropna(): dropping rows made the output shorter than
    # the input, so predictions could no longer be matched to their rows.
    data_eval = data_eval.fillna(data_eval.median()).fillna(0)

    out = mt.model.predict(data_eval)
    pd.DataFrame(out).to_csv(filename_out, sep='\t', index=False, header=False)
# Generate predictions for each dataset split: (input file, output file).
for source_path, target_path in (
    ('dev-0/in.tsv', 'dev-0/out.tsv'),
    ('test-A/in.tsv', 'test-A/out.tsv'),
):
    eval(source_path, target_path)

418
test-A/in.tsv Normal file

File diff suppressed because one or more lines are too long

101
test-A/out.tsv Normal file
View File

@ -0,0 +1,101 @@
355128.4427831349
334338.4901026813
529063.9425151651
455553.4766766229
297897.73747289146
355097.23031301657
593121.4404591958
224053.5741490908
500224.3245957352
309386.0591294997
362595.45714443433
297020.13034301443
392628.7523117415
390740.85354551824
417512.4555268084
255871.19009756995
303906.69154232735
430706.6866501465
388055.513946502
199553.12140648713
324437.34934717487
287716.39837987634
326783.354171627
344969.65422220883
439943.4301322885
365651.3739072073
473541.93998109805
397190.011401333
339491.08093359147
361314.0333504934
906943.1251904051
379487.3807523678
330066.353096911
746684.0270939263
409639.6845630782
310685.77511363453
267903.0399920683
390741.63975717244
321296.753898358
329296.9204894126
259589.79429047485
383509.25625372084
605335.0230421544
453360.8062374862
275612.1870547221
328711.75370610604
409551.6896071418
161409.09137445962
481964.7347708574
308140.44034038077
211810.63872246095
199931.02910046442
470081.37468967284
293605.6384494368
410235.8627761756
473525.4554782605
766906.700454194
304833.8161031428
291768.67322025984
246247.99513514055
228514.897144243
328736.21085431206
292199.0189778785
1774178.7986119075
371416.8287964024
331230.87479559705
334673.1883525122
380762.55288023036
472908.3991528695
1402111.6523221093
430163.1097101653
253050.0648098148
777659.7566094768
264887.0264718806
538378.2928022667
437593.33760668256
485300.95596167
586242.5075015154
514807.9896312915
510207.3597893864
353014.32668722013
462445.0163546961
271161.07258611044
325183.35496851837
338231.7490905717
365688.57318596356
335687.09607300966
258097.40319777536
589911.7328044499
332230.7078283557
469659.0989816049
242480.32092807116
430706.6866501465
330888.9738887339
695028.778686561
309179.47218438284
314151.65667571756
473926.4254470614
363344.6070129465
198848.53877465893
393844.61698660883
1 355128.4427831349
2 334338.4901026813
3 529063.9425151651
4 455553.4766766229
5 297897.73747289146
6 355097.23031301657
7 593121.4404591958
8 224053.5741490908
9 500224.3245957352
10 309386.0591294997
11 362595.45714443433
12 297020.13034301443
13 392628.7523117415
14 390740.85354551824
15 417512.4555268084
16 255871.19009756995
17 303906.69154232735
18 430706.6866501465
19 388055.513946502
20 199553.12140648713
21 324437.34934717487
22 287716.39837987634
23 326783.354171627
24 344969.65422220883
25 439943.4301322885
26 365651.3739072073
27 473541.93998109805
28 397190.011401333
29 339491.08093359147
30 361314.0333504934
31 906943.1251904051
32 379487.3807523678
33 330066.353096911
34 746684.0270939263
35 409639.6845630782
36 310685.77511363453
37 267903.0399920683
38 390741.63975717244
39 321296.753898358
40 329296.9204894126
41 259589.79429047485
42 383509.25625372084
43 605335.0230421544
44 453360.8062374862
45 275612.1870547221
46 328711.75370610604
47 409551.6896071418
48 161409.09137445962
49 481964.7347708574
50 308140.44034038077
51 211810.63872246095
52 199931.02910046442
53 470081.37468967284
54 293605.6384494368
55 410235.8627761756
56 473525.4554782605
57 766906.700454194
58 304833.8161031428
59 291768.67322025984
60 246247.99513514055
61 228514.897144243
62 328736.21085431206
63 292199.0189778785
64 1774178.7986119075
65 371416.8287964024
66 331230.87479559705
67 334673.1883525122
68 380762.55288023036
69 472908.3991528695
70 1402111.6523221093
71 430163.1097101653
72 253050.0648098148
73 777659.7566094768
74 264887.0264718806
75 538378.2928022667
76 437593.33760668256
77 485300.95596167
78 586242.5075015154
79 514807.9896312915
80 510207.3597893864
81 353014.32668722013
82 462445.0163546961
83 271161.07258611044
84 325183.35496851837
85 338231.7490905717
86 365688.57318596356
87 335687.09607300966
88 258097.40319777536
89 589911.7328044499
90 332230.7078283557
91 469659.0989816049
92 242480.32092807116
93 430706.6866501465
94 330888.9738887339
95 695028.778686561
96 309179.47218438284
97 314151.65667571756
98 473926.4254470614
99 363344.6070129465
100 198848.53877465893
101 393844.61698660883