This commit is contained in:
SzamanFL 2020-12-15 22:46:25 +01:00
parent b4d0fa5b0d
commit a9defbbc5a
11 changed files with 5075 additions and 1 deletions

View File

@ -1,2 +1,6 @@
# mieszkania5
Mieszkania5 challenge
=====================
Guess the price of a flat/house.
The metric is mean absolute error (MAE).

1
config.txt Normal file
View File

@ -0,0 +1 @@
--metric MAE --precision 1

462
dev-0/expected.tsv Normal file
View File

@ -0,0 +1,462 @@
373000
299000
365000
369000
483791
430000
312000
397000
302672
325000
302800
300495
375038
437131
373761
337567
359320
326407
294819
266000
355720.5
288000
239000
385000
349000
804500
520000
315441
339000
429000
325000
269000
420000
400000
420000
385000
617232
399000
328338
596232
339000
293000
289000
330000
499000
419000
219598
341670
351259
319000
211426
309000
299000
349000
508000
270000
333750
724120
485000
274000
242775
415125
269000
535000
497000
357124
830000
675000
399000
550000
245000
219598
365000
435000
254231
359000
485000
469758
382740
499000
279000
283080
298200
339000
239900
341145
310000
369000
327000
259000
1308456
434358
449000
293000
203000
273000
175000
368938
299000
415000
333000
345000
350000
297987
290696.04
293571
459000
355000
269000
345000
300000
1156756
242943.1
486600
305370
318240
340000
316000
300500
335000
227000
399000
1100000
389000
370000
659000
249000
555000
315205
349000
385000
397005
469000
490000
316900
254500
280000
294588
275000
369000
857514
560000
310000
596232
459000
238000
440000
499000
327104
650000
235000
338200
412000
329000
609631
309000
250000
469000
311122
450000
299000
499000
330400
295000
315500
254500
467700
393750
530000
270000
297987
360400
337567
380000
568600
322010
490824
269548
361745
359371
579900
340000
199000
255000
255000
245000
303204
290000
349000
360000
350000
469064
280000
417000
318032
579000
320000
382060
379000
420000
250000
211500
254280
383000
355696
249000
359000
429000
605000
325949
340000
420000
579215
304900
380000
229000
325000
675000
409000
355000
388447.5
345015
199000
320292
300841
313000
566999
275000
359371
253000
262000
230000
299000
399000
453040
479000
312000
1100000
308196
355000
336842
248100
352894
242000
330000
315000
299900
454000
325000
499900
369000
429000
244500
661650
357475
321165
330000
404900
1240000
696000
295000
295000
417000
242300
285000
980000
229000
259786
447496.2
239617.2
260000
387000
799000
238638
354944
521683
506600
506363
299000
844990
213000
324900
261000
349000
480000
283000
430000
259786
339000
299000
530000
396683
329000
420863
299000
276458
350286
515000
341670
369000
322000
761976.07
389000
459999
429000
425000
286000
270000
561636.5
550625
324836
221976
699000
579000
514000
282000
345000
534508
299000
545000
250000
379000
269000
299000
329000
249976
430000
303086
303000
238400
339000
241500
1100000
399000
530000
972000
359000
250000
329460
239000
490110
468120
378157
285000
235850
499000
235850
365428
670000
320000
279000
388000
324990
570000
348796
287144
272000
267877
255062
650000
364000
530000
570000
284000
339000
435000
280000
1300000
365428
295000
360000
205000
319000
736450
453040
280500
299000
330000
337110
520000
395000
595000
375000
358575
588000
1025455
245000
238500
1167400
249000
441000
239000
455000
640000
344250
400000
485000
543617
276000
428400
360315
595000
595000
535000
534600
330000
627810
439000
304945
699000
712164
250000
449000
284859
595350
259000
590000
355000
389000
429000
649000
297987
159761
505505
336676
272764
343876
336290.5
308035
335226.5
532317
525937.5
397720
702150
592020
671488
593000
399000
1800000
288728
393211
789325
453040
345015
519745
400920
2318580
469728
247154
328828.5
260927.5
482729
257328.5
305923.5
529623
641395.58
669606.91
655544.02
471397.97
309958
699000
850000
1 373000
2 299000
3 365000
4 369000
5 483791
6 430000
7 312000
8 397000
9 302672
10 325000
11 302800
12 300495
13 375038
14 437131
15 373761
16 337567
17 359320
18 326407
19 294819
20 266000
21 355720.5
22 288000
23 239000
24 385000
25 349000
26 804500
27 520000
28 315441
29 339000
30 429000
31 325000
32 269000
33 420000
34 400000
35 420000
36 385000
37 617232
38 399000
39 328338
40 596232
41 339000
42 293000
43 289000
44 330000
45 499000
46 419000
47 219598
48 341670
49 351259
50 319000
51 211426
52 309000
53 299000
54 349000
55 508000
56 270000
57 333750
58 724120
59 485000
60 274000
61 242775
62 415125
63 269000
64 535000
65 497000
66 357124
67 830000
68 675000
69 399000
70 550000
71 245000
72 219598
73 365000
74 435000
75 254231
76 359000
77 485000
78 469758
79 382740
80 499000
81 279000
82 283080
83 298200
84 339000
85 239900
86 341145
87 310000
88 369000
89 327000
90 259000
91 1308456
92 434358
93 449000
94 293000
95 203000
96 273000
97 175000
98 368938
99 299000
100 415000
101 333000
102 345000
103 350000
104 297987
105 290696.04
106 293571
107 459000
108 355000
109 269000
110 345000
111 300000
112 1156756
113 242943.1
114 486600
115 305370
116 318240
117 340000
118 316000
119 300500
120 335000
121 227000
122 399000
123 1100000
124 389000
125 370000
126 659000
127 249000
128 555000
129 315205
130 349000
131 385000
132 397005
133 469000
134 490000
135 316900
136 254500
137 280000
138 294588
139 275000
140 369000
141 857514
142 560000
143 310000
144 596232
145 459000
146 238000
147 440000
148 499000
149 327104
150 650000
151 235000
152 338200
153 412000
154 329000
155 609631
156 309000
157 250000
158 469000
159 311122
160 450000
161 299000
162 499000
163 330400
164 295000
165 315500
166 254500
167 467700
168 393750
169 530000
170 270000
171 297987
172 360400
173 337567
174 380000
175 568600
176 322010
177 490824
178 269548
179 361745
180 359371
181 579900
182 340000
183 199000
184 255000
185 255000
186 245000
187 303204
188 290000
189 349000
190 360000
191 350000
192 469064
193 280000
194 417000
195 318032
196 579000
197 320000
198 382060
199 379000
200 420000
201 250000
202 211500
203 254280
204 383000
205 355696
206 249000
207 359000
208 429000
209 605000
210 325949
211 340000
212 420000
213 579215
214 304900
215 380000
216 229000
217 325000
218 675000
219 409000
220 355000
221 388447.5
222 345015
223 199000
224 320292
225 300841
226 313000
227 566999
228 275000
229 359371
230 253000
231 262000
232 230000
233 299000
234 399000
235 453040
236 479000
237 312000
238 1100000
239 308196
240 355000
241 336842
242 248100
243 352894
244 242000
245 330000
246 315000
247 299900
248 454000
249 325000
250 499900
251 369000
252 429000
253 244500
254 661650
255 357475
256 321165
257 330000
258 404900
259 1240000
260 696000
261 295000
262 295000
263 417000
264 242300
265 285000
266 980000
267 229000
268 259786
269 447496.2
270 239617.2
271 260000
272 387000
273 799000
274 238638
275 354944
276 521683
277 506600
278 506363
279 299000
280 844990
281 213000
282 324900
283 261000
284 349000
285 480000
286 283000
287 430000
288 259786
289 339000
290 299000
291 530000
292 396683
293 329000
294 420863
295 299000
296 276458
297 350286
298 515000
299 341670
300 369000
301 322000
302 761976.07
303 389000
304 459999
305 429000
306 425000
307 286000
308 270000
309 561636.5
310 550625
311 324836
312 221976
313 699000
314 579000
315 514000
316 282000
317 345000
318 534508
319 299000
320 545000
321 250000
322 379000
323 269000
324 299000
325 329000
326 249976
327 430000
328 303086
329 303000
330 238400
331 339000
332 241500
333 1100000
334 399000
335 530000
336 972000
337 359000
338 250000
339 329460
340 239000
341 490110
342 468120
343 378157
344 285000
345 235850
346 499000
347 235850
348 365428
349 670000
350 320000
351 279000
352 388000
353 324990
354 570000
355 348796
356 287144
357 272000
358 267877
359 255062
360 650000
361 364000
362 530000
363 570000
364 284000
365 339000
366 435000
367 280000
368 1300000
369 365428
370 295000
371 360000
372 205000
373 319000
374 736450
375 453040
376 280500
377 299000
378 330000
379 337110
380 520000
381 395000
382 595000
383 375000
384 358575
385 588000
386 1025455
387 245000
388 238500
389 1167400
390 249000
391 441000
392 239000
393 455000
394 640000
395 344250
396 400000
397 485000
398 543617
399 276000
400 428400
401 360315
402 595000
403 595000
404 535000
405 534600
406 330000
407 627810
408 439000
409 304945
410 699000
411 712164
412 250000
413 449000
414 284859
415 595350
416 259000
417 590000
418 355000
419 389000
420 429000
421 649000
422 297987
423 159761
424 505505
425 336676
426 272764
427 343876
428 336290.5
429 308035
430 335226.5
431 532317
432 525937.5
433 397720
434 702150
435 592020
436 671488
437 593000
438 399000
439 1800000
440 288728
441 393211
442 789325
443 453040
444 345015
445 519745
446 400920
447 2318580
448 469728
449 247154
450 328828.5
451 260927.5
452 482729
453 257328.5
454 305923.5
455 529623
456 641395.58
457 669606.91
458 655544.02
459 471397.97
460 309958
461 699000
462 850000

462
dev-0/in.tsv Normal file

File diff suppressed because one or more lines are too long

462
dev-0/out.tsv Normal file
View File

@ -0,0 +1,462 @@
34.04683
21.822857
36.79288
28.77488
37.8241
55.4243
27.616209
30.512884
25.038166
28.195543
22.112524
25.038166
34.852108
33.66447
33.90779
28.780672
25.513222
23.166914
27.900084
21.822857
29.892996
22.86566
29.470081
40.21675
33.9889
43.258263
48.00881
28.815434
33.40956
39.069668
25.298868
26.457539
36.711773
29.12248
36.711773
25.009201
46.247635
36.480038
23.108982
47.782867
25.432116
36.13244
18.92618
29.290487
44.561768
36.271477
21.370975
28.085472
24.73112
21.24352
19.887877
21.822857
21.70699
32.250893
41.867855
25.878202
25.588535
48.588142
42.678925
18.92618
18.5612
38.287563
17.767511
78.01837
50.78962
32.389935
65.273
43.420475
33.75137
40.94092
17.883377
21.370975
29.354216
28.485212
24.771673
50.78962
37.754578
45.93479
26.202631
36.711773
20.37452
23.236435
24.140198
24.719534
21.822857
29.08772
21.24352
34.568233
20.084852
21.822857
79.00904
42.458782
42.09959
36.13244
17.188175
25.878202
15.102569
34.562443
28.832811
26.28374
39.202915
27.036875
29.12248
24.122818
19.059427
18.781347
39.78225
24.232891
24.273445
27.326542
26.921007
72.22502
20.415071
33.641296
24.209717
26.921007
28.079678
28.890747
22.86566
33.9889
19.215849
35.147568
81.940475
58.263046
31.09222
47.498997
21.880789
34.568233
25.710196
35.726906
27.616209
21.712784
68.51148
29.643883
27.790009
17.153416
20.084852
20.125404
21.822857
30.049417
56.467106
45.575603
37.40698
47.203533
38.044243
22.460125
32.830227
41.10893
29.417942
46.154938
20.988615
28.618458
53.39663
28.195543
44.799297
21.822857
24.516766
37.40698
25.194588
34.568233
27.911669
38.62358
28.166578
26.457539
28.815434
18.682861
48.657665
18.057178
37.46491
17.767511
24.122818
27.80739
28.780672
33.9889
48.976303
28.508387
39.3014
19.575035
29.325249
26.162079
46.3635
26.457539
14.870834
20.617838
16.272825
22.402193
29.08772
17.518396
37.349045
30.512884
20.084852
39.770664
25.298868
42.620995
25.576948
40.361588
27.036875
31.428236
36.13244
42.09959
25.298868
15.073602
18.694447
33.9889
23.224848
24.140198
30.512884
24.140198
34.18587
23.983776
34.568233
42.678925
70.452255
21.822857
37.46491
29.076134
30.74462
44.185196
29.475874
27.036875
29.614916
25.94193
11.394823
29.26152
26.416985
27.616209
40.35579
21.822857
26.162079
18.57858
23.236435
18.810314
24.140198
22.981527
37.302696
38.542473
27.616209
64.693665
23.908463
24.719534
23.07422
18.972528
24.510971
17.767511
28.60108
27.210674
27.384474
26.573406
28.716948
35.2055
30.860485
23.862118
18.694447
50.917072
26.023037
27.372889
29.354216
58.593266
46.154938
34.568233
19.644558
19.91105
42.215458
17.240316
28.079678
51.948288
17.912344
17.36777
69.32835
19.667728
18.346846
28.427279
68.16968
14.673861
31.938047
34.655136
39.20871
36.885574
28.195543
53.28076
26.167871
37.96314
17.767511
32.233513
41.520256
21.822857
26.822521
17.36777
36.885574
22.286325
57.68371
32.685394
27.616209
28.751705
24.777466
16.151165
25.495842
29.643883
28.085472
25.298868
29.93355
47.446857
42.678925
51.519585
21.822857
28.369345
28.154991
17.188175
34.921627
25.327835
27.482962
17.669024
49.109547
84.9704
33.9889
21.822857
30.802553
40.55277
18.480093
39.202915
23.334923
47.892944
37.46491
21.24352
25.298868
20.681566
25.298868
24.928095
25.298868
17.072308
27.036875
17.495224
72.22502
36.65384
43.779663
93.66042
33.9889
27.616209
27.876911
19.123156
40.813465
38.548267
29.614916
25.878202
15.1605015
44.474865
15.1605015
33.954136
0.38745427
27.106394
25.61171
29.064547
21.405735
92.501755
16.817402
23.07422
21.82865
18.619133
20.05009
30.512884
36.537975
43.779663
67.59034
24.140198
34.04683
30.512884
31.613623
105.531006
33.954136
27.610416
29.354216
18.955147
24.429865
65.4468
37.291107
23.705696
22.402193
24.140198
24.90492
33.40956
24.232891
58.03131
49.38763
27.506136
56.58297
51.46744
29.875618
17.269281
60.80053
17.33301
23.891083
23.526102
40.361588
35.379303
29.354216
56.58297
38.62358
35.454617
20.142784
32.899746
30.733032
58.320976
59.88518
45.459736
34.220634
49.63095
51.82663
32.830227
24.690567
31.671556
52.492867
17.483637
37.540222
24.736912
49.08058
43.559513
84.39106
30.91842
37.528637
41.247967
51.368954
24.122818
11.412204
37.841476
26.527058
25.936136
29.1051
29.1051
27.262814
29.012407
48.814083
48.55918
28.079678
43.547928
34.452366
45.55243
53.222824
49.97855
93.08109
28.20713
30.385431
54.572678
37.302696
25.936136
31.503548
38.525093
135.48842
37.60395
20.733706
26.451746
20.658394
39.19712
20.658394
27.285988
35.23447
41.972137
43.18874
41.972137
29.487461
31.184914
52.00622
41.462322
1 34.04683
2 21.822857
3 36.79288
4 28.77488
5 37.8241
6 55.4243
7 27.616209
8 30.512884
9 25.038166
10 28.195543
11 22.112524
12 25.038166
13 34.852108
14 33.66447
15 33.90779
16 28.780672
17 25.513222
18 23.166914
19 27.900084
20 21.822857
21 29.892996
22 22.86566
23 29.470081
24 40.21675
25 33.9889
26 43.258263
27 48.00881
28 28.815434
29 33.40956
30 39.069668
31 25.298868
32 26.457539
33 36.711773
34 29.12248
35 36.711773
36 25.009201
37 46.247635
38 36.480038
39 23.108982
40 47.782867
41 25.432116
42 36.13244
43 18.92618
44 29.290487
45 44.561768
46 36.271477
47 21.370975
48 28.085472
49 24.73112
50 21.24352
51 19.887877
52 21.822857
53 21.70699
54 32.250893
55 41.867855
56 25.878202
57 25.588535
58 48.588142
59 42.678925
60 18.92618
61 18.5612
62 38.287563
63 17.767511
64 78.01837
65 50.78962
66 32.389935
67 65.273
68 43.420475
69 33.75137
70 40.94092
71 17.883377
72 21.370975
73 29.354216
74 28.485212
75 24.771673
76 50.78962
77 37.754578
78 45.93479
79 26.202631
80 36.711773
81 20.37452
82 23.236435
83 24.140198
84 24.719534
85 21.822857
86 29.08772
87 21.24352
88 34.568233
89 20.084852
90 21.822857
91 79.00904
92 42.458782
93 42.09959
94 36.13244
95 17.188175
96 25.878202
97 15.102569
98 34.562443
99 28.832811
100 26.28374
101 39.202915
102 27.036875
103 29.12248
104 24.122818
105 19.059427
106 18.781347
107 39.78225
108 24.232891
109 24.273445
110 27.326542
111 26.921007
112 72.22502
113 20.415071
114 33.641296
115 24.209717
116 26.921007
117 28.079678
118 28.890747
119 22.86566
120 33.9889
121 19.215849
122 35.147568
123 81.940475
124 58.263046
125 31.09222
126 47.498997
127 21.880789
128 34.568233
129 25.710196
130 35.726906
131 27.616209
132 21.712784
133 68.51148
134 29.643883
135 27.790009
136 17.153416
137 20.084852
138 20.125404
139 21.822857
140 30.049417
141 56.467106
142 45.575603
143 37.40698
144 47.203533
145 38.044243
146 22.460125
147 32.830227
148 41.10893
149 29.417942
150 46.154938
151 20.988615
152 28.618458
153 53.39663
154 28.195543
155 44.799297
156 21.822857
157 24.516766
158 37.40698
159 25.194588
160 34.568233
161 27.911669
162 38.62358
163 28.166578
164 26.457539
165 28.815434
166 18.682861
167 48.657665
168 18.057178
169 37.46491
170 17.767511
171 24.122818
172 27.80739
173 28.780672
174 33.9889
175 48.976303
176 28.508387
177 39.3014
178 19.575035
179 29.325249
180 26.162079
181 46.3635
182 26.457539
183 14.870834
184 20.617838
185 16.272825
186 22.402193
187 29.08772
188 17.518396
189 37.349045
190 30.512884
191 20.084852
192 39.770664
193 25.298868
194 42.620995
195 25.576948
196 40.361588
197 27.036875
198 31.428236
199 36.13244
200 42.09959
201 25.298868
202 15.073602
203 18.694447
204 33.9889
205 23.224848
206 24.140198
207 30.512884
208 24.140198
209 34.18587
210 23.983776
211 34.568233
212 42.678925
213 70.452255
214 21.822857
215 37.46491
216 29.076134
217 30.74462
218 44.185196
219 29.475874
220 27.036875
221 29.614916
222 25.94193
223 11.394823
224 29.26152
225 26.416985
226 27.616209
227 40.35579
228 21.822857
229 26.162079
230 18.57858
231 23.236435
232 18.810314
233 24.140198
234 22.981527
235 37.302696
236 38.542473
237 27.616209
238 64.693665
239 23.908463
240 24.719534
241 23.07422
242 18.972528
243 24.510971
244 17.767511
245 28.60108
246 27.210674
247 27.384474
248 26.573406
249 28.716948
250 35.2055
251 30.860485
252 23.862118
253 18.694447
254 50.917072
255 26.023037
256 27.372889
257 29.354216
258 58.593266
259 46.154938
260 34.568233
261 19.644558
262 19.91105
263 42.215458
264 17.240316
265 28.079678
266 51.948288
267 17.912344
268 17.36777
269 69.32835
270 19.667728
271 18.346846
272 28.427279
273 68.16968
274 14.673861
275 31.938047
276 34.655136
277 39.20871
278 36.885574
279 28.195543
280 53.28076
281 26.167871
282 37.96314
283 17.767511
284 32.233513
285 41.520256
286 21.822857
287 26.822521
288 17.36777
289 36.885574
290 22.286325
291 57.68371
292 32.685394
293 27.616209
294 28.751705
295 24.777466
296 16.151165
297 25.495842
298 29.643883
299 28.085472
300 25.298868
301 29.93355
302 47.446857
303 42.678925
304 51.519585
305 21.822857
306 28.369345
307 28.154991
308 17.188175
309 34.921627
310 25.327835
311 27.482962
312 17.669024
313 49.109547
314 84.9704
315 33.9889
316 21.822857
317 30.802553
318 40.55277
319 18.480093
320 39.202915
321 23.334923
322 47.892944
323 37.46491
324 21.24352
325 25.298868
326 20.681566
327 25.298868
328 24.928095
329 25.298868
330 17.072308
331 27.036875
332 17.495224
333 72.22502
334 36.65384
335 43.779663
336 93.66042
337 33.9889
338 27.616209
339 27.876911
340 19.123156
341 40.813465
342 38.548267
343 29.614916
344 25.878202
345 15.1605015
346 44.474865
347 15.1605015
348 33.954136
349 0.38745427
350 27.106394
351 25.61171
352 29.064547
353 21.405735
354 92.501755
355 16.817402
356 23.07422
357 21.82865
358 18.619133
359 20.05009
360 30.512884
361 36.537975
362 43.779663
363 67.59034
364 24.140198
365 34.04683
366 30.512884
367 31.613623
368 105.531006
369 33.954136
370 27.610416
371 29.354216
372 18.955147
373 24.429865
374 65.4468
375 37.291107
376 23.705696
377 22.402193
378 24.140198
379 24.90492
380 33.40956
381 24.232891
382 58.03131
383 49.38763
384 27.506136
385 56.58297
386 51.46744
387 29.875618
388 17.269281
389 60.80053
390 17.33301
391 23.891083
392 23.526102
393 40.361588
394 35.379303
395 29.354216
396 56.58297
397 38.62358
398 35.454617
399 20.142784
400 32.899746
401 30.733032
402 58.320976
403 59.88518
404 45.459736
405 34.220634
406 49.63095
407 51.82663
408 32.830227
409 24.690567
410 31.671556
411 52.492867
412 17.483637
413 37.540222
414 24.736912
415 49.08058
416 43.559513
417 84.39106
418 30.91842
419 37.528637
420 41.247967
421 51.368954
422 24.122818
423 11.412204
424 37.841476
425 26.527058
426 25.936136
427 29.1051
428 29.1051
429 27.262814
430 29.012407
431 48.814083
432 48.55918
433 28.079678
434 43.547928
435 34.452366
436 45.55243
437 53.222824
438 49.97855
439 93.08109
440 28.20713
441 30.385431
442 54.572678
443 37.302696
444 25.936136
445 31.503548
446 38.525093
447 135.48842
448 37.60395
449 20.733706
450 26.451746
451 20.658394
452 39.19712
453 20.658394
454 27.285988
455 35.23447
456 41.972137
457 43.18874
458 41.972137
459 29.487461
460 31.184914
461 52.00622
462 41.462322

162
src/predict.py Normal file
View File

@ -0,0 +1,162 @@
#!/usr/bin/env python
import numpy as np
import torch
import pandas as pd
import sys
import argparse
from Network import Network
import random
#torch.set_printoptions(precision=10)
# Lookup tables that turn categorical text values into integer codes.
# clean_df applies each table with Series.map to one feature column
# (positions 0, 2, 8, 10, 16, 17 and 21 of the feature frame, in this order).
# The empty string stands for a missing value and has its own code in every
# table except dict_column_9.
dict_column_1 = {
    "": 4,
    "do remontu": 1,
    "do wykończenia": 2,
    "do zamieszkania": 3,
}
dict_column_3 = {
    "": 5,
    "pełna własność": 1,
    "spółdzielcze wł. z KW": 2,
    "spółdzielcze własnościowe": 3,
    "udział": 4,
}
# No entry for "": an empty cell is not covered by this table.
dict_column_9 = {
    "pierwotny": 0,
    "wtórny": 1,
}
dict_column_11 = {
    "": 8,
    "apartamentowiec": 1,
    "blok": 2,
    "dom wolnostojący": 3,
    "kamienica": 4,
    "loft": 5,
    "plomba": 6,
    "szeregowiec": 7,
}
dict_column_17 = {
    "": 7,
    "elektryczne": 1,
    "gazowe": 2,
    "inne": 3,
    "kotłownia": 4,
    "miejskie": 5,
    "piece kaflowe": 6,
}
dict_column_18 = {
    "": 4,
    "aluminiowe": 1,
    "drewniane": 2,
    "plastikowe": 3,
}
dict_column_23 = {
    "": 9,
    "beton": 1,
    "beton komórkowy": 2,
    "cegła": 3,
    "inne": 4,
    "pustak": 5,
    "silikat": 6,
    "wielka płyta": 7,
    "żelbet": 8,
}
def read_data(in_file):
    """Load a tab-separated file and split it into first column and the rest.

    Args:
        in_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A tuple ``(expected, data)`` where ``expected`` is the first column
        as a Series and ``data`` is a DataFrame with all remaining columns.
        Empty fields are kept as empty strings (``keep_default_na=False``).
    """
    print("Reading in")
    # header=None keeps every line as a record. The previous header=1 made
    # pandas skip the first line and consume the second one as column names,
    # silently losing two records.
    # NOTE(review): assumes the TSV files carry no header row - confirm.
    all_data = pd.read_csv(in_file, sep='\t', keep_default_na=False, header=None)
    expected = all_data.iloc[:, 0]
    data = all_data.iloc[:, 1:]
    print("Data read")
    return expected, data
def clean_df(data_):
    """Encode and normalise the raw feature frame in place and return it.

    Steps, in order (positions are 0-based column positions):
      1. Map categorical text in columns 0, 2, 8, 10, 16, 17, 21 to integer
         codes using the module-level ``dict_column_*`` tables.
      2. Replace 'parter' with 22 in column 14.
      3. Convert column 1 (the "money" column) to floats.
      4. Drop columns 4, 13, 18, 20, 22, 24.
      5. On the reduced frame: recode text labels in column 12, replace
         'więcej niż 10' with 1 in column 4, strip spaces in column 6.

    Args:
        data_: DataFrame of raw features; it is modified in place.

    Returns:
        The same DataFrame object, after cleaning.

    Raises:
        ValueError: if a cell of column 1 cannot be parsed as a number.
    """
    print("Cleaning data")

    # Categorical text -> integer code; values absent from a table become NaN.
    categorical = (
        (0, dict_column_1),
        (2, dict_column_3),
        (8, dict_column_9),
        (10, dict_column_11),
        (16, dict_column_17),
        (17, dict_column_18),
        (21, dict_column_23),
    )
    for position, codes in categorical:
        data_.iloc[:, position] = data_.iloc[:, position].map(codes)

    data_.iloc[:, 14] = [22 if cell == 'parter' else cell for cell in data_.iloc[:, 14]]

    # clear money
    data_.iloc[:, 1] = [
        _parse_amount(cell, row) for row, cell in enumerate(data_.iloc[:, 1])
    ]

    # deleting columns
    deleted_columns = [4, 13, 18, 20, 22, 24]
    data_.drop(data_.columns[deleted_columns], axis=1, inplace=True)

    # From here on positions refer to the reduced frame.
    # NOTE(review): position 12 is the former column 14, whose 'parter' cells
    # were already set to 22 above, so the 'parter' -> 11 entry cannot match.
    # It is kept unchanged until the intended code is confirmed.
    label_codes = {'> 10': 14, 'parter': 11, 'poddasze': 12, 'suterena': 13}
    data_.iloc[:, 12] = [
        label_codes[cell] if isinstance(cell, str) and cell in label_codes else cell
        for cell in data_.iloc[:, 12]
    ]

    data_.iloc[:, 4] = [1 if cell == 'więcej niż 10' else cell for cell in data_.iloc[:, 4]]

    # Only strings carry embedded spaces; other values are passed through
    # instead of failing on a missing .replace method.
    data_.iloc[:, 6] = [
        cell.replace(' ', '') if isinstance(cell, str) else cell
        for cell in data_.iloc[:, 6]
    ]

    print("Data cleaned")
    return data_


def _parse_amount(value, row):
    """Convert one raw cell to float; "" becomes 1.0, whitespace is removed."""
    if not isinstance(value, str):
        # Already numeric (pandas inferred a numeric dtype for the column).
        return float(value)
    if value == "":
        return 1.0
    try:
        # split/join removes every whitespace character, including
        # non-breaking spaces used as thousands separators.
        return float("".join(value.split()))
    except ValueError as err:
        raise ValueError(f"row {row}: cannot parse {value!r} as a number") from err
def clear(data_):
    """Convert the feature column at position 2 to floats, in place.

    Empty strings become 1.0, strings have all whitespace removed before
    parsing, and values that are already numeric are passed through
    ``float``.

    Args:
        data_: DataFrame with at least three columns; it is modified in place.

    Returns:
        The same DataFrame object.

    Raises:
        ValueError: if a string cell cannot be parsed as a number.
    """
    target = data_.columns[2]
    # Build the whole column first and assign once, instead of writing
    # cell by cell through iloc.
    data_[target] = [
        _coerce_float(cell, row) for row, cell in enumerate(data_.iloc[:, 2])
    ]
    return data_


def _coerce_float(value, row):
    """Convert one raw cell to float; "" becomes 1.0, whitespace is removed."""
    if not isinstance(value, str):
        return float(value)
    if value == "":
        return 1.0
    try:
        # split/join removes every whitespace character, including
        # non-breaking spaces used as thousands separators.
        return float("".join(value.split()))
    except ValueError as err:
        raise ValueError(f"row {row}: cannot parse {value!r} as a number") from err
def main():
    """Run the model over one input TSV and write one prediction per line.

    Command-line arguments:
        --in_file     input TSV (required)
        --checkpoint  optional state-dict file loaded into the model
        --out         output file, one predicted value per line (required)

    The model receives a single feature: column position 2 of the frame
    returned by ``read_data`` after ``clear`` has converted it to floats.

    NOTE(review): ``read_data`` always treats the first column of the file as
    the target and drops it from the features - confirm this matches the
    layout of the files passed to this script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--in_file", help="In tsv")
    parser.add_argument("--checkpoint")
    parser.add_argument("--out")
    args = parser.parse_args()
    if not args.in_file or not args.out:
        # Fail with a usage message instead of an obscure error further down.
        parser.error("--in_file and --out are required")

    _, data = read_data(args.in_file)
    clean_data = clear(data)
    clean_data = clean_data.iloc[:, 2]

    model = Network(1)
    if args.checkpoint:
        print(f"Loading model : {args.checkpoint}")
        model.load_state_dict(torch.load(args.checkpoint))
    # Inference only: switch off training-mode layers and gradient tracking.
    model.eval()

    with open(args.out, 'w') as f, torch.no_grad():
        for value in clean_data:
            tensor = torch.tensor([value])
            y = model(tensor.float())
            f.write(str(y.item()) + '\n')
# Run only when executed as a script, so importing this module has no side effects.
if __name__ == "__main__":
    main()

140
src/train.py Normal file
View File

@ -0,0 +1,140 @@
#!/usr/bin/env python
import numpy as np
import torch
import pandas as pd
import sys
import argparse
from Network import Network
import random
#torch.set_printoptions(precision=10)
# Lookup tables that turn categorical text values into integer codes.
# clean_df applies each table with Series.map to one feature column
# (positions 0, 2, 8, 10, 16, 17 and 21 of the feature frame, in this order).
# The empty string stands for a missing value and has its own code in every
# table except dict_column_9.
dict_column_1 = {
    "": 4,
    "do remontu": 1,
    "do wykończenia": 2,
    "do zamieszkania": 3,
}
dict_column_3 = {
    "": 5,
    "pełna własność": 1,
    "spółdzielcze wł. z KW": 2,
    "spółdzielcze własnościowe": 3,
    "udział": 4,
}
# No entry for "": an empty cell is not covered by this table.
dict_column_9 = {
    "pierwotny": 0,
    "wtórny": 1,
}
dict_column_11 = {
    "": 8,
    "apartamentowiec": 1,
    "blok": 2,
    "dom wolnostojący": 3,
    "kamienica": 4,
    "loft": 5,
    "plomba": 6,
    "szeregowiec": 7,
}
dict_column_17 = {
    "": 7,
    "elektryczne": 1,
    "gazowe": 2,
    "inne": 3,
    "kotłownia": 4,
    "miejskie": 5,
    "piece kaflowe": 6,
}
dict_column_18 = {
    "": 4,
    "aluminiowe": 1,
    "drewniane": 2,
    "plastikowe": 3,
}
dict_column_23 = {
    "": 9,
    "beton": 1,
    "beton komórkowy": 2,
    "cegła": 3,
    "inne": 4,
    "pustak": 5,
    "silikat": 6,
    "wielka płyta": 7,
    "żelbet": 8,
}
def read_data(in_file):
    """Load a tab-separated file and split it into target and features.

    Args:
        in_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A tuple ``(expected, data)`` where ``expected`` is the first column
        as a Series and ``data`` is a DataFrame with all remaining columns.
        Empty fields are kept as empty strings (``keep_default_na=False``).
    """
    print("Reading in")
    # header=None keeps every line as a record. The previous header=1 made
    # pandas skip the first line and consume the second one as column names,
    # silently losing two training records.
    # NOTE(review): assumes the TSV files carry no header row - confirm.
    all_data = pd.read_csv(in_file, sep='\t', keep_default_na=False, header=None)
    expected = all_data.iloc[:, 0]
    data = all_data.iloc[:, 1:]
    print("Data read")
    return expected, data
def clean_df(data_):
    """Encode and normalise the raw feature frame in place and return it.

    Steps, in order (positions are 0-based column positions):
      1. Map categorical text in columns 0, 2, 8, 10, 16, 17, 21 to integer
         codes using the module-level ``dict_column_*`` tables.
      2. Replace 'parter' with 22 in column 14.
      3. Convert column 1 (the "money" column) to floats.
      4. Drop columns 4, 13, 18, 20, 22, 24.
      5. On the reduced frame: recode text labels in column 12, replace
         'więcej niż 10' with 1 in column 4, strip spaces in column 6.

    Args:
        data_: DataFrame of raw features; it is modified in place.

    Returns:
        The same DataFrame object, after cleaning.

    Raises:
        ValueError: if a cell of column 1 cannot be parsed as a number.
    """
    print("Cleaning data")

    # Categorical text -> integer code; values absent from a table become NaN.
    categorical = (
        (0, dict_column_1),
        (2, dict_column_3),
        (8, dict_column_9),
        (10, dict_column_11),
        (16, dict_column_17),
        (17, dict_column_18),
        (21, dict_column_23),
    )
    for position, codes in categorical:
        data_.iloc[:, position] = data_.iloc[:, position].map(codes)

    data_.iloc[:, 14] = [22 if cell == 'parter' else cell for cell in data_.iloc[:, 14]]

    # clear money
    data_.iloc[:, 1] = [
        _parse_amount(cell, row) for row, cell in enumerate(data_.iloc[:, 1])
    ]

    # deleting columns
    deleted_columns = [4, 13, 18, 20, 22, 24]
    data_.drop(data_.columns[deleted_columns], axis=1, inplace=True)

    # From here on positions refer to the reduced frame.
    # NOTE(review): position 12 is the former column 14, whose 'parter' cells
    # were already set to 22 above, so the 'parter' -> 11 entry cannot match.
    # It is kept unchanged until the intended code is confirmed.
    label_codes = {'> 10': 14, 'parter': 11, 'poddasze': 12, 'suterena': 13}
    data_.iloc[:, 12] = [
        label_codes[cell] if isinstance(cell, str) and cell in label_codes else cell
        for cell in data_.iloc[:, 12]
    ]

    data_.iloc[:, 4] = [1 if cell == 'więcej niż 10' else cell for cell in data_.iloc[:, 4]]

    # Only strings carry embedded spaces; other values are passed through
    # instead of failing on a missing .replace method.
    data_.iloc[:, 6] = [
        cell.replace(' ', '') if isinstance(cell, str) else cell
        for cell in data_.iloc[:, 6]
    ]

    print("Data cleaned")
    return data_


def _parse_amount(value, row):
    """Convert one raw cell to float; "" becomes 1.0, whitespace is removed."""
    if not isinstance(value, str):
        # Already numeric (pandas inferred a numeric dtype for the column).
        return float(value)
    if value == "":
        return 1.0
    try:
        # split/join removes every whitespace character, including
        # non-breaking spaces used as thousands separators.
        return float("".join(value.split()))
    except ValueError as err:
        raise ValueError(f"row {row}: cannot parse {value!r} as a number") from err
def main():
    """Train the single-feature model with per-sample Adam updates.

    Command-line arguments:
        --in_file     training TSV; the first column is the target
        --checkpoint  optional state-dict file to resume from

    The only feature used is column position 1 of the cleaned frame.
    Training runs 100 passes over the data in a freshly shuffled order each
    pass, one sample per optimizer step. A checkpoint is written every
    50000 steps and once more at the end; file names contain the step
    counter, the learning rate and a random run number.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--in_file", help="In tsv")
    parser.add_argument("--checkpoint")
    args = parser.parse_args()

    expected, data = read_data(args.in_file)
    clean_data = clean_df(data)
    clean_data = clean_data.iloc[:, 1]

    model = Network(1)
    if args.checkpoint:
        print(f"Loading model : {args.checkpoint}")
        model.load_state_dict(torch.load(args.checkpoint))

    # NOTE(review): lr=10 is unusually large for Adam - confirm it is intended.
    lr = 10
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()
    model.train()
    # Random tag so checkpoints of different runs do not overwrite each other.
    random_number = random.randint(0, 50)
    print("Starting training")

    counter = 0
    order = list(range(len(clean_data.index)))
    for _ in range(100):
        random.shuffle(order)
        for i in order:
            # `order` holds positions, so index positionally rather than by label.
            tensor = torch.tensor([clean_data.iloc[i]])
            y = torch.tensor([float(expected.iloc[i])])

            optimizer.zero_grad()
            y_predcited = model(tensor.float())
            loss = criterion(y_predcited, y.float())
            loss.backward()
            optimizer.step()

            if counter % 10000 == 0:
                print(f"{counter} : {loss}")
                print(f"{y} : {y_predcited}")
            if counter % 50000 == 0:
                print(f"Saving checkpoint model-{counter}-{lr}-{random_number}.ckpt")
                torch.save(model.state_dict(), f"model-{counter}-{lr}-{random_number}.ckpt")
            counter += 1

    # Report the path that is actually written (the old message announced
    # "model-final-..." while the file was saved under the counter-based name).
    final_path = f"model-{counter}-{lr}-{random_number}.ckpt"
    print(f"Saving last model {final_path}")
    torch.save(model.state_dict(), final_path)
# Run only when executed as a script, so importing this module has no side effects.
if __name__ == "__main__":
    main()

418
test-A/in.tsv Normal file

File diff suppressed because one or more lines are too long

416
test-A/out.tsv Normal file
View File

@ -0,0 +1,416 @@
-495323.9375
-950439.375
-643593.0
-499419.0
-515375.5625
-348890.5625
-377979.5625
-594170.0
-580049.125
-424013.5625
-586827.125
-507609.0625
-588945.25
-690333.125
-865996.625
-130440.78125
-850746.125
-943237.75
-456915.1875
-761078.625
-813043.375
-839873.0
-755006.625
-615351.3125
-904405.375
-652771.5625
-1003675.125
-523848.0625
-224203.3125
-415682.25
-99657.3125
-1266040.75
-682707.875
-614927.6875
-457480.0
-509162.375
-740179.75
-604619.4375
-569034.875
-503514.0625
-861054.375
-427120.1875
-844391.75
-474142.625
-320225.1875
-553219.5
-755147.875
-430085.5625
-615351.3125
-1034034.875
-1147849.125
-424296.0
-550677.75
-658419.9375
-615351.3125
-1064536.0
-510856.875
-704454.0
-184100.0625
-656019.375
-690333.125
-804429.625
-581320.0
-882094.375
-572847.5
-594170.0
-671269.9375
-754441.75
-561409.5625
-480920.6875
-530626.125
-474142.625
-909206.5
-709113.875
-697393.5
-650653.4375
-615351.3125
-640062.8125
-644581.5
-631307.875
-603489.75
-491934.9375
-730153.875
-923892.25
-629472.125
-175627.5625
-629330.9375
-589368.875
-615351.3125
-601230.4375
-506620.625
-388852.625
-498148.125
-239877.5
-591204.625
-286758.75
-683131.5
-558867.8125
-556749.6875
-615351.3125
-465952.5625
-392806.4375
-524836.5625
-653054.0
-366541.625
-756560.0
-715750.625
-510292.0625
-332934.0
-530626.125
-708831.375
-1035447.0
-583720.5625
-982635.0
-502384.375
-604478.25
-502384.375
-600524.375
-1014548.125
-631307.875
-792003.25
-700076.5
-550395.3125
-756560.0
-1338480.75
-580049.125
-523565.6875
-427120.1875
-1105768.875
-476119.5625
-611679.875
-637097.4375
-474566.25
-897768.625
-486427.8125
-474142.625
-876022.5
-953687.25
-517352.5
-752606.125
-465670.125
-460163.0
-649382.5625
-843262.0
-418788.875
-397183.9375
-743145.125
-522577.1875
-784660.5
-700782.5
-710384.75
-425708.0625
-676353.375
-468070.6875
-770680.75
-742439.125
-545029.375
-608573.25
-383486.6875
-657713.875
-950721.875
-728318.25
-421895.4375
-683837.5
-580049.125
-597417.8125
-479508.5625
-537827.75
-596146.9375
-498289.3125
-580190.3125
-709537.5
-687932.5
-488969.5625
-615351.3125
-991672.375
-976704.25
-693016.0
-607867.25
-493629.4375
-523706.875
-500125.0625
-675929.75
-645569.9375
-493488.25
-1139800.25
-530061.25
-488122.3125
-526107.4375
-608290.875
-597700.1875
-547853.5625
-828858.75
-389135.0625
-546865.125
-756560.0
-707136.875
-427967.4375
-728318.25
-701488.625
-584426.625
-489675.5625
-870797.75
-389417.4375
-799063.75
-298620.3125
-537262.9375
-544746.9375
-474142.625
-750770.375
-1625416.75
-454514.625
-647123.25
-219967.0625
-237476.9375
-798922.5
-728318.25
-210364.875
-549124.4375
-756560.0
-766585.75
-707136.875
-1355990.625
-527801.9375
-768845.125
-356515.8125
-718010.0
-798216.5
-635544.125
-613939.1875
-692592.375
-937165.75
-493488.25
-645993.5625
-460021.75
-182546.8125
-837190.125
-654042.4375
-527378.3125
-383486.6875
-488545.9375
-582449.6875
-365976.8125
-692451.25
-1745020.5
-589368.875
-615351.3125
-523706.875
-328697.75
-856818.125
-502666.8125
-651359.5
-483744.8125
-1116077.125
-824340.125
-369365.8125
-724223.125
-791862.125
-769974.75
-841285.125
-654183.6875
-498571.75
-495182.75
-841285.125
-498854.125
-427826.1875
-927563.625
-580049.125
-701771.0
-629472.125
-427543.8125
-500689.875
-448442.6875
-746816.5
-758960.5
-646558.375
-507185.4375
-950721.875
-558867.8125
-820527.5
-394924.5625
-516505.25
-798922.5
-768562.625
-603348.5625
-364423.5
-656584.1875
-678612.75
-609702.9375
-572847.5
-463693.1875
-401279.0
-475837.125
-645146.3125
-404385.5625
-663644.625
-682990.25
-714197.375
-473295.375
-500407.4375
-1003675.125
-815867.625
-785931.375
-923751.0
-781412.625
-868256.0
-756560.0
-646982.0
-554631.5625
-931799.875
-441523.4375
-615351.3125
-580049.125
-458327.25
-1180185.875
-286617.5625
-580049.125
-6459.59375
-755147.875
-601230.4375
-672964.4375
-540651.9375
-728318.25
-678895.125
-1358673.625
-364564.75
-421613.0
-1029657.5
-672540.8125
-577224.9375
-378685.625
-684402.375
-577931.0
-833236.25
-697675.875
-601230.4375
-656725.4375
-911889.5
-685955.625
-492358.5625
-717021.5
-562680.4375
-544746.9375
-289865.375
-498854.125
-493488.25
-661385.3125
-1250790.25
-628766.125
-424296.0
-784801.625
-403538.3125
-1102521.125
-467082.1875
-302291.6875
-722669.875
-567199.125
-473154.1875
-551807.375
-615351.3125
-558867.8125
-516505.25
-678612.75
-510856.875
-613656.8125
-243972.5625
-332934.0
-587109.5625
-800758.25
-251597.8125
-735519.875
-963289.375
-700076.5
-788755.5
-428955.875
-734672.625
-589933.75
-502384.375
-930529.0
-792144.5
-481909.125
-1391998.875
-565928.25
-401843.8125
-445900.9375
-561692.0
-594170.0
-594170.0
-608290.875
-551525.0
-608290.875
-580049.125
-629472.125
-544746.9375
-563527.6875
-332086.75
-629472.125
-940131.125
-629472.125
-601230.4375
-587109.5625
-587109.5625
-161365.5
-636532.5625
-728318.25
-724929.25
-388005.375
-615351.3125
-629472.125
-516646.4375
-664774.3125
-636532.5625
-544605.75
-961453.75
-943237.75
-544746.9375
1 -495323.9375
2 -950439.375
3 -643593.0
4 -499419.0
5 -515375.5625
6 -348890.5625
7 -377979.5625
8 -594170.0
9 -580049.125
10 -424013.5625
11 -586827.125
12 -507609.0625
13 -588945.25
14 -690333.125
15 -865996.625
16 -130440.78125
17 -850746.125
18 -943237.75
19 -456915.1875
20 -761078.625
21 -813043.375
22 -839873.0
23 -755006.625
24 -615351.3125
25 -904405.375
26 -652771.5625
27 -1003675.125
28 -523848.0625
29 -224203.3125
30 -415682.25
31 -99657.3125
32 -1266040.75
33 -682707.875
34 -614927.6875
35 -457480.0
36 -509162.375
37 -740179.75
38 -604619.4375
39 -569034.875
40 -503514.0625
41 -861054.375
42 -427120.1875
43 -844391.75
44 -474142.625
45 -320225.1875
46 -553219.5
47 -755147.875
48 -430085.5625
49 -615351.3125
50 -1034034.875
51 -1147849.125
52 -424296.0
53 -550677.75
54 -658419.9375
55 -615351.3125
56 -1064536.0
57 -510856.875
58 -704454.0
59 -184100.0625
60 -656019.375
61 -690333.125
62 -804429.625
63 -581320.0
64 -882094.375
65 -572847.5
66 -594170.0
67 -671269.9375
68 -754441.75
69 -561409.5625
70 -480920.6875
71 -530626.125
72 -474142.625
73 -909206.5
74 -709113.875
75 -697393.5
76 -650653.4375
77 -615351.3125
78 -640062.8125
79 -644581.5
80 -631307.875
81 -603489.75
82 -491934.9375
83 -730153.875
84 -923892.25
85 -629472.125
86 -175627.5625
87 -629330.9375
88 -589368.875
89 -615351.3125
90 -601230.4375
91 -506620.625
92 -388852.625
93 -498148.125
94 -239877.5
95 -591204.625
96 -286758.75
97 -683131.5
98 -558867.8125
99 -556749.6875
100 -615351.3125
101 -465952.5625
102 -392806.4375
103 -524836.5625
104 -653054.0
105 -366541.625
106 -756560.0
107 -715750.625
108 -510292.0625
109 -332934.0
110 -530626.125
111 -708831.375
112 -1035447.0
113 -583720.5625
114 -982635.0
115 -502384.375
116 -604478.25
117 -502384.375
118 -600524.375
119 -1014548.125
120 -631307.875
121 -792003.25
122 -700076.5
123 -550395.3125
124 -756560.0
125 -1338480.75
126 -580049.125
127 -523565.6875
128 -427120.1875
129 -1105768.875
130 -476119.5625
131 -611679.875
132 -637097.4375
133 -474566.25
134 -897768.625
135 -486427.8125
136 -474142.625
137 -876022.5
138 -953687.25
139 -517352.5
140 -752606.125
141 -465670.125
142 -460163.0
143 -649382.5625
144 -843262.0
145 -418788.875
146 -397183.9375
147 -743145.125
148 -522577.1875
149 -784660.5
150 -700782.5
151 -710384.75
152 -425708.0625
153 -676353.375
154 -468070.6875
155 -770680.75
156 -742439.125
157 -545029.375
158 -608573.25
159 -383486.6875
160 -657713.875
161 -950721.875
162 -728318.25
163 -421895.4375
164 -683837.5
165 -580049.125
166 -597417.8125
167 -479508.5625
168 -537827.75
169 -596146.9375
170 -498289.3125
171 -580190.3125
172 -709537.5
173 -687932.5
174 -488969.5625
175 -615351.3125
176 -991672.375
177 -976704.25
178 -693016.0
179 -607867.25
180 -493629.4375
181 -523706.875
182 -500125.0625
183 -675929.75
184 -645569.9375
185 -493488.25
186 -1139800.25
187 -530061.25
188 -488122.3125
189 -526107.4375
190 -608290.875
191 -597700.1875
192 -547853.5625
193 -828858.75
194 -389135.0625
195 -546865.125
196 -756560.0
197 -707136.875
198 -427967.4375
199 -728318.25
200 -701488.625
201 -584426.625
202 -489675.5625
203 -870797.75
204 -389417.4375
205 -799063.75
206 -298620.3125
207 -537262.9375
208 -544746.9375
209 -474142.625
210 -750770.375
211 -1625416.75
212 -454514.625
213 -647123.25
214 -219967.0625
215 -237476.9375
216 -798922.5
217 -728318.25
218 -210364.875
219 -549124.4375
220 -756560.0
221 -766585.75
222 -707136.875
223 -1355990.625
224 -527801.9375
225 -768845.125
226 -356515.8125
227 -718010.0
228 -798216.5
229 -635544.125
230 -613939.1875
231 -692592.375
232 -937165.75
233 -493488.25
234 -645993.5625
235 -460021.75
236 -182546.8125
237 -837190.125
238 -654042.4375
239 -527378.3125
240 -383486.6875
241 -488545.9375
242 -582449.6875
243 -365976.8125
244 -692451.25
245 -1745020.5
246 -589368.875
247 -615351.3125
248 -523706.875
249 -328697.75
250 -856818.125
251 -502666.8125
252 -651359.5
253 -483744.8125
254 -1116077.125
255 -824340.125
256 -369365.8125
257 -724223.125
258 -791862.125
259 -769974.75
260 -841285.125
261 -654183.6875
262 -498571.75
263 -495182.75
264 -841285.125
265 -498854.125
266 -427826.1875
267 -927563.625
268 -580049.125
269 -701771.0
270 -629472.125
271 -427543.8125
272 -500689.875
273 -448442.6875
274 -746816.5
275 -758960.5
276 -646558.375
277 -507185.4375
278 -950721.875
279 -558867.8125
280 -820527.5
281 -394924.5625
282 -516505.25
283 -798922.5
284 -768562.625
285 -603348.5625
286 -364423.5
287 -656584.1875
288 -678612.75
289 -609702.9375
290 -572847.5
291 -463693.1875
292 -401279.0
293 -475837.125
294 -645146.3125
295 -404385.5625
296 -663644.625
297 -682990.25
298 -714197.375
299 -473295.375
300 -500407.4375
301 -1003675.125
302 -815867.625
303 -785931.375
304 -923751.0
305 -781412.625
306 -868256.0
307 -756560.0
308 -646982.0
309 -554631.5625
310 -931799.875
311 -441523.4375
312 -615351.3125
313 -580049.125
314 -458327.25
315 -1180185.875
316 -286617.5625
317 -580049.125
318 -6459.59375
319 -755147.875
320 -601230.4375
321 -672964.4375
322 -540651.9375
323 -728318.25
324 -678895.125
325 -1358673.625
326 -364564.75
327 -421613.0
328 -1029657.5
329 -672540.8125
330 -577224.9375
331 -378685.625
332 -684402.375
333 -577931.0
334 -833236.25
335 -697675.875
336 -601230.4375
337 -656725.4375
338 -911889.5
339 -685955.625
340 -492358.5625
341 -717021.5
342 -562680.4375
343 -544746.9375
344 -289865.375
345 -498854.125
346 -493488.25
347 -661385.3125
348 -1250790.25
349 -628766.125
350 -424296.0
351 -784801.625
352 -403538.3125
353 -1102521.125
354 -467082.1875
355 -302291.6875
356 -722669.875
357 -567199.125
358 -473154.1875
359 -551807.375
360 -615351.3125
361 -558867.8125
362 -516505.25
363 -678612.75
364 -510856.875
365 -613656.8125
366 -243972.5625
367 -332934.0
368 -587109.5625
369 -800758.25
370 -251597.8125
371 -735519.875
372 -963289.375
373 -700076.5
374 -788755.5
375 -428955.875
376 -734672.625
377 -589933.75
378 -502384.375
379 -930529.0
380 -792144.5
381 -481909.125
382 -1391998.875
383 -565928.25
384 -401843.8125
385 -445900.9375
386 -561692.0
387 -594170.0
388 -594170.0
389 -608290.875
390 -551525.0
391 -608290.875
392 -580049.125
393 -629472.125
394 -544746.9375
395 -563527.6875
396 -332086.75
397 -629472.125
398 -940131.125
399 -629472.125
400 -601230.4375
401 -587109.5625
402 -587109.5625
403 -161365.5
404 -636532.5625
405 -728318.25
406 -724929.25
407 -388005.375
408 -615351.3125
409 -629472.125
410 -516646.4375
411 -664774.3125
412 -636532.5625
413 -544605.75
414 -961453.75
415 -943237.75
416 -544746.9375

BIN
train/.train.tsv.swp Normal file

Binary file not shown.

2547
train/train.tsv Normal file

File diff suppressed because one or more lines are too long