fix predict.py

This commit is contained in:
s426135 2020-03-22 12:56:42 +01:00
parent 3bb3c26b5d
commit 773683e7d4
9 changed files with 272 additions and 268 deletions

Binary file not shown.

Binary file not shown.

View File

@ -55,7 +55,7 @@
P P
P P
S S
S P
S S
S S
P P
@ -101,7 +101,7 @@
S S
P P
S S
P S
S S
P P
S S
@ -148,16 +148,16 @@
P P
S S
S S
S
S
P P
S S
P P
S S
P P
S S
P
S S
P P
P
S S
P P
S S
@ -200,7 +200,7 @@
P P
S S
P P
P S
S S
S S
P P
@ -309,7 +309,7 @@
S S
P P
S S
P S
S S
S S
P P
@ -319,7 +319,7 @@
S S
S S
S S
P S
P P
S S
P P
@ -515,7 +515,7 @@
S S
P P
S S
P S
S S
S S
S S
@ -560,7 +560,7 @@
S S
S S
S S
P S
S S
P P
S S
@ -689,7 +689,7 @@
P P
S S
S S
S P
S S
P P
S S
@ -739,7 +739,7 @@
S S
S S
S S
S P
P P
S S
S S
@ -749,7 +749,7 @@
S S
S S
S S
P S
P P
S S
S S
@ -763,7 +763,7 @@
S S
P P
S S
S P
S S
P P
S S
@ -902,7 +902,7 @@
P P
P P
P P
S P
P P
P P
P P
@ -1042,7 +1042,7 @@
S S
S S
S S
P S
P P
S S
S S
@ -1069,7 +1069,7 @@
S S
S S
P P
S P
S S
S S
S S
@ -1085,7 +1085,7 @@
S S
S S
S S
S P
S S
P P
S S
@ -1220,7 +1220,7 @@
S S
S S
S S
S P
P P
S S
P P
@ -1252,7 +1252,7 @@
P P
S S
S S
P S
S S
S S
S S
@ -1461,7 +1461,7 @@
S S
S S
P P
S P
S S
S S
S S
@ -1528,7 +1528,7 @@
P P
P P
P P
P S
S S
S S
S S
@ -1543,7 +1543,7 @@
S S
S S
S S
P S
S S
S S
S S
@ -1591,7 +1591,7 @@
S S
P P
P P
P S
S S
S S
S S
@ -1863,7 +1863,7 @@
S S
P P
S S
S P
S S
P P
S S
@ -1900,7 +1900,7 @@
P P
P P
S S
S P
S S
S S
S S
@ -1979,7 +1979,7 @@
P P
P P
S S
S P
S S
S S
S S
@ -2018,7 +2018,7 @@
P P
S S
S S
P S
S S
P P
S S
@ -2051,7 +2051,7 @@
P P
P P
S S
S P
P P
P P
P P
@ -2101,7 +2101,7 @@
S S
S S
S S
S P
S S
S S
S S
@ -2160,7 +2160,7 @@
P P
S S
S S
S P
P P
S S
S S
@ -2171,7 +2171,7 @@
S S
S S
S S
S P
P P
S S
P P
@ -2326,7 +2326,7 @@
S S
S S
S S
P S
S S
S S
S S
@ -2516,7 +2516,7 @@
S S
S S
S S
P S
P P
S S
P P
@ -2608,7 +2608,7 @@
S S
S S
S S
P S
P P
P P
P P
@ -2672,7 +2672,7 @@
S S
S S
S S
P S
S S
S S
S S
@ -2690,7 +2690,7 @@
S S
S S
P P
P S
S S
S S
P P
@ -2825,7 +2825,7 @@
P P
S S
S S
S P
S S
S S
P P
@ -2908,7 +2908,7 @@
P P
S S
S S
P S
P P
S S
P P
@ -2929,7 +2929,7 @@
S S
P P
S S
P S
S S
S S
P P
@ -2944,7 +2944,7 @@
S S
S S
P P
S P
S S
S S
S S
@ -3060,7 +3060,7 @@
S S
S S
S S
P S
S S
S S
S S
@ -3103,7 +3103,7 @@
S S
P P
S S
P S
S S
S S
P P
@ -3165,7 +3165,7 @@
S S
P P
S S
S P
S S
S S
P P
@ -3202,7 +3202,7 @@
S S
P P
P P
S P
S S
S S
S S
@ -3282,7 +3282,7 @@
P P
S S
S S
P S
S S
S S
S S
@ -3344,7 +3344,7 @@
S S
S S
S S
P S
S S
S S
S S
@ -3400,7 +3400,7 @@
S S
S S
S S
P S
S S
P P
P P
@ -3469,11 +3469,11 @@
P P
S S
P P
S P
P P
S S
S S
P S
P P
S S
S S
@ -3507,13 +3507,13 @@
P P
S S
P P
S
P P
S P
S S
S S
P P
P P
P
S S
P P
P P
@ -3579,7 +3579,7 @@
S S
S S
P P
S P
P P
S S
P P
@ -3612,7 +3612,7 @@
P P
S S
S S
P S
P P
S S
P P
@ -3632,7 +3632,7 @@
S S
S S
S S
P S
P P
S S
S S
@ -3891,7 +3891,7 @@
S S
P P
P P
P S
S S
S S
P P
@ -3918,11 +3918,11 @@
P P
P P
S S
S P
S
S S
S S
P P
P
S S
P P
P P
@ -3937,7 +3937,7 @@
S S
S S
S S
P S
S S
S S
S S
@ -3974,7 +3974,7 @@
S S
P P
S S
P S
S S
S S
S S
@ -4144,16 +4144,16 @@
S S
S S
S S
P
S S
S S
S S
S S
S S
S
P
P P
P P
P P
S
P P
S S
S S
@ -4206,7 +4206,7 @@
S S
S S
S S
P S
P P
P P
P P
@ -4293,7 +4293,7 @@
P P
P P
S S
S P
S S
S S
P P
@ -4391,7 +4391,7 @@
P P
S S
P P
S P
S S
P P
S S
@ -4452,9 +4452,9 @@
P P
P P
S S
P
S S
P P
P
S S
S S
S S
@ -4640,7 +4640,7 @@
P P
S S
S S
P S
P P
S S
S S
@ -4752,7 +4752,7 @@
P P
S S
S S
P S
P P
S S
S S
@ -4805,7 +4805,7 @@
S S
S S
P P
S P
S S
S S
P P
@ -5034,14 +5034,14 @@
S S
S S
P P
P
S S
S S
S S
S S
S S
P
S S
P
P
S S
S S
S S
@ -5113,7 +5113,7 @@
S S
P P
S S
S P
S S
S S
S S
@ -5211,10 +5211,10 @@
P P
P P
S S
P
S S
S S
P S
S
P P
S S
S S

1 S
55 P
56 P
57 S
58 S P
59 S
60 S
61 P
101 S
102 P
103 S
104 P S
105 S
106 P
107 S
148 P
149 S
150 S
S
S
151 P
152 S
153 P
154 S
155 P
156 S
157 P
158 S
159 P
160 P
161 S
162 P
163 S
200 P
201 S
202 P
203 P S
204 S
205 S
206 P
309 S
310 P
311 S
312 P S
313 S
314 S
315 P
319 S
320 S
321 S
322 P S
323 P
324 S
325 P
515 S
516 P
517 S
518 P S
519 S
520 S
521 S
560 S
561 S
562 S
563 P S
564 S
565 P
566 S
689 P
690 S
691 S
692 S P
693 S
694 P
695 S
739 S
740 S
741 S
742 S P
743 P
744 S
745 S
749 S
750 S
751 S
752 P S
753 P
754 S
755 S
763 S
764 P
765 S
766 S P
767 S
768 P
769 S
902 P
903 P
904 P
905 S P
906 P
907 P
908 P
1042 S
1043 S
1044 S
1045 P S
1046 P
1047 S
1048 S
1069 S
1070 S
1071 P
1072 S P
1073 S
1074 S
1075 S
1085 S
1086 S
1087 S
1088 S P
1089 S
1090 P
1091 S
1220 S
1221 S
1222 S
1223 S P
1224 P
1225 S
1226 P
1252 P
1253 S
1254 S
1255 P S
1256 S
1257 S
1258 S
1461 S
1462 S
1463 P
1464 S P
1465 S
1466 S
1467 S
1528 P
1529 P
1530 P
1531 P S
1532 S
1533 S
1534 S
1543 S
1544 S
1545 S
1546 P S
1547 S
1548 S
1549 S
1591 S
1592 P
1593 P
1594 P S
1595 S
1596 S
1597 S
1863 S
1864 P
1865 S
1866 S P
1867 S
1868 P
1869 S
1900 P
1901 P
1902 S
1903 S P
1904 S
1905 S
1906 S
1979 P
1980 P
1981 S
1982 S P
1983 S
1984 S
1985 S
2018 P
2019 S
2020 S
2021 P S
2022 S
2023 P
2024 S
2051 P
2052 P
2053 S
2054 S P
2055 P
2056 P
2057 P
2101 S
2102 S
2103 S
2104 S P
2105 S
2106 S
2107 S
2160 P
2161 S
2162 S
2163 S P
2164 P
2165 S
2166 S
2171 S
2172 S
2173 S
2174 S P
2175 P
2176 S
2177 P
2326 S
2327 S
2328 S
2329 P S
2330 S
2331 S
2332 S
2516 S
2517 S
2518 S
2519 P S
2520 P
2521 S
2522 P
2608 S
2609 S
2610 S
2611 P S
2612 P
2613 P
2614 P
2672 S
2673 S
2674 S
2675 P S
2676 S
2677 S
2678 S
2690 S
2691 S
2692 P
2693 P S
2694 S
2695 S
2696 P
2825 P
2826 S
2827 S
2828 S P
2829 S
2830 S
2831 P
2908 P
2909 S
2910 S
2911 P S
2912 P
2913 S
2914 P
2929 S
2930 P
2931 S
2932 P S
2933 S
2934 S
2935 P
2944 S
2945 S
2946 P
2947 S P
2948 S
2949 S
2950 S
3060 S
3061 S
3062 S
3063 P S
3064 S
3065 S
3066 S
3103 S
3104 P
3105 S
3106 P S
3107 S
3108 S
3109 P
3165 S
3166 P
3167 S
3168 S P
3169 S
3170 S
3171 P
3202 S
3203 P
3204 P
3205 S P
3206 S
3207 S
3208 S
3282 P
3283 S
3284 S
3285 P S
3286 S
3287 S
3288 S
3344 S
3345 S
3346 S
3347 P S
3348 S
3349 S
3350 S
3400 S
3401 S
3402 S
3403 P S
3404 S
3405 P
3406 P
3469 P
3470 S
3471 P
3472 S P
3473 P
3474 S
3475 S
3476 P S
3477 P
3478 S
3479 S
3507 P
3508 S
3509 P
S
3510 P
3511 S P
3512 S
3513 S
3514 P
3515 P
3516 P
3517 S
3518 P
3519 P
3579 S
3580 S
3581 P
3582 S P
3583 P
3584 S
3585 P
3612 P
3613 S
3614 S
3615 P S
3616 P
3617 S
3618 P
3632 S
3633 S
3634 S
3635 P S
3636 P
3637 S
3638 S
3891 S
3892 P
3893 P
3894 P S
3895 S
3896 S
3897 P
3918 P
3919 P
3920 S
3921 S P
S
3922 S
3923 S
3924 P
3925 P
3926 S
3927 P
3928 P
3937 S
3938 S
3939 S
3940 P S
3941 S
3942 S
3943 S
3974 S
3975 P
3976 S
3977 P S
3978 S
3979 S
3980 S
4144 S
4145 S
4146 S
4147 P
4148 S
4149 S
4150 S
4151 S
4152 S
S
P
4153 P
4154 P
4155 P
4156 S
4157 P
4158 S
4159 S
4206 S
4207 S
4208 S
4209 P S
4210 P
4211 P
4212 P
4293 P
4294 P
4295 S
4296 S P
4297 S
4298 S
4299 P
4391 P
4392 S
4393 P
4394 S P
4395 S
4396 P
4397 S
4452 P
4453 P
4454 S
P
4455 S
4456 P
4457 P
4458 S
4459 S
4460 S
4640 P
4641 S
4642 S
4643 P S
4644 P
4645 S
4646 S
4752 P
4753 S
4754 S
4755 P S
4756 P
4757 S
4758 S
4805 S
4806 S
4807 P
4808 S P
4809 S
4810 S
4811 P
5034 S
5035 S
5036 P
P
5037 S
5038 S
5039 S
5040 S
5041 S
P
5042 S
5043 P
5044 P
5045 S
5046 S
5047 S
5113 S
5114 P
5115 S
5116 S P
5117 S
5118 S
5119 S
5211 P
5212 P
5213 S
P
5214 S
5215 S
5216 P S
5217 S
5218 P
5219 S
5220 S

1
naive.test-A.md5 Normal file
View File

@ -0,0 +1 @@
e412b617206095df98ac606360b222d0 naive_base_model.pkl

Binary file not shown.

1
out.md5 Normal file
View File

@ -0,0 +1 @@
ef6348d2025d24efeb647366abf2102f test-A/out.tsv

View File

@ -6,10 +6,10 @@ import re
def clear_tokens(tokens): def clear_tokens(tokens):
tokens = tokens.replace('\\n', ' ') tokens = tokens.replace('\\n', ' ')
tokens = re.sub(r'\(((http)|(https)).*((\.com)|(\.net)|(\.jpg)|(\.html))\)'," ", tokens) tokens = re.sub(r'\(((http)|(https)).*((\.com)|(\.net)|(\.jpg)|(\.html))\)'," ", tokens)
tokens = re.sub(r'[\n\&\"\?\\\'\*\[\]\,\;\.\=\+\(\)\!\/\:\`\~\%\^\$\#\@]+', ' ', tokens) tokens = re.sub(r'[\n\&\"\?\\\'\*\[\]\,\;\.\=\+\(\)\!\/\:\`\~\%\^\$\#\@\\\\±]+', ' ', tokens)
tokens = re.sub(r'[\.\-][\.\-]+', ' ', tokens) tokens = re.sub(r'[\.\-][\.\-]+', ' ', tokens)
tokens = re.sub(r'[0-9]+', ' ', tokens) tokens = re.sub(r'œ|·', '', tokens)
tokens = re.sub(r' +', ' ', tokens) tokens = re.sub(r' +', ' ', tokens)
return tokens return tokens
@ -33,7 +33,7 @@ def calc_post_prob(post, paranormal_class_logprob, sceptic_class_logprob, word_l
product += paranormal_class_logprob product += paranormal_class_logprob
probs[abs(product)] = class_ probs[abs(product)] = class_
#print(probs) #print(probs)
# mozna jeszcze zrobic aby bralo kluczowe slowa i wtedy decydowalo ze paranormal
return probs[max(probs.keys())] return probs[max(probs.keys())]
@ -43,10 +43,10 @@ def main():
paranormal_class_logprob = pickle_list[0] paranormal_class_logprob = pickle_list[0]
sceptic_class_logprob = pickle_list[1] sceptic_class_logprob = pickle_list[1]
word_logprobs = pickle_list[2] word_logprobs = pickle_list[2]
in_file = "test-A/in.tsv" #in_file = "test-A/in.tsv"
#in_file = "dev-0/in.tsv" in_file = "dev-0/in.tsv"
out_file = "test-A/out.tsv" #out_file = "test-A/out.tsv"
#out_file = "dev-0/out.tsv" out_file = "dev-0/out.tsv"
print (f"in {in_file}") print (f"in {in_file}")
print (f"out {out_file}") print (f"out {out_file}")
with open(in_file) as in_f, open(out_file, 'w') as out_f: with open(in_file) as in_f, open(out_file, 'w') as out_f:

File diff suppressed because it is too large Load Diff

View File

@ -25,9 +25,11 @@ def clear_tokens(tokens):
tokens = tokens.replace('\\n', ' ') tokens = tokens.replace('\\n', ' ')
# delete links, special characters, kropki, and \n # delete links, special characters, kropki, and \n
tokens = re.sub(r'\(((http)|(https)).*((\.com)|(\.net)|(\.jpg)|(\.html))\)'," ", tokens) tokens = re.sub(r'\(((http)|(https)).*((\.com)|(\.net)|(\.jpg)|(\.html))\)'," ", tokens)
tokens = re.sub(r'[\n\&\"\?\\\'\*\[\]\,\;\.\=\+\(\)\!\/\:\`\~\%\^\$\#\@]+', ' ', tokens) tokens = re.sub(r'(|\-|\_)([a-z]+(\-|\_))+[a-z]+(|\-|\_)', ' ', tokens)
tokens = re.sub(r'[\n\&\"\?\\\'\*\[\]\,\;\.\=\+\(\)\!\/\:\`\~\%\^\$\#\@\\\\±]+', ' ', tokens)
tokens = re.sub(r'[\.\-][\.\-]+', ' ', tokens) tokens = re.sub(r'[\.\-][\.\-]+', ' ', tokens)
tokens = re.sub(r'[0-9]+', ' ', tokens) tokens = re.sub(r'[0-9]+', ' ', tokens)
tokens = re.sub(r'œ|·', '', tokens)
tokens = re.sub(r' +', ' ', tokens) tokens = re.sub(r' +', ' ', tokens)
return tokens return tokens
@ -67,10 +69,10 @@ def calc_word_logprobs(word_counts):
return word_logprobs return word_logprobs
def main(): def main():
expected = './train/expected.tsv' #expected = './train/expected.tsv'
#expected = './dev-0/expected.tsv' expected = './dev-0/expected.tsv'
in_f = './train/in.tsv' #in_f = './train/in.tsv'
#in_f = './dev-0/in.tsv' in_f = './dev-0/in.tsv'
print (f"expected {expected}") print (f"expected {expected}")
print (f"in {in_f}") print (f"in {in_f}")
paranormal_class_lgprob, skeptic_class_logprob = calc_class_logprob(expected) paranormal_class_lgprob, skeptic_class_logprob = calc_class_logprob(expected)