"""Keyword-counting binary classifier for whitespace-tokenised text lines.

Every input line is split on whitespace; each token is reduced to a crude
"stem" (its first ``STEM_LENGTH`` characters, lower-cased) and compared with
the stems of two keyword lists, ``man`` and ``woman``.  A line is labelled
``'1'`` when it contains at least as many ``man`` keywords as ``woman``
keywords (ties included) and ``'0'`` otherwise.

Run as a script, it reads ``<dir>/in.tsv`` and writes ``<dir>/out.tsv`` (one
label per input line) for every directory listed in ``DATASET_DIRS``.

Provenance: recovered from the git patch "add solution"
(commit a04d3cedb2bb4a2d85ab225d4805d8ed10087d0f, Tue, 26 Apr 2022,
author Szymon Parafiński), which creates this file as ``script.py``.
"""

import random  # NOTE: unused by the current code; kept so existing imports are unchanged.

# Keywords whose presence votes for label '1'.
man = ['silnik', 'windows', 'silnika', 'gb', 'mb', 'meczu', 'pc', 'opony', 'apple', 'iphone', 'zwiastuny', 'hd',
       'ubuntu', 'systemu', 'serwer']
# Keywords whose presence votes for label '0'.
woman = ['ciąży', 'miesiączki', 'ciasto', 'ciążę', 'zadowolona', 'ciąża', 'ciazy', 'antykoncepcyjne', 'ginekologa',
         'tabletki', 'porodzie', 'mąż', 'miesiączkę', 'krwawienie', 'ciasta']

# Number of leading characters kept by the stemming heuristic.
STEM_LENGTH = 5

# Directories processed by main(); each must contain an ``in.tsv`` file.
DATASET_DIRS = ('dev-0', 'dev-1', 'test-A')


def _stem(word):
    """Return the lower-cased first ``STEM_LENGTH`` characters of *word*."""
    return word[:STEM_LENGTH].lower()


def steming(man, woman):
    """Stem both keyword lists.

    Args:
        man: iterable of keywords for the ``'1'`` class.
        woman: iterable of keywords for the ``'0'`` class.

    Returns:
        A ``(man_stem, woman_stem)`` tuple of lists holding the stem of every
        keyword, in input order (duplicates are not removed).
    """
    return [_stem(word) for word in man], [_stem(word) for word in woman]


def predict(man_stem, woman_stem, dane):
    """Label every line in *dane* by counting keyword stems.

    Args:
        man_stem: collection of stems voting for ``'1'``.
        woman_stem: collection of stems voting for ``'0'``.
        dane: iterable of text lines.

    Returns:
        A list with one label per line: ``'1'`` when the line has at least as
        many ``man`` hits as ``woman`` hits (so lines without any keyword are
        ``'1'`` too), otherwise ``'0'``.
    """
    # Sets give O(1) membership tests; build them once, not per token.
    man_set = set(man_stem)
    woman_set = set(woman_stem)

    res = []
    for line in dane:
        man_count = 0
        woman_count = 0
        # Tokens are split on whitespace only; punctuation stays attached and
        # only matters when it falls within the first STEM_LENGTH characters.
        for token in line.split():
            stem = _stem(token)
            # A stem present in both sets is counted for ``man`` only.
            if stem in man_set:
                man_count += 1
            elif stem in woman_set:
                woman_count += 1
        res.append('1' if man_count >= woman_count else '0')
    return res


def out_file(res, name):
    """Write every item of *res* to the file *name*, one per line.

    The file is created or truncated and written as UTF-8.
    """
    with open(name, 'w', encoding='utf-8') as file:
        file.writelines(p + "\n" for p in res)


def _read_lines(path):
    """Return the lines of the text file *path* without trailing newlines.

    The file is decoded as UTF-8 explicitly so that Polish diacritics are read
    the same way regardless of the platform's default locale encoding.
    """
    with open(path, encoding='utf-8') as data:
        return [line.rstrip('\n') for line in data]


# Stemmed keyword lists shared by every dataset.
man_dict, woman_dict = steming(man, woman)


def main():
    """Classify ``in.tsv`` of every dataset directory into ``out.tsv``."""
    for directory in DATASET_DIRS:
        dane = _read_lines(directory + '/in.tsv')
        result = predict(man_dict, woman_dict, dane)
        out_file(result, directory + '/out.tsv')


if __name__ == '__main__':
    main()