import random  # not used by the active code; kept so the file's imports are unchanged

# Keywords counted towards the "man" class (label '1').
man = ['silnik', 'windows', 'silnika', 'gb', 'mb', 'meczu', 'pc', 'opony',
       'apple', 'iphone', 'zwiastuny', 'hd', 'ubuntu', 'systemu', 'serwer']

# Keywords counted towards the "woman" class (label '0').
woman = ['ciąży', 'miesiączki', 'ciasto', 'ciążę', 'zadowolona', 'ciąża',
         'ciazy', 'antykoncepcyjne', 'ginekologa', 'tabletki', 'porodzie',
         'mąż', 'miesiączkę', 'krwawienie', 'ciasta']

# Directories processed when the script is run; each is read from
# "<dir>/in.tsv" and its predictions are written to "<dir>/out.tsv".
DATASET_DIRS = ('dev-0', 'dev-1', 'test-A')


def _stem(word):
    """Return the crude stem of *word*: its first five characters, lowercased."""
    # Slice first, then lowercase, so the result matches how the keyword
    # lists are stemmed.
    return word[:5].lower()


# stemming
def steming(man, woman):
    """Stem both keyword lists.

    Args:
        man: iterable of keywords for the "man" class.
        woman: iterable of keywords for the "woman" class.

    Returns:
        A ``(man_stem, woman_stem)`` tuple of lists, in input order. Keywords
        sharing the same first five characters yield duplicate entries.
    """
    man_stem = [_stem(word) for word in man]
    woman_stem = [_stem(word) for word in woman]
    return man_stem, woman_stem


def predict(man_stem, woman_stem, dane):
    """Label every text line by counting keyword-stem hits.

    Each line is split on whitespace and every token is stemmed. A token whose
    stem is in ``man_stem`` counts for "man"; otherwise, if it is in
    ``woman_stem``, it counts for "woman".

    Args:
        man_stem: iterable of stems for the "man" class.
        woman_stem: iterable of stems for the "woman" class.
        dane: iterable of text lines to classify.

    Returns:
        A list with one label per input line: ``'1'`` when the "man" count is
        greater than or equal to the "woman" count (ties and lines without any
        hit included), ``'0'`` otherwise.
    """
    # Build the lookup sets once; membership is tested for every token.
    man_stems = set(man_stem)
    woman_stems = set(woman_stem)

    res = []
    for s in dane:
        man_count = 0
        woman_count = 0
        for w in s.split():
            stem = _stem(w)
            # "man" is checked first, so a stem present in both sets
            # counts only for "man".
            if stem in man_stems:
                man_count += 1
            elif stem in woman_stems:
                woman_count += 1
        # Ties are resolved deterministically in favour of '1'.
        res.append('1' if man_count >= woman_count else '0')
    return res


def out_file(res, name):
    """Write every label in ``res`` to the file ``name``, one per line."""
    with open(name, 'w', encoding='utf-8') as file:
        file.writelines(p + "\n" for p in res)


def _read_lines(path):
    """Return the lines of the text file ``path`` without trailing newlines."""
    # Explicit UTF-8: the keyword lists contain Polish diacritics, so the
    # input must not be decoded with a locale-dependent default encoding.
    # NOTE(review): assumes the in.tsv files are UTF-8 encoded - confirm.
    with open(path, encoding='utf-8') as data:
        return [line.rstrip('\n') for line in data]


def main():
    """Classify every dataset in ``DATASET_DIRS`` and write its out.tsv."""
    man_dict, woman_dict = steming(man, woman)
    for directory in DATASET_DIRS:
        dane = _read_lines(directory + '/in.tsv')
        result = predict(man_dict, woman_dict, dane)
        out_file(result, directory + '/out.tsv')


if __name__ == "__main__":
    main()