2022-04-27 00:40:17 +02:00
|
|
|
from collections import Counter
|
|
|
|
|
|
|
|
# Lower-case tokens that func() counts as evidence for the "man" class.
# Matching is exact and per token, so inflected forms are listed separately.
man_words = [
    'komp', 'kompa', 'komputer', 'wtrysk', 'certyfikat', 'żona', 'żony', 'żonie', 'mecz', 'gra', 'grać', 'gry',
    'android', 'windows', 'linux', 'samochod', 'samochód', 'silnik', 'silnika', 'gb', 'pc', 'gpl', 'serwer',
    'piwo', 'laska', 'bmw', 'meczu', 'opony', 'siłka', 'elo', 'auto',
]
|
2022-04-27 00:40:17 +02:00
|
|
|
|
|
|
|
# Lower-case tokens that func() counts as evidence for the "woman" class.
# NOTE(review): 'ciąża', 'sukienka', 'szminka', 'ginekolog' and 'ginekologa'
# each appear twice. func() sums one count per list entry, so these tokens are
# effectively weighted double; the duplicates are kept to preserve existing
# predictions -- confirm whether the extra weight is intended.
women_words = [
    '<3', ':d', ';)', 'ciąża', 'ginekolog', 'ginekologa', 'szminka', 'omg', 'mąż', 'dziecko', 'okres',
    'chłopak', 'ciąża', 'ciąży', 'sukienka', 'obcasy', 'tampon', 'kino', 'kina', 'odchudzać', 'sukienka',
    'szminka', 'szminki', 'obcas', 'tabletki', 'antykoncepcyjne', 'antykoncepcyjnym', 'antykoncepcyjna',
    'porod', 'poród', 'ginekolog', 'ginekologa', 'torebka', 'torebke', 'kocham', 'podpaski', 'podpaska',
    'ciasto',
]
|
2022-04-27 00:40:17 +02:00
|
|
|
|
|
|
|
|
|
|
|
def func(file_in_name, file_out_name, man_vocab=None, woman_vocab=None):
    """Label every line of a text file by comparing keyword counts.

    Each input line is lower-cased and split on whitespace after '.', ','
    and ';' have been turned into spaces. The occurrences of the tokens
    from each vocabulary are summed, and one label per input line is
    written to the output file: '1' when the man-vocabulary total is
    strictly greater, '0' otherwise (ties and lines with no hits give '0').

    Args:
        file_in_name: Path of the UTF-8 input file, one text per line.
        file_out_name: Path of the output file; it is overwritten.
        man_vocab: Re-iterable collection of lower-case tokens for the '1'
            label. Defaults to the module-level ``man_words``.
        woman_vocab: Re-iterable collection of lower-case tokens for the
            '0' label. Defaults to the module-level ``women_words``.

    A token listed several times in a vocabulary is counted once per
    listing, i.e. it carries proportionally more weight.
    """
    # Resolve the defaults at call time so the module lists stay the
    # single source of truth for callers that pass only the two paths.
    if man_vocab is None:
        man_vocab = man_words
    if woman_vocab is None:
        woman_vocab = women_words

    # One translation table replaces the chain of str.replace() calls.
    # NOTE(review): because ';' becomes a separator, a vocabulary entry
    # that contains ';' (such as ';)') can never match a token.
    separators = str.maketrans({'.': ' ', ',': ' ', ';': ' '})

    with open(file_in_name, encoding='utf8') as f_in, \
            open(file_out_name, 'w', encoding='utf8') as f_out:
        for line in f_in:
            words_count = Counter(line.translate(separators).lower().split())
            # Counter returns 0 for absent tokens, so no membership test
            # is needed before indexing.
            man_words_count = sum(words_count[word] for word in man_vocab)
            woman_words_count = sum(words_count[word] for word in woman_vocab)
            f_out.write('1\n' if man_words_count > woman_words_count else '0\n')
|
|
|
|
|
|
|
|
|
|
|
|
# Write predictions for every data split; each directory's in.tsv is read
# and its out.tsv is (re)written next to it, in this order.
for split_dir in ('dev-0', 'dev-1', 'test-A'):
    func(f'{split_dir}/in.tsv', f'{split_dir}/out.tsv')
|