This commit is contained in:
Cezary 2022-04-27 00:40:17 +02:00
parent b8eed9b0b5
commit 7950c37b28
5 changed files with 428566 additions and 3 deletions

137314
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

156606
dev-1/out.tsv Normal file

File diff suppressed because it is too large Load Diff

31
run.py
View File

@ -1,3 +1,28 @@
# Baseline predictor: write the constant score 0.8 once per input line.
# The output path previously read 'dev-0/uot.tsv' (transposed letters), so the
# predictions never landed in the expected 'dev-0/out.tsv' file.
with open('dev-0/in.tsv', encoding='utf8') as f_in, open('dev-0/out.tsv', 'w') as f_out:
    for _ in f_in:
        f_out.write('0.8\n')
from collections import Counter
# Keyword lists used by func() below. Tokens are compared after lowercasing,
# so every entry must be lowercase. Each keyword is listed exactly once:
# func() adds up one count per list entry, so a repeated entry would silently
# double the weight of that word.
man_words = ['komp', 'kompa', 'komputer', 'wtrysk', 'certyfikat', 'żona', 'żony', 'żonie', 'mecz', 'gra', 'grać', 'gry',
             'android', 'linux', 'samochod', 'samochód', 'silnik', 'silnika', 'gb', 'pc', 'gpl', 'serwer']
women_words = ['<3', ':d', ';)', 'ciąża', 'ginekolog', 'ginekologa', 'szminka', 'omg', 'mąż', 'dziecko', 'okres',
               'chłopak', 'ciąży', 'sukienka', 'obcasy', 'tampon', 'kino', 'kina', 'odchudzać',
               'szminki', 'obcas', 'tabletki', 'antykoncepcyjne', 'antykoncepcyjnym', 'antykoncepcyjna',
               'porod', 'poród', 'torebka', 'torebke']

# Characters treated as word separators in addition to whitespace.
_SEPARATOR_CHARS = '.,;'
_SEPARATORS_TO_SPACE = str.maketrans(_SEPARATOR_CHARS, ' ' * len(_SEPARATOR_CHARS))

# Keywords that themselves contain a separator character (e.g. ';)'). They
# must be recognised before separators are stripped, otherwise they can never
# match any token.
_PROTECTED_TOKENS = frozenset(
    word for word in man_words + women_words
    if any(ch in _SEPARATOR_CHARS for ch in word)
)


def _tokenize(line):
    """Split *line* into lowercase tokens, keeping protected keywords intact."""
    tokens = []
    for chunk in line.lower().split():
        if chunk in _PROTECTED_TOKENS:
            tokens.append(chunk)
        else:
            # Same splitting as before: '.', ',' and ';' act like whitespace.
            tokens.extend(chunk.translate(_SEPARATORS_TO_SPACE).split())
    return tokens


def func(file_in_name: str, file_out_name: str) -> None:
    """Write one keyword-based label per line of the input file.

    Each line of ``file_in_name`` (read as UTF-8) is lowercased and tokenized.
    The occurrences of ``man_words`` and of ``women_words`` among the tokens
    are counted, and ``'1'`` is written to ``file_out_name`` when the
    ``man_words`` count is strictly greater; otherwise (including ties and
    lines with no keyword at all) ``'0'`` is written.

    Args:
        file_in_name: Path of the text file to classify, one sample per line.
        file_out_name: Path of the file that receives one label per line.
            It is created or overwritten.
    """
    with open(file_in_name, encoding='utf8') as f_in, \
            open(file_out_name, 'w', encoding='utf8') as f_out:
        for line in f_in:
            counts = Counter(_tokenize(line))
            man_words_count = sum(counts[word] for word in man_words)
            woman_words_count = sum(counts[word] for word in women_words)
            f_out.write('1\n' if man_words_count > woman_words_count else '0\n')
# Run func() on each input/output path pair, in this order.
for in_path, out_path in (
    ('dev-0/in.tsv', 'dev-0/out.tsv'),
    ('dev-1/in.tsv', 'dev-1/out.tsv'),
    ('test-A/in.tsv', 'test-A/out.tsv'),
):
    func(in_path, out_path)

134618
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.