# Word lists (dictionaries)
# Keywords that count towards the "male" score in calculation().
# Matching is exact on lower-cased tokens, so every spelling that should be
# recognised has to be listed explicitly.
male = [
    'gry', 'grac', 'grami', 'auto', 'samochod', 'silnik', 'kolegami', 'brode', 'zarost', 'samochodem', 'piwo',
    'kolega', 'fura', 'kasa', 'pieniadze', 'autem', 'mechanika', 'pilka', 'meczu', 'mecz', 'nozna', 'sport', 'motor',
    'skuter', 'pograc', 'alkohol', 'ubuntu', 'systemu', 'serwer', 'linux', 'windows', 'procesor', 'dysk', 'gb',
    'mb',
]

# Keywords that count towards the "female" score in calculation().
# NOTE: this list mixes spellings with Polish diacritics ('torebkę') and
# ASCII-folded ones ('sukienke'); both forms are matched only where listed.
female = [
    'kolezanka', 'kolezankami', 'koleżanką', 'przyjaciółki', 'przyjaciółka', 'przyjaciółkami',
    'kosmetyczka', 'okres', 'praca', 'paznokcie', 'włosy', 'usta', 'ciąża', 'lekarz', 'lekarza', 'ciąży',
    'dziecko', 'dzieci', 'wino', 'sukienka', 'sukienke', 'torebka', 'torebkę', 'makijaż', 'pomadka', 'piersi',
    'wesele', 'slub', 'weselu', 'mąż', 'maz', 'mężem',
]
# Classification
def calculation(path_in, path_out):
    """Classify each line of ``path_in`` and write one label per line to ``path_out``.

    Every input line is lower-cased, has ``.`` and ``,`` replaced by spaces,
    and is split on whitespace. The resulting words are counted against the
    module-level ``male`` and ``female`` word lists. A line is labelled
    ``'1'`` when strictly more of its words appear in ``male`` than in
    ``female``; otherwise (ties included) it is labelled ``'0'``.

    Args:
        path_in: Path of the UTF-8 text file to classify, one sample per line.
        path_out: Path of the file that is created or overwritten with the
            labels, one per input line, in input order.
    """
    # Build the lookup sets once per call: set membership is O(1), whereas
    # scanning the lists for every single word is O(len(list)).
    male_words = set(male)
    female_words = set(female)

    results = []
    with open(path_in, encoding='utf-8') as file:
        # Iterate the file lazily instead of loading it whole via readlines().
        for line in file:
            words = line.replace('.', ' ').replace(',', ' ').lower().split()
            male_score = sum(1 for w in words if w in male_words)
            female_score = sum(1 for w in words if w in female_words)
            results.append('1' if male_score > female_score else '0')

    # Explicit encoding for consistency with the input side; the labels are
    # plain ASCII, so the bytes written are the same as before.
    with open(path_out, 'w', encoding='utf-8') as file:
        file.writelines(r + '\n' for r in results)
# Files
# Run the classifier over every dataset split; each pair is
# (input file, output file for the predicted labels).
for source_path, target_path in (
    ('dev-0/in.tsv', 'dev-0/out.tsv'),
    ('dev-1/in.tsv', 'dev-1/out.tsv'),
    ('test-A/in.tsv', 'test-A/out.tsv'),
):
    calculation(source_path, target_path)