petite-difference-challenge2/run.py

53 lines
2.0 KiB
Python
Raw Permalink Normal View History

2022-04-25 23:53:47 +02:00
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
# Keyword stems that count as a vote for a male author.
# prediction() compares these against the first six characters of each
# lower-cased token (word[:6]), so every entry is a lower-case stem of at
# most six characters; a longer entry could never match.
# Keep a comma after every literal: adjacent string literals are silently
# concatenated by Python, which would fuse two keywords into one useless one.
male = [
    # computers / tech
    'window', 'gb', 'mb', 'pc', 'apple', 'iphone', 'hd', 'ubunt',
    'system', 'serwer', 'linux', 'komput', 'inform', 'lagi', 'pad',
    # media / sites
    'zwiast', 'youtub', 'sfd', 'kfd', 'elektr', 'autoce', 'dobrep',
    # cars
    'opony', 'merced', 'bmw', 'audi', 'porsch', 'felga',
    # games / sport
    'gry', 'gra', 'gram', 'cs', 'counte',
    'meczu', 'piłka', 'mecz', 'gol', 'bramka',
    # work
    'robota',
    # wife (with and without diacritics)
    'żona', 'żony', 'żonie', 'żoną', 'zona', 'zony', 'zonie',
    # mates
    'ziom', 'ziomkó', 'ziomko', 'kumpel', 'kolega', 'kolegą', 'kolegi',
]
# Keyword stems that count as a vote for a female author.
# prediction() compares these against the first six characters of each
# lower-cased token (word[:6]), so every entry is a lower-case stem of at
# most six characters; a longer entry could never match.
female = [
    # pregnancy / health
    'ciąży', 'ciążę', 'ciąża', 'ciazy', 'miesią', 'antyko', 'gineko',
    'tablet', 'porodz', 'krwawi', 'podpas',
    # baking
    'ciasto', 'ciasta',
    # clothes / cosmetics
    'sukien', 'szmink', 'szpilk',
    # husband / boyfriend (with and without diacritics)
    'mąż', 'maz', 'męża', 'męza', 'chłopa',
    # other
    'zadowo',
]
def prediction(male, female, in_file):
    """Classify every line of a TSV file by counting gendered keywords.

    For each line, only the text before the first tab is used. The
    characters ``, . / :`` are removed, the text is lower-cased and split on
    whitespace, and every token is cut down to its first six characters.
    Tokens are then looked up in the two keyword collections.

    Args:
        male: Iterable of keyword strings voting for label ``'1'``.
        female: Iterable of keyword strings voting for label ``'0'``.
        in_file: Path of the UTF-8 encoded input file, one sample per line.

    Returns:
        A list with one string per input line: ``'1'`` when strictly more
        tokens matched ``male`` than ``female``, otherwise ``'0'`` (ties and
        lines without any match therefore yield ``'0'``).
    """
    stem_len = 6

    # Tokens are truncated to stem_len characters before the lookup, so the
    # keywords must be truncated the same way or longer ones never match.
    # Sets also make each membership test O(1) instead of a list scan.
    male_stems = {word[:stem_len] for word in male}
    female_stems = {word[:stem_len] for word in female}

    # One translation table removes all stripped punctuation in a single pass.
    strip_punct = str.maketrans("", "", ",./:")

    results = []
    with open(in_file, encoding='utf-8') as file:
        # Iterate the file lazily instead of loading every line up front.
        for line in file:
            text = line.split("\t")[0].strip()
            text = text.translate(strip_punct).lower()
            stem_words = [word[:stem_len] for word in text.split()]
            man_score = sum(1 for w in stem_words if w in male_stems)
            girl_score = sum(1 for w in stem_words if w in female_stems)
            results.append('1' if man_score > girl_score else '0')
    return results
def out_file(result, out_file):
    """Write the labels in *result* to a file, one per line.

    Args:
        result: Iterable of strings; each is written followed by a newline.
        out_file: Path of the file to create or overwrite. Inside this
            function the name refers to the path, not to the function.
    """
    # Explicit UTF-8 keeps the output independent of the platform's default
    # locale encoding, matching how the input files are read.
    with open(out_file, 'w', encoding='utf-8') as file:
        file.writelines(r + "\n" for r in result)
# Classify every data split and store the labels in that split's out.tsv,
# in the same directory as the in.tsv they were computed from.
for _in_path, _out_path in (
    ('dev-0/in.tsv', 'dev-0/out.tsv'),
    ('dev-1/in.tsv', 'dev-1/out.tsv'),
    ('test-A/in.tsv', 'test-A/out.tsv'),
):
    result = prediction(male, female, _in_path)
    out_file(result, _out_path)