38 lines
1.4 KiB
Python
38 lines
1.4 KiB
Python
|
import random
|
||
|
|
||
|
|
||
|
# Word stems matched as plain, case-sensitive substrings of each input line.
# A hit in this list yields label 0.
women_word_list = ["mąż", "fryzjer", "kosmety", "biżuter", "sukienk", "polk", "kolczy", "rodzin", "obcas",
                   "bransolet", "spink", "torebk", "szmink", "kobiet", "koleżan", "kuchni", "gotowa", "przepis",
                   "ciast", "ciąż", "miesiączk"]
# A hit in this list yields label 1; it is consulted before women_word_list.
men_word_list = ["samoch", "kompute", "pc", "sport", "km", "windows", "paliw", "kierownic", "silnik", "opon", "piw",
                 "koleg", "śrub", "mecz", "system", "serwer"]

# Raw strings: the original literals relied on invalid escape sequences
# (\D, \e, \p, \i, \o), which newer Python versions warn about. The runtime
# values of the paths are unchanged.
INPUT_PATH = r"j:\Desktop\ekstrakcjacw5\petite-difference-challenge2\test-A\in.tsv"
OUTPUT_PATH = r"j:\Desktop\ekstrakcjacw5\petite-difference-challenge2\test-A\out.tsv"


def classify_line(line):
    """Return the 0/1 label for a single input line.

    The men's list takes precedence: if any of its stems occurs in ``line``
    the result is 1. Otherwise a hit in the women's list gives 0. When
    neither list matches, the label is drawn at random from {0, 1}.

    Matching is a case-sensitive substring test, so short stems such as
    "pc" or "km" also match inside longer words.
    """
    if any(stem in line for stem in men_word_list):
        return 1
    if any(stem in line for stem in women_word_list):
        return 0
    # No keyword evidence either way: fall back to a coin flip.
    return random.randint(0, 1)


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Label every line of ``input_path`` and write one label per line.

    The input is read completely before the output file is opened, so a
    missing or unreadable input never truncates an existing output file.
    """
    with open(input_path, "r", encoding="UTF-8") as read_file:
        labels = [f"{classify_line(line)}\n" for line in read_file]

    with open(output_path, "w", encoding="UTF-8") as output_file:
        output_file.writelines(labels)


if __name__ == "__main__":
    main()
|