petite
This commit is contained in:
parent
b775a221e6
commit
703fadc3d4
137315
dev-0/out.tsv
Normal file
137315
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
156607
dev-1/out.tsv
Normal file
156607
dev-1/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
60
run.py
Normal file
60
run.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
import csv
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Load the input data.
# dev-0: every line of the file, with the newline character removed.
# The encoding is given explicitly because the lines are later compared with
# keywords containing Polish diacritics; the platform default encoding is
# locale-dependent and may not be UTF-8.
# NOTE(review): assumes the .tsv files are UTF-8 encoded - confirm.
with open('dev-0/in.tsv', encoding='utf-8') as data:
    dane_in_0 = [line.replace('\n', '') for line in data]
|
||||||
|
|
||||||
|
# dev-1: every line of the file, with the newline character removed.
# The encoding is given explicitly because the lines are later compared with
# keywords containing Polish diacritics; the platform default encoding is
# locale-dependent and may not be UTF-8.
# NOTE(review): assumes the .tsv files are UTF-8 encoded - confirm.
with open('dev-1/in.tsv', encoding='utf-8') as data:
    dane_in_1 = [line.replace('\n', '') for line in data]
|
||||||
|
|
||||||
|
# test-A: every line of the file, with the newline character removed.
# Reads test-A/in.tsv; the previous version re-read dev-1/in.tsv, so the
# predictions written to test-A/out.tsv were just a copy of the dev-1 ones.
# The encoding is given explicitly because the lines are later compared with
# keywords containing Polish diacritics; the platform default encoding is
# locale-dependent and may not be UTF-8.
# NOTE(review): assumes the .tsv files are UTF-8 encoded - confirm.
with open('test-A/in.tsv', encoding='utf-8') as data:
    dane_in_A = [line.replace('\n', '') for line in data]
|
||||||
|
|
||||||
|
|
||||||
|
# Keyword lexicons: `m` holds words counted as "male" cues, `w` words counted
# as "female" cues. Each word is crudely stemmed by keeping only its first
# 6 characters and then lower-cased.
# NOTE(review): 'porshe' and 'kosmetycznka' look like misspellings; they are
# kept exactly as in the original list so the resulting stems do not change.
m = [
    word[:6].lower()
    for word in (
        'samochod', 'bmw', 'porshe', 'mercedes', 'mecz', 'piłka', 'gol',
        'windows', 'linux', 'serwer', 'ubuntu', 'żona', 'dziewczyna',
        'siłownia', 'klata', 'biceps', 'dzik', 'kumpel', 'ziom', 'gry',
        'lol', 'cs', 'sport', 'piwo', 'wódka', 'komputer', 'sport', 'dres',
    )
]
w = [
    word[:6].lower()
    for word in (
        'sukienka', 'kwiaty', 'paznokcie', 'koleżanka', 'koleżanki',
        'przyjaciółka', 'przyjaciółki', 'włosy', 'lakier', 'hybryda',
        'ginekolog', 'okres', 'torebka', 'torebki', 'kosmetycznka',
        'makijaż', 'ciasto', 'mąż', 'lekarz', 'ciąża', 'krwawienie',
        'spódniczka', 'fitness', 'ciuchy',
    )
]
|
||||||
|
|
||||||
|
def decyzja(dane_in, m, w, stem_len=6):
    """Label each input line 1 or 0 by counting keyword hits.

    Every line is lower-cased and split on whitespace. Each token is cut to
    its first ``stem_len`` characters and looked up in the keyword
    collections. Previously tokens were compared whole, so keywords stemmed
    to 6 characters could never match a longer or inflected word.

    Args:
        dane_in: Iterable of lines; each item is converted with ``str()``.
        m: Collection of (stemmed, lower-case) keywords counted as "m" hits.
        w: Collection of (stemmed, lower-case) keywords counted as "w" hits.
            A token found in ``m`` is not also counted for ``w``.
        stem_len: Number of leading characters of each token used for the
            lookup. Pass ``None`` to compare whole tokens.

    Returns:
        A list with one int per input line: 1 when the "m" count is greater
        than or equal to the "w" count (ties and no hits give 1), else 0.
    """
    # Sets give constant-time membership tests; build them once per call.
    m_stems = set(m)
    w_stems = set(w)

    res = []
    for line in dane_in:
        m_words = 0
        w_words = 0
        for token in str(line).lower().split():
            # Stem the token the same way the keyword lists were stemmed.
            stem = token[:stem_len]
            if stem in m_stems:
                m_words += 1
            elif stem in w_stems:
                w_words += 1
        res.append(1 if m_words >= w_words else 0)
    return res
|
||||||
|
|
||||||
|
# Classify every data set and save the predictions as tab-separated files.
# NOTE(review): with these arguments DataFrame.to_csv also writes a header row
# and the index column, so each out.tsv has one line more than its input and
# two columns - confirm that whatever consumes out.tsv expects this.

# dev-0
labels_0 = decyzja(dane_in_0, m, w)
tab_resul_0 = pd.DataFrame(data=labels_0)
tab_resul_0.to_csv('dev-0/out.tsv', sep='\t')

# dev-1
labels_1 = decyzja(dane_in_1, m, w)
tab_resul_1 = pd.DataFrame(data=labels_1)
tab_resul_1.to_csv('dev-1/out.tsv', sep='\t')

# test-A
labels_A = decyzja(dane_in_A, m, w)
tab_resul_A = pd.DataFrame(data=labels_A)
tab_resul_A.to_csv('test-A/out.tsv', sep='\t')
|
||||||
|
|
156607
test-A/out.tsv
Normal file
156607
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user