37 lines
1.3 KiB
Python
37 lines
1.3 KiB
Python
|
|
import pandas as pd
|
|
|
|
def read_file(input_file, max_rows=900000):
    """Load a space-delimited, header-less statistics file into a DataFrame.

    Args:
        input_file: Path (or file-like object) accepted by ``pandas.read_csv``.
        max_rows: Maximum number of leading rows to keep. Defaults to
            900000, the cap that used to be hard-coded here. ``None`` keeps
            every row.

    Returns:
        A ``pandas.DataFrame`` with the columns
        ``%%, Nw, PP, PPwp, Nbo, Noov, OOV`` holding at most ``max_rows`` rows.

    Raises:
        pandas.errors.ParserError: If a row has more fields than columns
            (pandas' default handling of malformed lines).
    """
    column_names = ["%%", "Nw", "PP", "PPwp", "Nbo", "Noov", "OOV"]
    # NOTE: the former ``error_bad_lines=True`` argument only restated the
    # default (raise on malformed rows). It was deprecated in pandas 1.3 and
    # removed in 2.0, where passing it raises TypeError, so it is omitted.
    frame = pd.read_csv(
        input_file,
        delimiter=" ",
        header=None,
        names=column_names,
        skip_blank_lines=True,
    )
    # The whole file is still parsed (and validated) before truncating.
    return frame.iloc[:max_rows]
|
|
|
|
def check_results(input_negative, input_positive, output):
    """Write one 0/1 label per pair of negative/positive scores.

    The two inputs are walked in lockstep. For each pair the line ``0`` is
    written when the negative score is strictly greater than the positive
    score, otherwise ``1``. Scores are compared numerically: they are
    converted with ``float`` first, because the values produced by
    ``str.replace`` are still strings and comparing strings would order
    them lexicographically (``"9.5" > "10.2"``).

    Args:
        input_negative: Sliceable sequence (list, pandas Series, ...) of
            numeric scores or numeric strings.
        input_positive: Sequence of the same kind, paired with
            ``input_negative`` by position.
        output: Path of the text file to (over)write.

    Returns:
        The file object that was written to. It is already closed; it is
        returned only to keep the original return value.

    Raises:
        ValueError: If a compared score cannot be converted to ``float``.
        OSError: If ``output`` cannot be opened for writing.
    """
    # The final entry of each input is deliberately left out, as before.
    # NOTE(review): presumably a trailing summary row - confirm with the data.
    pairs = zip(input_negative[:-1], input_positive[:-1])
    # ``with`` guarantees the file is closed even if a conversion fails.
    with open(output, "w") as f:
        f.writelines(
            "0\n" if float(negative) > float(positive) else "1\n"
            for negative, positive in pairs
        )
    return f
|
|
|
|
def replacer(input_file_negativ, input_file_positive):
    """Load both statistics files and strip the ``sent_PP=`` prefix from ``PP``.

    Each path is loaded with ``read_file``; the ``PP`` column is then taken
    and every occurrence of the text ``sent_PP=`` is removed from its values.

    Args:
        input_file_negativ: Path of the "negative" statistics file.
        input_file_positive: Path of the "positive" statistics file.

    Returns:
        A ``(negative, positive)`` pair of Series holding the cleaned ``PP``
        values. The values are not converted to numbers here.
    """
    prefix = 'sent_PP='
    cleaned = []
    for stats_path in (input_file_negativ, input_file_positive):
        pp_column = read_file(stats_path)['PP']
        # Only the 'sent_PP=' prefix is removed; a plain 'PP=' is left alone.
        cleaned.append(pp_column.str.replace(prefix, ""))
    negative_pp, positive_pp = cleaned
    return negative_pp, positive_pp
|
|
|
|
# Produce the label file for each data split, dev-0 first and then test-A:
# clean the PP columns of the negative/positive statistics files and write
# the resulting 0/1 decisions to that split's out.tsv.
for stats_negative, stats_positive, result_path in (
    ("stat/dev-0/out_n.tsv", "stat/dev-0/out_p.tsv", "dev-0/out.tsv"),
    ("stat/test-A/out_n.tsv", "stat/test-A/out_p.tsv", "test-A/out.tsv"),
):
    out_n_sent_nw, out_p_sent_nw = replacer(stats_negative, stats_positive)
    check_results(out_n_sent_nw, out_p_sent_nw, result_path)
|
|
|