Fixed problem with code

This commit is contained in:
Bartusiak 2020-05-25 18:36:38 +02:00
parent 30328fb77d
commit 4d2a11f59e
5 changed files with 359216 additions and 9595 deletions

View File

@ -9,7 +9,7 @@ from spacy.lang.en import English
def create_train(input,output1,output0):
create_dictionary = pd.read_csv(input, delimiter="\t", header=None, names=["num","txt"], error_bad_lines=False, skip_blank_lines=False)
create_dictionary = create_dictionary[:200000]
create_dictionary = create_dictionary[:900000]
values_1 = (create_dictionary['txt'].where(create_dictionary['num']==1)).dropna()
values_0 = (create_dictionary['txt'].where(create_dictionary['num']==0)).dropna()
values_1.to_csv(output1,header=None, index=None)

File diff suppressed because it is too large Load Diff

View File

@ -4,12 +4,12 @@ import pandas as pd
def read_file(input_file):
create_file = pd.read_csv(input_file, delimiter=" ", header=None, names=["%%", "Nw","PP","PPwp","Nbo","Noov","OOV"], error_bad_lines=True,
skip_blank_lines=True)
create_file = create_file[:200000]
create_file = create_file[:900000]
return create_file
def check_results(input_negative,input_positive,output):
f = open(output,"w")
for i,j in zip(input_negative,input_positive):
for i,j in zip(input_negative[:-1],input_positive[:-1]):
if (i>j):
f.write("0.3\n")
else:
@ -22,9 +22,9 @@ def replacer(input_file_negativ,input_file_positive):
out_p = read_file(input_file_positive)
# --- Replace prefixes using str.replace ---- #
out_n_sent_nw = out_n['PP'].str.replace('sent_PP=', "")
out_n_sent_nw = out_n_sent_nw.str.replace('PP=', "")
#out_n_sent_nw = out_n_sent_nw.str.replace('PP=', "")
out_p_sent_nw = out_p['PP'].str.replace('sent_PP=', "")
out_p_sent_nw = out_p_sent_nw.str.replace('PP=', "")
#out_p_sent_nw = out_p_sent_nw.str.replace('PP=', "")
return out_n_sent_nw,out_p_sent_nw

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff