Fix log10 import: use math.log10 instead of cmath.log10
This commit is contained in:
parent
cdefe27fd4
commit
feda89c5a9
21038
dev-0/out.tsv
21038
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
24
run.py
24
run.py
@@ -1,4 +1,4 @@
|
|||||||
from cmath import log10
|
from math import log10
|
||||||
import csv
|
import csv
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import regex as re
|
import regex as re
|
||||||
@@ -90,7 +90,7 @@ def write_output():
|
|||||||
for _, row in dev_data.iterrows():
|
for _, row in dev_data.iterrows():
|
||||||
text = prepare_text(str(row[7]))
|
text = prepare_text(str(row[7]))
|
||||||
words = word_tokenize(text)
|
words = word_tokenize(text)
|
||||||
if len(words) < 3:
|
if len(words) < 4:
|
||||||
prediction = "the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1"
|
prediction = "the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1"
|
||||||
else:
|
else:
|
||||||
prediction = predict(words[0], words[1])
|
prediction = predict(words[0], words[1])
|
||||||
@@ -100,23 +100,23 @@ def write_output():
|
|||||||
for _, row in test_data.iterrows():
|
for _, row in test_data.iterrows():
|
||||||
text = prepare_text(str(row[7]))
|
text = prepare_text(str(row[7]))
|
||||||
words = word_tokenize(text)
|
words = word_tokenize(text)
|
||||||
if len(words) < 3:
|
if len(words) < 4:
|
||||||
prediction = "the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1"
|
prediction = "the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1"
|
||||||
else:
|
else:
|
||||||
prediction = predict(words[0], words[1])
|
prediction = predict(words[0], words[1])
|
||||||
file.write(prediction + "\n")
|
file.write(prediction + "\n")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print("Preparing data...")
|
# print("Preparing data...")
|
||||||
train_data = train_data[[6, 7]]
|
# train_data = train_data[[6, 7]]
|
||||||
train_data = pd.concat([train_data, train_labels], axis=1)
|
# train_data = pd.concat([train_data, train_labels], axis=1)
|
||||||
train_data["final"] = train_data[6] + train_data[0] + train_data[7]
|
# train_data["final"] = train_data[6] + train_data[0] + train_data[7]
|
||||||
train = train_data[['final']]
|
# train = train_data[['final']]
|
||||||
|
|
||||||
with open("./train_data.txt", 'a') as f:
|
# with open("./train_data.txt", 'a') as f:
|
||||||
for _, row in train_data.iterrows():
|
# for _, row in train_data.iterrows():
|
||||||
text = prepare_text(str(row["final"]))
|
# text = prepare_text(str(row["final"]))
|
||||||
f.write(text + '\n')
|
# f.write(text + '\n')
|
||||||
|
|
||||||
print("Preparing model...")
|
print("Preparing model...")
|
||||||
os.system('sh ./kenlm.sh')
|
os.system('sh ./kenlm.sh')
|
||||||
|
14828
test-A/out.tsv
14828
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user