gitignore update + run.py reformat

This commit is contained in:
MatOgr 2023-06-14 22:50:53 +02:00
parent 1ff6ba7d37
commit 8b58507f52
2 changed files with 18 additions and 21 deletions

3
.gitignore vendored
View File

@ -6,3 +6,6 @@
*.o
.DS_Store
.token
scripts/*
solution/*
models/*

36
run.py
View File

@ -1,24 +1,18 @@
#!/usr/bin/python3
import sys
from scripts.language_utils import predict_gaps, train_model, load_lexicon
def _predict_gap(previous_word, next_word):
    """Return the hand-written guess distribution for a single gap.

    The result is a space-separated list of ``word:probability`` entries.
    Every rule ends with an entry whose word is empty (``:0.1``); presumably
    this is the probability mass left for any other word -- confirm against
    the expected output format of the task.

    Args:
        previous_word: Last whitespace-separated token of the left context,
            or ``""`` when the left context has no tokens.
        next_word: First whitespace-separated token of the right context,
            or ``""`` when the right context has no tokens.
    """
    lowered = previous_word.lower()
    if previous_word == "United" and next_word == "of":
        return "States:0.9 :0.1"
    if previous_word == "used":
        return "to:0.4 it:0.3 as:0.2 :0.1"
    if lowered == "in":
        return "the:0.7 a:0.1 an:0.1 :0.1"
    if lowered == "i":
        return "am:0.3 was:0.3 have:0.3 :0.1"
    if lowered in ("he", "she", "it"):
        return "is:0.3 was:0.3 has:0.3 :0.1"
    # Must be an equality test: ``lowered in "bring"`` is a substring check
    # and would also fire for fragments such as "ring", "ng" or "g".
    if lowered == "bring":
        return "something:0.3 it:0.3 them:0.3 :0.1"
    return "the:0.5 a:0.2 an:0.2 :0.1"


for line in sys.stdin:
    # Each input row must have exactly eight tab-separated columns (the
    # unpacking raises ValueError otherwise); only the last two are used.
    _, _, _, _, _, _, left_context, right_context = line.split("\t")
    left_words = left_context.split()
    right_words = right_context.split()
    # An empty context column yields no tokens; fall back to "" so the row
    # gets the default distribution instead of raising IndexError.
    previous_word = left_words[-1] if left_words else ""
    next_word = right_words[0] if right_words else ""
    print(_predict_gap(previous_word, next_word))
# Passed as the last argument to predict_gaps; presumably the maximum number
# of candidate words emitted per gap -- confirm in scripts.language_utils.
WORDS_LIMIT = 10

# (directory, lines_no) pairs processed by the script. The number is handed
# to predict_gaps as ``lines_no``; presumably the row count of that
# directory's in.tsv.xz -- verify against the data files.
DATASETS = (("dev-0", 10519), ("test-A", 7414))

if __name__ == "__main__":
    # Load the lexicon and train once, then reuse both for every dataset.
    lexicon = load_lexicon("./words_alpha.txt")
    model = train_model("./train/in.tsv.xz", "./train/expected.tsv", lexicon)
    for directory, lines_no in DATASETS:
        predict_gaps(
            model,
            f"./{directory}/in.tsv.xz",
            f"./{directory}/out.tsv",
            lexicon,
            lines_no,
            WORDS_LIMIT,
        )