test version

This commit is contained in:
pietrzakkuba 2022-03-26 19:08:19 +01:00
parent f3c9a87cdf
commit 774b5d8d4a
5 changed files with 35894 additions and 0 deletions

10519
dev-0/in.tsv Normal file

File diff suppressed because it is too large Load Diff

10519
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,28 @@
import lzma
from nltk.tokenize import word_tokenize
def predict(word_before: str, word_after: str) -> str:
    """Return the predicted word for the gap between two context words.

    This is a constant baseline: both arguments are ignored and ``'the'``
    is returned for every input.

    Args:
        word_before: Token immediately preceding the gap (unused).
        word_after: Token immediately following the gap (unused).

    Returns:
        The string ``'the'``.
    """
    return 'the'
# Disabled: the same pipeline for the dev-0 split (kept for reference).
# with open('./dev-0/in.tsv', 'w', encoding='utf-8') as file:
#     text = lzma.open('./dev-0/in.tsv.xz').read().decode('utf-8')
#     file.write(text)
# with open('./dev-0/in.tsv', encoding='utf-8') as file_in, open('./dev-0/expected.tsv', encoding='utf-8') as file_expected, open('./dev-0/out.tsv', 'w', encoding='utf-8') as file_out:
#     for line_in, line_expected in zip(file_in, file_expected):
#         _, _, _, _, _, _, before, after = line_in.split('\t')
#         before = word_tokenize(before.replace('\\n', '\n'))
#         after = word_tokenize(after.replace('\\n', '\n'))
#         file_out.write(predict(before[-1], after[0]) + '\n')
# Decompress the test-A input archive into a plain-text TSV file.
# The archive is read (and its handle closed) before the target is opened for
# writing, so a missing or corrupt archive does not leave an empty in.tsv.
with lzma.open('./test-A/in.tsv.xz') as archive:
    text = archive.read().decode('utf-8')
with open('./test-A/in.tsv', 'w', encoding='utf-8') as file:
    file.write(text)

# Write one prediction per input line to test-A/out.tsv.
with open('./test-A/in.tsv', encoding='utf-8') as file_in, \
        open('./test-A/out.tsv', 'w', encoding='utf-8') as file_out:
    for line_in in file_in:
        # Each row must have exactly eight tab-separated columns; only the
        # last two (the text before and after the gap) are used.
        _, _, _, _, _, _, before, after = line_in.split('\t')
        # The columns contain a literal backslash-n sequence; turn it into a
        # real newline before tokenizing.
        before = word_tokenize(before.replace('\\n', '\n'))
        after = word_tokenize(after.replace('\\n', '\n'))
        # A blank context column tokenizes to an empty list; fall back to an
        # empty string instead of raising IndexError.
        word_before = before[-1] if before else ''
        word_after = after[0] if after else ''
        file_out.write(predict(word_before, word_after) + '\n')

7414
test-A/in.tsv Normal file

File diff suppressed because it is too large Load Diff

7414
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff