test version
This commit is contained in:
parent
f3c9a87cdf
commit
774b5d8d4a
10519
dev-0/in.tsv
Normal file
10519
dev-0/in.tsv
Normal file
File diff suppressed because it is too large
Load Diff
10519
dev-0/out.tsv
Normal file
10519
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
28
model.py
28
model.py
@ -0,0 +1,28 @@
|
||||
import lzma
|
||||
from nltk.tokenize import word_tokenize
|
||||
|
||||
def predict(word_before: str, word_after: str) -> str:
    """Return the predicted word for the gap between two context words.

    This is a constant baseline: both arguments are accepted but ignored,
    and the result is always ``'the'``.

    Args:
        word_before: Token immediately preceding the gap (unused).
        word_after: Token immediately following the gap (unused).

    Returns:
        The string ``'the'``.
    """
    return 'the'
|
||||
|
||||
# with open('./dev-0/in.tsv', 'w', encoding='utf-8') as file:
|
||||
# text = lzma.open('./dev-0/in.tsv.xz').read().decode('utf-8')
|
||||
# file.write(text)
|
||||
|
||||
# with open('./dev-0/in.tsv', encoding='utf-8') as file_in, open('./dev-0/expected.tsv', encoding='utf-8') as file_expected, open('./dev-0/out.tsv', 'w', encoding='utf-8') as file_out:
|
||||
# for line_in, line_expected in zip(file_in, file_expected):
|
||||
# _, _, _, _, _, _, before, after = line_in.split('\t')
|
||||
# before = word_tokenize(before.replace('\\n', '\n'))
|
||||
# after = word_tokenize(after.replace('\\n', '\n'))
|
||||
# file_out.write(predict(before[-1], after[0]) + '\n')
|
||||
|
||||
|
||||
# Decompress the test-A input archive into a plain-text TSV next to it.
# The archive is opened first and through a context manager so that its handle
# is always closed, and so that a missing archive does not leave a freshly
# truncated in.tsv behind.
with lzma.open('./test-A/in.tsv.xz') as archive, \
        open('./test-A/in.tsv', 'w', encoding='utf-8') as file:
    file.write(archive.read().decode('utf-8'))

# Write exactly one prediction line to test-A/out.tsv per input row.
with open('./test-A/in.tsv', encoding='utf-8') as file_in, \
        open('./test-A/out.tsv', 'w', encoding='utf-8') as file_out:
    for line_in in file_in:
        # Each row must consist of exactly eight tab-separated fields; only
        # the last two (the text before and after the gap) are used.
        _, _, _, _, _, _, before, after = line_in.split('\t')
        # Turn literal backslash-n sequences into real newlines before
        # tokenizing.
        before = word_tokenize(before.replace('\\n', '\n'))
        after = word_tokenize(after.replace('\\n', '\n'))
        # Either context may tokenize to nothing (e.g. an empty field). Fall
        # back to an empty string instead of raising IndexError, so the run
        # does not abort and out.tsv stays aligned with in.tsv.
        word_before = before[-1] if before else ''
        word_after = after[0] if after else ''
        file_out.write(predict(word_before, word_after) + '\n')
|
7414
test-A/in.tsv
Normal file
7414
test-A/in.tsv
Normal file
File diff suppressed because it is too large
Load Diff
7414
test-A/out.tsv
Normal file
7414
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user