s434704 - trigram predictions word
This commit is contained in:
parent
c50693a79b
commit
b8ad7dd579
20482
dev-0/out.tsv
20482
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
2
run.py
2
run.py
@@ -13,7 +13,7 @@ class WordGapPrediction:
|
||||
|
||||
def read_train_data(self, file):
|
||||
data = pd.read_csv(file, sep="\t", error_bad_lines=False, index_col=0, header=None)
|
||||
for index, row in data[:90000].iterrows():
|
||||
for index, row in data[:140000].iterrows():
|
||||
text = str(row[6]) + ' ' + str(row[7])
|
||||
tokens = self.tokenizer.tokenize(text)
|
||||
for w1, w2, w3 in trigrams(tokens, pad_right=True, pad_left=True):
|
||||
|
14426
test-A/out.tsv
14426
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user