s434704 - trigram predictions word

This commit is contained in:
Wojciech Jarmosz 2022-04-03 21:37:57 +02:00
parent c50693a79b
commit b8ad7dd579
3 changed files with 17455 additions and 17455 deletions

File diff suppressed because it is too large Load Diff

2
run.py
View File

@@ -13,7 +13,7 @@ class WordGapPrediction:
def read_train_data(self, file):
data = pd.read_csv(file, sep="\t", error_bad_lines=False, index_col=0, header=None)
for index, row in data[:90000].iterrows():
for index, row in data[:140000].iterrows():
text = str(row[6]) + ' ' + str(row[7])
tokens = self.tokenizer.tokenize(text)
for w1, w2, w3 in trigrams(tokens, pad_right=True, pad_left=True):

File diff suppressed because it is too large Load Diff