Bigger vocab

This commit is contained in:
Jan Nowak 2022-05-07 16:39:12 +02:00
parent 9c381a9eea
commit e581a3667e
3 changed files with 17935 additions and 17935 deletions

File diff suppressed because it is too large Load Diff

4
run.py
View File

@@ -47,7 +47,7 @@ def get_words_lines_from_file(file_path):
# break
-vocab_size = 30000
+vocab_size = 40000
vocab = build_vocab_from_iterator(
get_words_lines_from_file('train/in.tsv.xz'),
@@ -97,7 +97,7 @@ class Bigrams(IterableDataset):
def train():
-batch_size = 15000
+batch_size = 10000
train_dataset = Bigrams('train/in.tsv.xz', vocab_size)

File diff suppressed because it is too large Load Diff