Bigger vocab
This commit is contained in:
parent
9c381a9eea
commit
e581a3667e
21038
dev-0/out.tsv
21038
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
4
run.py
4
run.py
@ -47,7 +47,7 @@ def get_words_lines_from_file(file_path):
|
||||
# break
|
||||
|
||||
|
||||
vocab_size = 30000
|
||||
vocab_size = 40000
|
||||
|
||||
vocab = build_vocab_from_iterator(
|
||||
get_words_lines_from_file('train/in.tsv.xz'),
|
||||
@ -97,7 +97,7 @@ class Bigrams(IterableDataset):
|
||||
|
||||
|
||||
def train():
|
||||
batch_size = 15000
|
||||
batch_size = 10000
|
||||
|
||||
train_dataset = Bigrams('train/in.tsv.xz', vocab_size)
|
||||
|
||||
|
14828
test-A/out.tsv
14828
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user