# Flat mapping of run settings, one key per line so the file parses as a
# YAML block mapping. Key names, order, and values are unchanged.
# NOTE(review): the meaning of each key is defined by whatever code loads
# this file, which is not visible here -- confirm before changing values.
epochs: 3
embed_size: 256
# Plain (unquoted) string value.
device: cuda
vocab_size: 35000
batch_size: 3200
learning_rate: 0.001
# NOTE(review): `k`, `wildcard_minweight`, and `beta` are not self-describing;
# verify their semantics and valid ranges against the consumer.
k: 15
wildcard_minweight: 0.01
beta: 0.85