challenging-america-word-ga.../roberta_large_finetune_existing/train.logs
2021-07-10 09:05:38 +02:00

18204 lines
4.0 MiB

2021-06-18 18:38:38 | INFO | fairseq_cli.train | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 1, 'log_format': 'simple', 'log_file': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'pytorch_ddp', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': False, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_algorithm': 'LocalSGD', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False}, 'dataset': {'_name': None, 'num_workers': 1, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': None, 'batch_size': 1, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 1, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 12500000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': False, 'update_freq': [128], 'lr': [0.0001], 'stop_min_lr': -1.0, 'use_bmuf': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'roberta.large/model.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 0, 'keep_interval_updates': -1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': False, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'loss', 'maximize_best_checkpoint_metric': False, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': Namespace(no_progress_bar=False, log_interval=1, log_format='simple', log_file=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=True, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='masked_lm', tokenizer=None, bpe=None, optimizer='adam', lr_scheduler='polynomial_decay', simul_type=None, scoring='bleu', task='masked_lm', num_workers=1, skip_invalid_size_inputs_valid_test=False, max_tokens=None, batch_size=1, required_batch_size_multiple=8, required_seq_len_multiple=1, dataset_impl=None, data_buffer_size=10, train_subset='train', valid_subset='valid', combine_valid_subsets=None, ignore_unused_valid_subsets=False, validate_interval=1, validate_interval_updates=0, validate_after_updates=0, fixed_validation_seed=None, disable_validation=False, max_tokens_valid=None, batch_size_valid=1, max_valid_steps=None, curriculum=0, gen_subset='test', num_shards=1, shard_id=0, distributed_world_size=1, distributed_num_procs=1, distributed_rank=0, distributed_backend='nccl', distributed_init_method=None, distributed_port=-1, device_id=0, distributed_no_spawn=False, ddp_backend='pytorch_ddp', ddp_comm_hook='none', bucket_cap_mb=25, fix_batches_to_gpus=False, find_unused_parameters=False, fast_stat_sync=False, heartbeat_timeout=-1, broadcast_buffers=False, slowmo_momentum=None, slowmo_algorithm='LocalSGD', localsgd_frequency=3, nprocs_per_node=1, pipeline_model_parallel=False, pipeline_balance=None, pipeline_devices=None, pipeline_chunks=0, pipeline_encoder_balance=None, pipeline_encoder_devices=None, pipeline_decoder_balance=None, pipeline_decoder_devices=None, pipeline_checkpoint='never', zero_sharding='none', no_reshard_after_forward=False, fp32_reduce_scatter=False, cpu_offload=False, use_sharded_state=False, arch='roberta_large', max_epoch=0, max_update=12500000, stop_time_hours=0, clip_norm=0.0, sentence_avg=False, update_freq=[128], lr=[0.0001], stop_min_lr=-1.0, use_bmuf=False, save_dir='checkpoints', restore_file='roberta.large/model.pt', finetune_from_model=None, reset_dataloader=False, reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, optimizer_overrides='{}', save_interval=1, save_interval_updates=0, keep_interval_updates=-1, keep_interval_updates_pattern=-1, keep_last_epochs=-1, keep_best_checkpoints=-1, no_save=False, no_epoch_checkpoints=False, no_last_checkpoints=False, no_save_optimizer_state=False, best_checkpoint_metric='loss', maximize_best_checkpoint_metric=False, patience=-1, checkpoint_suffix='', checkpoint_shard_count=1, load_checkpoint_on_all_dp_ranks=False, write_checkpoints_asynchronously=False, encoder_layerdrop=0, encoder_layers_to_keep=None, quant_noise_pq=0, quant_noise_pq_block_size=8, quant_noise_scalar=0, spectral_norm_classification_head=False, min_params_to_wrap=100000000, data='data-bin/wikitext-103', sample_break_mode='complete', tokens_per_sample=512, mask_prob=0.15, leave_unmasked_prob=0.1, random_token_prob=0.1, freq_weighted_replacement=False, mask_whole_words=False, mask_multiple_length=1, mask_stdev=0.0, shorten_method='none', shorten_data_split_list='', adam_betas='(0.9,0.98)', adam_eps=1e-06, weight_decay=0.01, use_old_adam=False, warmup_updates=1000, force_anneal=None, end_learning_rate=0.0, power=1.0, total_num_update='12500000', pad=1, eos=2, unk=3, dropout=0.1, attention_dropout=0.1, no_seed_provided=False, encoder_layers=24, encoder_embed_dim=1024, encoder_ffn_embed_dim=4096, encoder_attention_heads=16, activation_dropout=0.0, pooler_dropout=0.0, max_source_positions=512, no_token_positional_embeddings=False, encoder_learned_pos=True, layernorm_embedding=True, no_scale_embedding=True, activation_fn='gelu', encoder_normalize_before=False, pooler_activation_fn='tanh', untie_weights_roberta=False, adaptive_input=False, _name='roberta_large'), 'task': Namespace(no_progress_bar=False, log_interval=1, log_format='simple', log_file=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=True, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='masked_lm', tokenizer=None, bpe=None, optimizer='adam', lr_scheduler='polynomial_decay', simul_type=None, scoring='bleu', task='masked_lm', num_workers=1, skip_invalid_size_inputs_valid_test=False, max_tokens=None, batch_size=1, required_batch_size_multiple=8, required_seq_len_multiple=1, dataset_impl=None, data_buffer_size=10, train_subset='train', valid_subset='valid', combine_valid_subsets=None, ignore_unused_valid_subsets=False, validate_interval=1, validate_interval_updates=0, validate_after_updates=0, fixed_validation_seed=None, disable_validation=False, max_tokens_valid=None, batch_size_valid=1, max_valid_steps=None, curriculum=0, gen_subset='test', num_shards=1, shard_id=0, distributed_world_size=1, distributed_num_procs=1, distributed_rank=0, distributed_backend='nccl', distributed_init_method=None, distributed_port=-1, device_id=0, distributed_no_spawn=False, ddp_backend='pytorch_ddp', ddp_comm_hook='none', bucket_cap_mb=25, fix_batches_to_gpus=False, find_unused_parameters=False, fast_stat_sync=False, heartbeat_timeout=-1, broadcast_buffers=False, slowmo_momentum=None, slowmo_algorithm='LocalSGD', localsgd_frequency=3, nprocs_per_node=1, pipeline_model_parallel=False, pipeline_balance=None, pipeline_devices=None, pipeline_chunks=0, pipeline_encoder_balance=None, pipeline_encoder_devices=None, pipeline_decoder_balance=None, pipeline_decoder_devices=None, pipeline_checkpoint='never', zero_sharding='none', no_reshard_after_forward=False, fp32_reduce_scatter=False, cpu_offload=False, use_sharded_state=False, arch='roberta_large', max_epoch=0, max_update=12500000, stop_time_hours=0, clip_norm=0.0, sentence_avg=False, update_freq=[128], lr=[0.0001], stop_min_lr=-1.0, use_bmuf=False, save_dir='checkpoints', restore_file='roberta.large/model.pt', finetune_from_model=None, reset_dataloader=False, reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, optimizer_overrides='{}', save_interval=1, save_interval_updates=0, keep_interval_updates=-1, keep_interval_updates_pattern=-1, keep_last_epochs=-1, keep_best_checkpoints=-1, no_save=False, no_epoch_checkpoints=False, no_last_checkpoints=False, no_save_optimizer_state=False, best_checkpoint_metric='loss', maximize_best_checkpoint_metric=False, patience=-1, checkpoint_suffix='', checkpoint_shard_count=1, load_checkpoint_on_all_dp_ranks=False, write_checkpoints_asynchronously=False, encoder_layerdrop=0, encoder_layers_to_keep=None, quant_noise_pq=0, quant_noise_pq_block_size=8, quant_noise_scalar=0, spectral_norm_classification_head=False, min_params_to_wrap=100000000, data='data-bin/wikitext-103', sample_break_mode='complete', tokens_per_sample=512, mask_prob=0.15, leave_unmasked_prob=0.1, random_token_prob=0.1, freq_weighted_replacement=False, mask_whole_words=False, mask_multiple_length=1, mask_stdev=0.0, shorten_method='none', shorten_data_split_list='', adam_betas='(0.9,0.98)', adam_eps=1e-06, weight_decay=0.01, use_old_adam=False, warmup_updates=1000, force_anneal=None, end_learning_rate=0.0, power=1.0, total_num_update='12500000', pad=1, eos=2, unk=3, dropout=0.1, attention_dropout=0.1, no_seed_provided=False, encoder_layers=24, encoder_embed_dim=1024, encoder_ffn_embed_dim=4096, encoder_attention_heads=16, activation_dropout=0.0, pooler_dropout=0.0, max_source_positions=512, no_token_positional_embeddings=False, encoder_learned_pos=True, layernorm_embedding=True, no_scale_embedding=True, activation_fn='gelu', encoder_normalize_before=False, pooler_activation_fn='tanh', untie_weights_roberta=False, adaptive_input=False, _name='masked_lm'), 'criterion': Namespace(no_progress_bar=False, log_interval=1, log_format='simple', log_file=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=True, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='masked_lm', tokenizer=None, bpe=None, optimizer='adam', lr_scheduler='polynomial_decay', simul_type=None, scoring='bleu', task='masked_lm', num_workers=1, skip_invalid_size_inputs_valid_test=False, max_tokens=None, batch_size=1, required_batch_size_multiple=8, required_seq_len_multiple=1, dataset_impl=None, data_buffer_size=10, train_subset='train', valid_subset='valid', combine_valid_subsets=None, ignore_unused_valid_subsets=False, validate_interval=1, validate_interval_updates=0, validate_after_updates=0, fixed_validation_seed=None, disable_validation=False, max_tokens_valid=None, batch_size_valid=1, max_valid_steps=None, curriculum=0, gen_subset='test', num_shards=1, shard_id=0, distributed_world_size=1, distributed_num_procs=1, distributed_rank=0, distributed_backend='nccl', distributed_init_method=None, distributed_port=-1, device_id=0, distributed_no_spawn=False, ddp_backend='pytorch_ddp', ddp_comm_hook='none', bucket_cap_mb=25, fix_batches_to_gpus=False, find_unused_parameters=False, fast_stat_sync=False, heartbeat_timeout=-1, broadcast_buffers=False, slowmo_momentum=None, slowmo_algorithm='LocalSGD', localsgd_frequency=3, nprocs_per_node=1, pipeline_model_parallel=False, pipeline_balance=None, pipeline_devices=None, pipeline_chunks=0, pipeline_encoder_balance=None, pipeline_encoder_devices=None, pipeline_decoder_balance=None, pipeline_decoder_devices=None, pipeline_checkpoint='never', zero_sharding='none', no_reshard_after_forward=False, fp32_reduce_scatter=False, cpu_offload=False, use_sharded_state=False, arch='roberta_large', max_epoch=0, max_update=12500000, stop_time_hours=0, clip_norm=0.0, sentence_avg=False, update_freq=[128], lr=[0.0001], stop_min_lr=-1.0, use_bmuf=False, save_dir='checkpoints', restore_file='roberta.large/model.pt', finetune_from_model=None, reset_dataloader=False, reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, optimizer_overrides='{}', save_interval=1, save_interval_updates=0, keep_interval_updates=-1, keep_interval_updates_pattern=-1, keep_last_epochs=-1, keep_best_checkpoints=-1, no_save=False, no_epoch_checkpoints=False, no_last_checkpoints=False, no_save_optimizer_state=False, best_checkpoint_metric='loss', maximize_best_checkpoint_metric=False, patience=-1, checkpoint_suffix='', checkpoint_shard_count=1, load_checkpoint_on_all_dp_ranks=False, write_checkpoints_asynchronously=False, encoder_layerdrop=0, encoder_layers_to_keep=None, quant_noise_pq=0, quant_noise_pq_block_size=8, quant_noise_scalar=0, spectral_norm_classification_head=False, min_params_to_wrap=100000000, data='data-bin/wikitext-103', sample_break_mode='complete', tokens_per_sample=512, mask_prob=0.15, leave_unmasked_prob=0.1, random_token_prob=0.1, freq_weighted_replacement=False, mask_whole_words=False, mask_multiple_length=1, mask_stdev=0.0, shorten_method='none', shorten_data_split_list='', adam_betas='(0.9,0.98)', adam_eps=1e-06, weight_decay=0.01, use_old_adam=False, warmup_updates=1000, force_anneal=None, end_learning_rate=0.0, power=1.0, total_num_update='12500000', pad=1, eos=2, unk=3, dropout=0.1, attention_dropout=0.1, no_seed_provided=False, encoder_layers=24, encoder_embed_dim=1024, encoder_ffn_embed_dim=4096, encoder_attention_heads=16, activation_dropout=0.0, pooler_dropout=0.0, max_source_positions=512, no_token_positional_embeddings=False, encoder_learned_pos=True, layernorm_embedding=True, no_scale_embedding=True, activation_fn='gelu', encoder_normalize_before=False, pooler_activation_fn='tanh', untie_weights_roberta=False, adaptive_input=False, _name='masked_lm'), 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-06, 'weight_decay': 0.01, 'use_old_adam': False, 'tpu': False, 'lr': [0.0001]}, 'lr_scheduler': {'_name': 'polynomial_decay', 'warmup_updates': 1000, 'force_anneal': None, 'end_learning_rate': 0.0, 'power': 1.0, 'total_num_update': 12500000.0, 'lr': [0.0001]}, 'scoring': {'_name': 'bleu', 'pad': 1, 'eos': 2, 'unk': 3}, 'bpe': None, 'tokenizer': None, 'simul_type': None}
2021-06-18 18:38:38 | INFO | fairseq.tasks.masked_lm | dictionary: 50264 types
2021-06-18 18:38:44 | INFO | fairseq_cli.train | RobertaModel(
(encoder): RobertaEncoder(
(sentence_encoder): TransformerEncoder(
(dropout_module): FairseqDropout()
(embed_tokens): Embedding(50265, 1024, padding_idx=1)
(embed_positions): LearnedPositionalEmbedding(514, 1024, padding_idx=1)
(layernorm_embedding): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(layers): ModuleList(
(0): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(1): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(2): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(3): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(4): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(5): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(6): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(7): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(8): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(9): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(10): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(11): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(12): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(13): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(14): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(15): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(16): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(17): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(18): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(19): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(20): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(21): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(22): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(23): TransformerEncoderLayer(
(self_attn): MultiheadAttention(
(dropout_module): FairseqDropout()
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout_module): FairseqDropout()
(activation_dropout_module): FairseqDropout()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
(final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
(lm_head): RobertaLMHead(
(dense): Linear(in_features=1024, out_features=1024, bias=True)
(layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
(classification_heads): ModuleDict()
)
2021-06-18 18:38:44 | INFO | fairseq_cli.train | task: MaskedLMTask
2021-06-18 18:38:44 | INFO | fairseq_cli.train | model: RobertaModel
2021-06-18 18:38:44 | INFO | fairseq_cli.train | criterion: MaskedLmLoss
2021-06-18 18:38:44 | INFO | fairseq_cli.train | num. shared model params: 355,411,033 (num. trained: 355,411,033)
2021-06-18 18:38:44 | INFO | fairseq_cli.train | num. expert model params: 0 (num. trained: 0)
2021-06-18 18:38:44 | INFO | fairseq.data.data_utils | loaded 1,632,586 examples from: data-bin/wikitext-103/valid
2021-06-18 18:38:44 | INFO | fairseq.tasks.masked_lm | loaded 34843 blocks from: data-bin/wikitext-103/valid
2021-06-18 18:38:46 | INFO | fairseq.trainer | detected shared parameter: encoder.sentence_encoder.embed_tokens.weight <- encoder.lm_head.weight
2021-06-18 18:38:46 | INFO | fairseq.utils | ***********************CUDA enviroments for all 1 workers***********************
2021-06-18 18:38:46 | INFO | fairseq.utils | rank 0: capabilities = 7.5 ; total memory = 10.761 GB ; name = NVIDIA GeForce RTX 2080 Ti
2021-06-18 18:38:46 | INFO | fairseq.utils | ***********************CUDA enviroments for all 1 workers***********************
2021-06-18 18:38:46 | INFO | fairseq_cli.train | training on 1 devices (GPUs/TPUs)
2021-06-18 18:38:46 | INFO | fairseq_cli.train | max tokens per device = None and max sentences per device = 1
2021-06-18 18:38:46 | INFO | fairseq.trainer | Preparing to load checkpoint roberta.large/model.pt
2021-06-18 18:38:47 | INFO | fairseq.trainer | Loaded checkpoint roberta.large/model.pt (epoch 1 @ 0 updates)
2021-06-18 18:38:47 | INFO | fairseq.trainer | loading train data for epoch 1
2021-06-18 18:38:47 | INFO | fairseq.data.data_utils | loaded 17,956,888 examples from: data-bin/wikitext-103/train
2021-06-18 18:38:47 | INFO | fairseq.tasks.masked_lm | loaded 384203 blocks from: data-bin/wikitext-103/train
2021-06-18 18:38:47 | WARNING | fairseq.tasks.fairseq_task | 3 samples have invalid sizes and will be skipped, max_positions=512, first few sample ids=[64349, 331776, 59863]
2021-06-18 18:38:48 | INFO | fairseq.trainer | begin training epoch 1
2021-06-18 18:38:48 | INFO | fairseq_cli.train | Start iterating over samples
2021-06-18 18:39:08 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0
2021-06-18 18:39:24 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0
2021-06-18 18:39:43 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0
2021-06-18 18:40:02 | INFO | train_inner | epoch 001: 4 / 3002 loss=3.569, ppl=11.87, wps=0, ups=0, wpb=64860, bsz=128, num_updates=1, lr=1e-07, gnorm=3.964, loss_scale=16, train_wall=72, gb_free=2.8, wall=77
2021-06-18 18:40:13 | INFO | train_inner | epoch 001: 5 / 3002 loss=3.558, ppl=11.77, wps=5882, ups=0.09, wpb=64767, bsz=128, num_updates=2, lr=2e-07, gnorm=3.582, loss_scale=16, train_wall=11, gb_free=2.8, wall=88
2021-06-18 18:40:24 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-18 18:40:35 | INFO | train_inner | epoch 001: 7 / 3002 loss=3.906, ppl=14.99, wps=2932.7, ups=0.05, wpb=64887, bsz=128, num_updates=3, lr=3e-07, gnorm=3.441, loss_scale=8, train_wall=21, gb_free=2.8, wall=110
2021-06-18 18:40:47 | INFO | train_inner | epoch 001: 8 / 3002 loss=3.275, ppl=9.68, wps=5846.8, ups=0.09, wpb=64787, bsz=128, num_updates=4, lr=4e-07, gnorm=3.198, loss_scale=8, train_wall=11, gb_free=2.8, wall=121
2021-06-18 18:40:57 | INFO | train_inner | epoch 001: 9 / 3002 loss=3.637, ppl=12.44, wps=5940.2, ups=0.09, wpb=64809, bsz=128, num_updates=5, lr=5e-07, gnorm=3.445, loss_scale=8, train_wall=10, gb_free=2.8, wall=132
2021-06-18 18:41:08 | INFO | train_inner | epoch 001: 10 / 3002 loss=3.616, ppl=12.26, wps=5926.2, ups=0.09, wpb=64881, bsz=128, num_updates=6, lr=6e-07, gnorm=5.36, loss_scale=8, train_wall=11, gb_free=2.8, wall=143
2021-06-18 18:41:19 | INFO | train_inner | epoch 001: 11 / 3002 loss=3.569, ppl=11.87, wps=5947.9, ups=0.09, wpb=64948, bsz=128, num_updates=7, lr=7e-07, gnorm=3.479, loss_scale=8, train_wall=10, gb_free=2.8, wall=154
2021-06-18 18:41:30 | INFO | train_inner | epoch 001: 12 / 3002 loss=3.726, ppl=13.23, wps=5854.5, ups=0.09, wpb=64832, bsz=128, num_updates=8, lr=8e-07, gnorm=3.433, loss_scale=8, train_wall=11, gb_free=2.8, wall=165
2021-06-18 18:41:41 | INFO | train_inner | epoch 001: 13 / 3002 loss=3.573, ppl=11.9, wps=5861.8, ups=0.09, wpb=64828, bsz=128, num_updates=9, lr=9e-07, gnorm=3.643, loss_scale=8, train_wall=11, gb_free=2.8, wall=176
2021-06-18 18:41:53 | INFO | train_inner | epoch 001: 14 / 3002 loss=3.462, ppl=11.02, wps=5807.1, ups=0.09, wpb=64797, bsz=128, num_updates=10, lr=1e-06, gnorm=3.72, loss_scale=8, train_wall=11, gb_free=2.8, wall=187
2021-06-18 18:42:04 | INFO | train_inner | epoch 001: 15 / 3002 loss=3.599, ppl=12.11, wps=5857.8, ups=0.09, wpb=64869, bsz=128, num_updates=11, lr=1.1e-06, gnorm=3.424, loss_scale=8, train_wall=11, gb_free=2.8, wall=198
2021-06-18 18:42:15 | INFO | train_inner | epoch 001: 16 / 3002 loss=3.492, ppl=11.25, wps=5926.9, ups=0.09, wpb=64829, bsz=128, num_updates=12, lr=1.2e-06, gnorm=3.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=209
2021-06-18 18:42:26 | INFO | train_inner | epoch 001: 17 / 3002 loss=3.548, ppl=11.69, wps=5841.6, ups=0.09, wpb=64835, bsz=128, num_updates=13, lr=1.3e-06, gnorm=3.787, loss_scale=8, train_wall=11, gb_free=2.8, wall=220
2021-06-18 18:42:37 | INFO | train_inner | epoch 001: 18 / 3002 loss=3.546, ppl=11.68, wps=5985.2, ups=0.09, wpb=64822, bsz=128, num_updates=14, lr=1.4e-06, gnorm=3.763, loss_scale=8, train_wall=10, gb_free=2.8, wall=231
2021-06-18 18:42:48 | INFO | train_inner | epoch 001: 19 / 3002 loss=3.859, ppl=14.51, wps=5799.5, ups=0.09, wpb=64786, bsz=128, num_updates=15, lr=1.5e-06, gnorm=3.282, loss_scale=8, train_wall=11, gb_free=2.8, wall=242
2021-06-18 18:42:59 | INFO | train_inner | epoch 001: 20 / 3002 loss=3.651, ppl=12.56, wps=5880.6, ups=0.09, wpb=64800, bsz=128, num_updates=16, lr=1.6e-06, gnorm=3.145, loss_scale=8, train_wall=11, gb_free=2.8, wall=253
2021-06-18 18:43:10 | INFO | train_inner | epoch 001: 21 / 3002 loss=3.564, ppl=11.83, wps=5874.5, ups=0.09, wpb=64823, bsz=128, num_updates=17, lr=1.7e-06, gnorm=3.388, loss_scale=8, train_wall=11, gb_free=2.8, wall=264
2021-06-18 18:43:21 | INFO | train_inner | epoch 001: 22 / 3002 loss=3.633, ppl=12.41, wps=5830.1, ups=0.09, wpb=64878, bsz=128, num_updates=18, lr=1.8e-06, gnorm=3.109, loss_scale=8, train_wall=11, gb_free=2.8, wall=275
2021-06-18 18:43:32 | INFO | train_inner | epoch 001: 23 / 3002 loss=3.678, ppl=12.8, wps=5835, ups=0.09, wpb=64879, bsz=128, num_updates=19, lr=1.9e-06, gnorm=3.333, loss_scale=8, train_wall=11, gb_free=2.8, wall=286
2021-06-18 18:43:43 | INFO | train_inner | epoch 001: 24 / 3002 loss=3.615, ppl=12.25, wps=5806.7, ups=0.09, wpb=64756, bsz=128, num_updates=20, lr=2e-06, gnorm=3.177, loss_scale=8, train_wall=11, gb_free=2.8, wall=297
2021-06-18 18:43:54 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-18 18:44:05 | INFO | train_inner | epoch 001: 26 / 3002 loss=3.55, ppl=11.71, wps=2924.9, ups=0.05, wpb=64849, bsz=128, num_updates=21, lr=2.1e-06, gnorm=3.453, loss_scale=4, train_wall=21, gb_free=2.8, wall=320
2021-06-18 18:44:16 | INFO | train_inner | epoch 001: 27 / 3002 loss=3.533, ppl=11.57, wps=5817.8, ups=0.09, wpb=64831, bsz=128, num_updates=22, lr=2.2e-06, gnorm=3.318, loss_scale=4, train_wall=11, gb_free=2.8, wall=331
2021-06-18 18:44:27 | INFO | train_inner | epoch 001: 28 / 3002 loss=3.649, ppl=12.54, wps=5885.1, ups=0.09, wpb=64771, bsz=128, num_updates=23, lr=2.3e-06, gnorm=3.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=342
2021-06-18 18:44:38 | INFO | train_inner | epoch 001: 29 / 3002 loss=3.733, ppl=13.3, wps=5933.2, ups=0.09, wpb=64882, bsz=128, num_updates=24, lr=2.4e-06, gnorm=3.132, loss_scale=4, train_wall=10, gb_free=2.8, wall=353
2021-06-18 18:44:49 | INFO | train_inner | epoch 001: 30 / 3002 loss=3.907, ppl=15, wps=5908.7, ups=0.09, wpb=64748, bsz=128, num_updates=25, lr=2.5e-06, gnorm=3.306, loss_scale=4, train_wall=10, gb_free=2.8, wall=364
2021-06-18 18:45:00 | INFO | train_inner | epoch 001: 31 / 3002 loss=3.697, ppl=12.97, wps=5858.9, ups=0.09, wpb=64800, bsz=128, num_updates=26, lr=2.6e-06, gnorm=3.339, loss_scale=4, train_wall=11, gb_free=2.8, wall=375
2021-06-18 18:45:11 | INFO | train_inner | epoch 001: 32 / 3002 loss=3.447, ppl=10.91, wps=5855.1, ups=0.09, wpb=64823, bsz=128, num_updates=27, lr=2.7e-06, gnorm=3.21, loss_scale=4, train_wall=11, gb_free=2.8, wall=386
2021-06-18 18:45:22 | INFO | train_inner | epoch 001: 33 / 3002 loss=3.61, ppl=12.21, wps=5905.7, ups=0.09, wpb=64738, bsz=128, num_updates=28, lr=2.8e-06, gnorm=3.213, loss_scale=4, train_wall=11, gb_free=2.8, wall=397
2021-06-18 18:45:33 | INFO | train_inner | epoch 001: 34 / 3002 loss=3.881, ppl=14.73, wps=5919.6, ups=0.09, wpb=64822, bsz=128, num_updates=29, lr=2.9e-06, gnorm=3.462, loss_scale=4, train_wall=10, gb_free=2.8, wall=408
2021-06-18 18:45:44 | INFO | train_inner | epoch 001: 35 / 3002 loss=3.607, ppl=12.18, wps=6026.8, ups=0.09, wpb=64879, bsz=128, num_updates=30, lr=3e-06, gnorm=3.014, loss_scale=4, train_wall=10, gb_free=2.8, wall=419
2021-06-18 18:45:55 | INFO | train_inner | epoch 001: 36 / 3002 loss=3.572, ppl=11.89, wps=5864, ups=0.09, wpb=64796, bsz=128, num_updates=31, lr=3.1e-06, gnorm=3.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=430
2021-06-18 18:46:06 | INFO | train_inner | epoch 001: 37 / 3002 loss=3.424, ppl=10.73, wps=5846.3, ups=0.09, wpb=64799, bsz=128, num_updates=32, lr=3.2e-06, gnorm=2.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=441
2021-06-18 18:46:17 | INFO | train_inner | epoch 001: 38 / 3002 loss=3.658, ppl=12.63, wps=5851.2, ups=0.09, wpb=64874, bsz=128, num_updates=33, lr=3.3e-06, gnorm=3.793, loss_scale=4, train_wall=11, gb_free=2.8, wall=452
2021-06-18 18:46:28 | INFO | train_inner | epoch 001: 39 / 3002 loss=3.602, ppl=12.14, wps=5884.3, ups=0.09, wpb=64825, bsz=128, num_updates=34, lr=3.4e-06, gnorm=3.274, loss_scale=4, train_wall=11, gb_free=2.8, wall=463
2021-06-18 18:46:40 | INFO | train_inner | epoch 001: 40 / 3002 loss=3.65, ppl=12.56, wps=5818.2, ups=0.09, wpb=64809, bsz=128, num_updates=35, lr=3.5e-06, gnorm=3.095, loss_scale=4, train_wall=11, gb_free=2.8, wall=474
2021-06-18 18:46:51 | INFO | train_inner | epoch 001: 41 / 3002 loss=3.643, ppl=12.49, wps=5911.2, ups=0.09, wpb=64797, bsz=128, num_updates=36, lr=3.6e-06, gnorm=3.168, loss_scale=4, train_wall=10, gb_free=2.8, wall=485
2021-06-18 18:47:02 | INFO | train_inner | epoch 001: 42 / 3002 loss=3.551, ppl=11.72, wps=5876.4, ups=0.09, wpb=64805, bsz=128, num_updates=37, lr=3.7e-06, gnorm=2.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=496
2021-06-18 18:47:12 | INFO | train_inner | epoch 001: 43 / 3002 loss=3.48, ppl=11.16, wps=5942.3, ups=0.09, wpb=64876, bsz=128, num_updates=38, lr=3.8e-06, gnorm=3.414, loss_scale=4, train_wall=10, gb_free=2.8, wall=507
2021-06-18 18:47:24 | INFO | train_inner | epoch 001: 44 / 3002 loss=3.493, ppl=11.26, wps=5821, ups=0.09, wpb=64663, bsz=128, num_updates=39, lr=3.9e-06, gnorm=3.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=518
2021-06-18 18:47:35 | INFO | train_inner | epoch 001: 45 / 3002 loss=3.484, ppl=11.19, wps=5874.8, ups=0.09, wpb=64839, bsz=128, num_updates=40, lr=4e-06, gnorm=3.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=529
2021-06-18 18:47:46 | INFO | train_inner | epoch 001: 46 / 3002 loss=3.58, ppl=11.96, wps=5845.6, ups=0.09, wpb=64851, bsz=128, num_updates=41, lr=4.1e-06, gnorm=3.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=540
2021-06-18 18:47:57 | INFO | train_inner | epoch 001: 47 / 3002 loss=3.605, ppl=12.17, wps=5803.7, ups=0.09, wpb=64794, bsz=128, num_updates=42, lr=4.2e-06, gnorm=3.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=551
2021-06-18 18:48:08 | INFO | train_inner | epoch 001: 48 / 3002 loss=3.779, ppl=13.72, wps=5853.9, ups=0.09, wpb=64833, bsz=128, num_updates=43, lr=4.3e-06, gnorm=3.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=562
2021-06-18 18:48:19 | INFO | train_inner | epoch 001: 49 / 3002 loss=3.782, ppl=13.76, wps=5831.2, ups=0.09, wpb=64742, bsz=128, num_updates=44, lr=4.4e-06, gnorm=5.321, loss_scale=4, train_wall=11, gb_free=2.8, wall=573
2021-06-18 18:48:30 | INFO | train_inner | epoch 001: 50 / 3002 loss=3.66, ppl=12.64, wps=5727.1, ups=0.09, wpb=64875, bsz=128, num_updates=45, lr=4.5e-06, gnorm=3.136, loss_scale=4, train_wall=11, gb_free=2.8, wall=585
2021-06-18 18:48:42 | INFO | train_inner | epoch 001: 51 / 3002 loss=3.668, ppl=12.71, wps=5742.3, ups=0.09, wpb=64719, bsz=128, num_updates=46, lr=4.6e-06, gnorm=3.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=596
2021-06-18 18:48:53 | INFO | train_inner | epoch 001: 52 / 3002 loss=3.53, ppl=11.55, wps=5880.5, ups=0.09, wpb=64846, bsz=128, num_updates=47, lr=4.7e-06, gnorm=3.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=607
2021-06-18 18:49:04 | INFO | train_inner | epoch 001: 53 / 3002 loss=3.608, ppl=12.19, wps=5858.2, ups=0.09, wpb=64883, bsz=128, num_updates=48, lr=4.8e-06, gnorm=2.834, loss_scale=4, train_wall=11, gb_free=2.8, wall=618
2021-06-18 18:49:15 | INFO | train_inner | epoch 001: 54 / 3002 loss=3.487, ppl=11.22, wps=5852.3, ups=0.09, wpb=64756, bsz=128, num_updates=49, lr=4.9e-06, gnorm=2.899, loss_scale=4, train_wall=11, gb_free=2.8, wall=629
2021-06-18 18:49:26 | INFO | train_inner | epoch 001: 55 / 3002 loss=3.507, ppl=11.37, wps=5812.6, ups=0.09, wpb=64801, bsz=128, num_updates=50, lr=5e-06, gnorm=2.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=640
2021-06-18 18:49:37 | INFO | train_inner | epoch 001: 56 / 3002 loss=3.645, ppl=12.51, wps=5817.4, ups=0.09, wpb=64834, bsz=128, num_updates=51, lr=5.1e-06, gnorm=3.255, loss_scale=4, train_wall=11, gb_free=2.8, wall=651
2021-06-18 18:49:48 | INFO | train_inner | epoch 001: 57 / 3002 loss=3.564, ppl=11.83, wps=5793.8, ups=0.09, wpb=64793, bsz=128, num_updates=52, lr=5.2e-06, gnorm=3.108, loss_scale=4, train_wall=11, gb_free=2.8, wall=663
2021-06-18 18:49:59 | INFO | train_inner | epoch 001: 58 / 3002 loss=3.505, ppl=11.36, wps=5914.7, ups=0.09, wpb=64779, bsz=128, num_updates=53, lr=5.3e-06, gnorm=3.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=674
2021-06-18 18:50:10 | INFO | train_inner | epoch 001: 59 / 3002 loss=3.495, ppl=11.28, wps=5829.9, ups=0.09, wpb=64947, bsz=128, num_updates=54, lr=5.4e-06, gnorm=3.044, loss_scale=4, train_wall=11, gb_free=2.8, wall=685
2021-06-18 18:50:21 | INFO | train_inner | epoch 001: 60 / 3002 loss=3.575, ppl=11.92, wps=5852.8, ups=0.09, wpb=64851, bsz=128, num_updates=55, lr=5.5e-06, gnorm=3.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=696
2021-06-18 18:50:33 | INFO | train_inner | epoch 001: 61 / 3002 loss=3.729, ppl=13.26, wps=5843.5, ups=0.09, wpb=64876, bsz=128, num_updates=56, lr=5.6e-06, gnorm=3.073, loss_scale=4, train_wall=11, gb_free=2.8, wall=707
2021-06-18 18:50:44 | INFO | train_inner | epoch 001: 62 / 3002 loss=3.527, ppl=11.53, wps=5894.7, ups=0.09, wpb=64912, bsz=128, num_updates=57, lr=5.7e-06, gnorm=3.38, loss_scale=4, train_wall=11, gb_free=2.8, wall=718
2021-06-18 18:50:55 | INFO | train_inner | epoch 001: 63 / 3002 loss=3.683, ppl=12.84, wps=5855.9, ups=0.09, wpb=64833, bsz=128, num_updates=58, lr=5.8e-06, gnorm=3.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=729
2021-06-18 18:51:06 | INFO | train_inner | epoch 001: 64 / 3002 loss=3.427, ppl=10.75, wps=5883.1, ups=0.09, wpb=64809, bsz=128, num_updates=59, lr=5.9e-06, gnorm=2.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=740
2021-06-18 18:51:17 | INFO | train_inner | epoch 001: 65 / 3002 loss=3.546, ppl=11.68, wps=5743.5, ups=0.09, wpb=64794, bsz=128, num_updates=60, lr=6e-06, gnorm=3.001, loss_scale=4, train_wall=11, gb_free=2.8, wall=751
2021-06-18 18:51:28 | INFO | train_inner | epoch 001: 66 / 3002 loss=3.402, ppl=10.57, wps=5846.9, ups=0.09, wpb=64814, bsz=128, num_updates=61, lr=6.1e-06, gnorm=3.096, loss_scale=4, train_wall=11, gb_free=2.8, wall=762
2021-06-18 18:51:39 | INFO | train_inner | epoch 001: 67 / 3002 loss=3.629, ppl=12.37, wps=5871.9, ups=0.09, wpb=64886, bsz=128, num_updates=62, lr=6.2e-06, gnorm=3.344, loss_scale=4, train_wall=11, gb_free=2.8, wall=773
2021-06-18 18:51:50 | INFO | train_inner | epoch 001: 68 / 3002 loss=3.508, ppl=11.38, wps=5799.7, ups=0.09, wpb=64813, bsz=128, num_updates=63, lr=6.3e-06, gnorm=3.045, loss_scale=4, train_wall=11, gb_free=2.8, wall=785
2021-06-18 18:52:01 | INFO | train_inner | epoch 001: 69 / 3002 loss=3.765, ppl=13.59, wps=5947.1, ups=0.09, wpb=64780, bsz=128, num_updates=64, lr=6.4e-06, gnorm=3.147, loss_scale=4, train_wall=10, gb_free=2.8, wall=795
2021-06-18 18:52:12 | INFO | train_inner | epoch 001: 70 / 3002 loss=3.443, ppl=10.87, wps=5924.5, ups=0.09, wpb=64779, bsz=128, num_updates=65, lr=6.5e-06, gnorm=7.358, loss_scale=4, train_wall=10, gb_free=2.8, wall=806
2021-06-18 18:52:23 | INFO | train_inner | epoch 001: 71 / 3002 loss=3.686, ppl=12.87, wps=5879.2, ups=0.09, wpb=64866, bsz=128, num_updates=66, lr=6.6e-06, gnorm=2.964, loss_scale=4, train_wall=11, gb_free=2.8, wall=817
2021-06-18 18:52:34 | INFO | train_inner | epoch 001: 72 / 3002 loss=3.743, ppl=13.39, wps=5807.3, ups=0.09, wpb=64782, bsz=128, num_updates=67, lr=6.7e-06, gnorm=4.679, loss_scale=4, train_wall=11, gb_free=2.8, wall=829
2021-06-18 18:52:45 | INFO | train_inner | epoch 001: 73 / 3002 loss=3.491, ppl=11.24, wps=5811.3, ups=0.09, wpb=64812, bsz=128, num_updates=68, lr=6.8e-06, gnorm=2.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=840
2021-06-18 18:52:56 | INFO | train_inner | epoch 001: 74 / 3002 loss=3.331, ppl=10.07, wps=5876.1, ups=0.09, wpb=64865, bsz=128, num_updates=69, lr=6.9e-06, gnorm=3.603, loss_scale=4, train_wall=11, gb_free=2.8, wall=851
2021-06-18 18:53:07 | INFO | train_inner | epoch 001: 75 / 3002 loss=3.667, ppl=12.71, wps=5988.2, ups=0.09, wpb=64837, bsz=128, num_updates=70, lr=7e-06, gnorm=3.21, loss_scale=4, train_wall=10, gb_free=2.8, wall=862
2021-06-18 18:53:18 | INFO | train_inner | epoch 001: 76 / 3002 loss=3.651, ppl=12.56, wps=6019.5, ups=0.09, wpb=64814, bsz=128, num_updates=71, lr=7.1e-06, gnorm=3.076, loss_scale=4, train_wall=10, gb_free=2.8, wall=872
2021-06-18 18:53:29 | INFO | train_inner | epoch 001: 77 / 3002 loss=3.605, ppl=12.17, wps=5801.9, ups=0.09, wpb=64806, bsz=128, num_updates=72, lr=7.2e-06, gnorm=3.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=884
2021-06-18 18:53:40 | INFO | train_inner | epoch 001: 78 / 3002 loss=3.463, ppl=11.02, wps=5917.9, ups=0.09, wpb=64821, bsz=128, num_updates=73, lr=7.3e-06, gnorm=3.143, loss_scale=4, train_wall=10, gb_free=2.8, wall=895
2021-06-18 18:53:51 | INFO | train_inner | epoch 001: 79 / 3002 loss=3.452, ppl=10.94, wps=5983.2, ups=0.09, wpb=64793, bsz=128, num_updates=74, lr=7.4e-06, gnorm=3.109, loss_scale=4, train_wall=10, gb_free=2.8, wall=905
2021-06-18 18:54:02 | INFO | train_inner | epoch 001: 80 / 3002 loss=3.514, ppl=11.43, wps=5844.3, ups=0.09, wpb=64847, bsz=128, num_updates=75, lr=7.5e-06, gnorm=3.164, loss_scale=4, train_wall=11, gb_free=2.8, wall=916
2021-06-18 18:54:13 | INFO | train_inner | epoch 001: 81 / 3002 loss=3.581, ppl=11.97, wps=5896.9, ups=0.09, wpb=64862, bsz=128, num_updates=76, lr=7.6e-06, gnorm=2.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=927
2021-06-18 18:54:24 | INFO | train_inner | epoch 001: 82 / 3002 loss=3.385, ppl=10.44, wps=5886, ups=0.09, wpb=64814, bsz=128, num_updates=77, lr=7.7e-06, gnorm=2.943, loss_scale=4, train_wall=11, gb_free=2.8, wall=938
2021-06-18 18:54:35 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-18 18:54:46 | INFO | train_inner | epoch 001: 84 / 3002 loss=3.588, ppl=12.02, wps=2943.6, ups=0.05, wpb=64827, bsz=128, num_updates=78, lr=7.8e-06, gnorm=3.169, loss_scale=2, train_wall=21, gb_free=2.8, wall=960
2021-06-18 18:54:57 | INFO | train_inner | epoch 001: 85 / 3002 loss=3.544, ppl=11.66, wps=5882, ups=0.09, wpb=64908, bsz=128, num_updates=79, lr=7.9e-06, gnorm=3.094, loss_scale=2, train_wall=11, gb_free=2.8, wall=972
2021-06-18 18:55:08 | INFO | train_inner | epoch 001: 86 / 3002 loss=3.604, ppl=12.16, wps=5851.6, ups=0.09, wpb=64866, bsz=128, num_updates=80, lr=8e-06, gnorm=3.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=983
2021-06-18 18:55:19 | INFO | train_inner | epoch 001: 87 / 3002 loss=3.467, ppl=11.06, wps=5878.5, ups=0.09, wpb=64863, bsz=128, num_updates=81, lr=8.1e-06, gnorm=16.576, loss_scale=2, train_wall=11, gb_free=2.8, wall=994
2021-06-18 18:55:31 | INFO | train_inner | epoch 001: 88 / 3002 loss=3.567, ppl=11.85, wps=5734.4, ups=0.09, wpb=64866, bsz=128, num_updates=82, lr=8.2e-06, gnorm=2.973, loss_scale=2, train_wall=11, gb_free=2.8, wall=1005
2021-06-18 18:55:42 | INFO | train_inner | epoch 001: 89 / 3002 loss=3.566, ppl=11.84, wps=5799.7, ups=0.09, wpb=64781, bsz=128, num_updates=83, lr=8.3e-06, gnorm=3.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=1016
2021-06-18 18:55:53 | INFO | train_inner | epoch 001: 90 / 3002 loss=3.55, ppl=11.71, wps=5931.1, ups=0.09, wpb=64837, bsz=128, num_updates=84, lr=8.4e-06, gnorm=2.954, loss_scale=2, train_wall=10, gb_free=2.8, wall=1027
2021-06-18 18:56:04 | INFO | train_inner | epoch 001: 91 / 3002 loss=3.445, ppl=10.89, wps=5843, ups=0.09, wpb=64800, bsz=128, num_updates=85, lr=8.5e-06, gnorm=3.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=1038
2021-06-18 18:56:15 | INFO | train_inner | epoch 001: 92 / 3002 loss=3.574, ppl=11.91, wps=5823.7, ups=0.09, wpb=64844, bsz=128, num_updates=86, lr=8.6e-06, gnorm=6.47, loss_scale=2, train_wall=11, gb_free=2.8, wall=1049
2021-06-18 18:56:26 | INFO | train_inner | epoch 001: 93 / 3002 loss=3.539, ppl=11.62, wps=5845.5, ups=0.09, wpb=64863, bsz=128, num_updates=87, lr=8.7e-06, gnorm=7.98, loss_scale=2, train_wall=11, gb_free=2.8, wall=1060
2021-06-18 18:56:37 | INFO | train_inner | epoch 001: 94 / 3002 loss=3.604, ppl=12.16, wps=5870.7, ups=0.09, wpb=64844, bsz=128, num_updates=88, lr=8.8e-06, gnorm=3.057, loss_scale=2, train_wall=11, gb_free=2.8, wall=1071
2021-06-18 18:56:48 | INFO | train_inner | epoch 001: 95 / 3002 loss=3.525, ppl=11.51, wps=5776, ups=0.09, wpb=64846, bsz=128, num_updates=89, lr=8.9e-06, gnorm=2.861, loss_scale=2, train_wall=11, gb_free=2.8, wall=1083
2021-06-18 18:56:59 | INFO | train_inner | epoch 001: 96 / 3002 loss=3.582, ppl=11.98, wps=5887.9, ups=0.09, wpb=64835, bsz=128, num_updates=90, lr=9e-06, gnorm=3.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=1094
2021-06-18 18:57:10 | INFO | train_inner | epoch 001: 97 / 3002 loss=3.531, ppl=11.56, wps=5943.5, ups=0.09, wpb=64842, bsz=128, num_updates=91, lr=9.1e-06, gnorm=3.046, loss_scale=2, train_wall=10, gb_free=2.8, wall=1105
2021-06-18 18:57:22 | INFO | train_inner | epoch 001: 98 / 3002 loss=3.597, ppl=12.1, wps=5745.5, ups=0.09, wpb=64815, bsz=128, num_updates=92, lr=9.2e-06, gnorm=3.365, loss_scale=2, train_wall=11, gb_free=2.8, wall=1116
2021-06-18 18:57:33 | INFO | train_inner | epoch 001: 99 / 3002 loss=3.484, ppl=11.19, wps=5871, ups=0.09, wpb=64838, bsz=128, num_updates=93, lr=9.3e-06, gnorm=3.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=1127
2021-06-18 18:57:44 | INFO | train_inner | epoch 001: 100 / 3002 loss=3.581, ppl=11.96, wps=5910.5, ups=0.09, wpb=64829, bsz=128, num_updates=94, lr=9.4e-06, gnorm=23.145, loss_scale=2, train_wall=11, gb_free=2.8, wall=1138
2021-06-18 18:57:54 | INFO | train_inner | epoch 001: 101 / 3002 loss=3.488, ppl=11.22, wps=5916.8, ups=0.09, wpb=64848, bsz=128, num_updates=95, lr=9.5e-06, gnorm=2.964, loss_scale=2, train_wall=11, gb_free=2.8, wall=1149
2021-06-18 18:58:06 | INFO | train_inner | epoch 001: 102 / 3002 loss=3.486, ppl=11.21, wps=5767.9, ups=0.09, wpb=64885, bsz=128, num_updates=96, lr=9.6e-06, gnorm=2.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=1160
2021-06-18 18:58:17 | INFO | train_inner | epoch 001: 103 / 3002 loss=3.417, ppl=10.68, wps=5783.4, ups=0.09, wpb=64753, bsz=128, num_updates=97, lr=9.7e-06, gnorm=3.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=1171
2021-06-18 18:58:28 | INFO | train_inner | epoch 001: 104 / 3002 loss=3.475, ppl=11.12, wps=5785.9, ups=0.09, wpb=64789, bsz=128, num_updates=98, lr=9.8e-06, gnorm=3.417, loss_scale=2, train_wall=11, gb_free=2.8, wall=1182
2021-06-18 18:58:39 | INFO | train_inner | epoch 001: 105 / 3002 loss=3.638, ppl=12.45, wps=5872.3, ups=0.09, wpb=64763, bsz=128, num_updates=99, lr=9.9e-06, gnorm=3.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=1193
2021-06-18 18:58:50 | INFO | train_inner | epoch 001: 106 / 3002 loss=3.477, ppl=11.13, wps=5922.1, ups=0.09, wpb=64871, bsz=128, num_updates=100, lr=1e-05, gnorm=2.973, loss_scale=2, train_wall=11, gb_free=2.8, wall=1204
2021-06-18 18:59:01 | INFO | train_inner | epoch 001: 107 / 3002 loss=3.347, ppl=10.18, wps=5863.7, ups=0.09, wpb=64814, bsz=128, num_updates=101, lr=1.01e-05, gnorm=3.047, loss_scale=2, train_wall=11, gb_free=2.8, wall=1215
2021-06-18 18:59:12 | INFO | train_inner | epoch 001: 108 / 3002 loss=3.382, ppl=10.42, wps=5945.6, ups=0.09, wpb=64883, bsz=128, num_updates=102, lr=1.02e-05, gnorm=3.033, loss_scale=2, train_wall=10, gb_free=2.8, wall=1226
2021-06-18 18:59:23 | INFO | train_inner | epoch 001: 109 / 3002 loss=3.581, ppl=11.96, wps=5840.4, ups=0.09, wpb=64847, bsz=128, num_updates=103, lr=1.03e-05, gnorm=2.915, loss_scale=2, train_wall=11, gb_free=2.8, wall=1238
2021-06-18 18:59:34 | INFO | train_inner | epoch 001: 110 / 3002 loss=3.603, ppl=12.15, wps=5843.3, ups=0.09, wpb=64799, bsz=128, num_updates=104, lr=1.04e-05, gnorm=3.019, loss_scale=2, train_wall=11, gb_free=2.8, wall=1249
2021-06-18 18:59:45 | INFO | train_inner | epoch 001: 111 / 3002 loss=3.383, ppl=10.43, wps=5840.8, ups=0.09, wpb=64792, bsz=128, num_updates=105, lr=1.05e-05, gnorm=3.087, loss_scale=2, train_wall=11, gb_free=2.8, wall=1260
2021-06-18 18:59:56 | INFO | train_inner | epoch 001: 112 / 3002 loss=3.447, ppl=10.9, wps=5847.8, ups=0.09, wpb=64856, bsz=128, num_updates=106, lr=1.06e-05, gnorm=2.958, loss_scale=2, train_wall=11, gb_free=2.8, wall=1271
2021-06-18 19:00:08 | INFO | train_inner | epoch 001: 113 / 3002 loss=3.45, ppl=10.93, wps=5762.9, ups=0.09, wpb=64824, bsz=128, num_updates=107, lr=1.07e-05, gnorm=3.007, loss_scale=2, train_wall=11, gb_free=2.8, wall=1282
2021-06-18 19:00:19 | INFO | train_inner | epoch 001: 114 / 3002 loss=3.716, ppl=13.14, wps=5829.3, ups=0.09, wpb=64877, bsz=128, num_updates=108, lr=1.08e-05, gnorm=2.867, loss_scale=2, train_wall=11, gb_free=2.8, wall=1293
2021-06-18 19:00:30 | INFO | train_inner | epoch 001: 115 / 3002 loss=3.588, ppl=12.02, wps=5979.2, ups=0.09, wpb=64730, bsz=128, num_updates=109, lr=1.09e-05, gnorm=3.095, loss_scale=2, train_wall=10, gb_free=2.8, wall=1304
2021-06-18 19:00:41 | INFO | train_inner | epoch 001: 116 / 3002 loss=3.468, ppl=11.07, wps=5803.5, ups=0.09, wpb=64820, bsz=128, num_updates=110, lr=1.1e-05, gnorm=3.117, loss_scale=2, train_wall=11, gb_free=2.8, wall=1315
2021-06-18 19:00:52 | INFO | train_inner | epoch 001: 117 / 3002 loss=3.584, ppl=11.99, wps=5779.5, ups=0.09, wpb=64792, bsz=128, num_updates=111, lr=1.11e-05, gnorm=3.027, loss_scale=2, train_wall=11, gb_free=2.8, wall=1326
2021-06-18 19:01:03 | INFO | train_inner | epoch 001: 118 / 3002 loss=3.58, ppl=11.96, wps=5717.5, ups=0.09, wpb=64824, bsz=128, num_updates=112, lr=1.12e-05, gnorm=3.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=1338
2021-06-18 19:01:14 | INFO | train_inner | epoch 001: 119 / 3002 loss=3.382, ppl=10.42, wps=5844.6, ups=0.09, wpb=64822, bsz=128, num_updates=113, lr=1.13e-05, gnorm=3.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=1349
2021-06-18 19:01:25 | INFO | train_inner | epoch 001: 120 / 3002 loss=3.632, ppl=12.4, wps=5913.8, ups=0.09, wpb=64801, bsz=128, num_updates=114, lr=1.14e-05, gnorm=3.396, loss_scale=2, train_wall=11, gb_free=2.8, wall=1360
2021-06-18 19:01:37 | INFO | train_inner | epoch 001: 121 / 3002 loss=3.39, ppl=10.49, wps=5838.9, ups=0.09, wpb=64857, bsz=128, num_updates=115, lr=1.15e-05, gnorm=3.112, loss_scale=2, train_wall=11, gb_free=2.8, wall=1371
2021-06-18 19:01:48 | INFO | train_inner | epoch 001: 122 / 3002 loss=3.321, ppl=9.99, wps=5886.8, ups=0.09, wpb=64862, bsz=128, num_updates=116, lr=1.16e-05, gnorm=3.513, loss_scale=2, train_wall=11, gb_free=2.8, wall=1382
2021-06-18 19:01:59 | INFO | train_inner | epoch 001: 123 / 3002 loss=3.45, ppl=10.93, wps=5881.6, ups=0.09, wpb=64896, bsz=128, num_updates=117, lr=1.17e-05, gnorm=2.984, loss_scale=2, train_wall=11, gb_free=2.8, wall=1393
2021-06-18 19:02:10 | INFO | train_inner | epoch 001: 124 / 3002 loss=3.449, ppl=10.92, wps=5923.3, ups=0.09, wpb=64871, bsz=128, num_updates=118, lr=1.18e-05, gnorm=2.902, loss_scale=2, train_wall=10, gb_free=2.8, wall=1404
2021-06-18 19:02:21 | INFO | train_inner | epoch 001: 125 / 3002 loss=3.623, ppl=12.32, wps=5829, ups=0.09, wpb=64841, bsz=128, num_updates=119, lr=1.19e-05, gnorm=3.024, loss_scale=2, train_wall=11, gb_free=2.8, wall=1415
2021-06-18 19:02:32 | INFO | train_inner | epoch 001: 126 / 3002 loss=3.378, ppl=10.4, wps=5930.4, ups=0.09, wpb=64879, bsz=128, num_updates=120, lr=1.2e-05, gnorm=7.351, loss_scale=2, train_wall=10, gb_free=2.8, wall=1426
2021-06-18 19:02:43 | INFO | train_inner | epoch 001: 127 / 3002 loss=3.348, ppl=10.18, wps=5849.2, ups=0.09, wpb=64822, bsz=128, num_updates=121, lr=1.21e-05, gnorm=2.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=1437
2021-06-18 19:02:54 | INFO | train_inner | epoch 001: 128 / 3002 loss=3.554, ppl=11.75, wps=5826, ups=0.09, wpb=64753, bsz=128, num_updates=122, lr=1.22e-05, gnorm=17.434, loss_scale=2, train_wall=11, gb_free=2.8, wall=1448
2021-06-18 19:03:05 | INFO | train_inner | epoch 001: 129 / 3002 loss=3.537, ppl=11.61, wps=5921.9, ups=0.09, wpb=64842, bsz=128, num_updates=123, lr=1.23e-05, gnorm=2.883, loss_scale=2, train_wall=10, gb_free=2.8, wall=1459
2021-06-18 19:03:16 | INFO | train_inner | epoch 001: 130 / 3002 loss=3.508, ppl=11.38, wps=5876.6, ups=0.09, wpb=64847, bsz=128, num_updates=124, lr=1.24e-05, gnorm=3.024, loss_scale=2, train_wall=11, gb_free=2.8, wall=1470
2021-06-18 19:03:27 | INFO | train_inner | epoch 001: 131 / 3002 loss=3.607, ppl=12.19, wps=5944.3, ups=0.09, wpb=64818, bsz=128, num_updates=125, lr=1.25e-05, gnorm=3.138, loss_scale=2, train_wall=10, gb_free=2.8, wall=1481
2021-06-18 19:03:38 | INFO | train_inner | epoch 001: 132 / 3002 loss=3.462, ppl=11.02, wps=5803.8, ups=0.09, wpb=64804, bsz=128, num_updates=126, lr=1.26e-05, gnorm=3.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=1492
2021-06-18 19:03:49 | INFO | train_inner | epoch 001: 133 / 3002 loss=3.424, ppl=10.73, wps=5864.9, ups=0.09, wpb=64800, bsz=128, num_updates=127, lr=1.27e-05, gnorm=3.08, loss_scale=2, train_wall=11, gb_free=2.8, wall=1503
2021-06-18 19:04:00 | INFO | train_inner | epoch 001: 134 / 3002 loss=3.443, ppl=10.88, wps=5811.6, ups=0.09, wpb=64827, bsz=128, num_updates=128, lr=1.28e-05, gnorm=2.986, loss_scale=2, train_wall=11, gb_free=2.8, wall=1514
2021-06-18 19:04:11 | INFO | train_inner | epoch 001: 135 / 3002 loss=3.407, ppl=10.61, wps=5810.3, ups=0.09, wpb=64878, bsz=128, num_updates=129, lr=1.29e-05, gnorm=2.95, loss_scale=2, train_wall=11, gb_free=2.8, wall=1526
2021-06-18 19:04:22 | INFO | train_inner | epoch 001: 136 / 3002 loss=3.467, ppl=11.06, wps=5862.2, ups=0.09, wpb=64742, bsz=128, num_updates=130, lr=1.3e-05, gnorm=4.61, loss_scale=2, train_wall=11, gb_free=2.8, wall=1537
2021-06-18 19:04:33 | INFO | train_inner | epoch 001: 137 / 3002 loss=3.528, ppl=11.53, wps=5932.7, ups=0.09, wpb=64823, bsz=128, num_updates=131, lr=1.31e-05, gnorm=3.098, loss_scale=2, train_wall=10, gb_free=2.8, wall=1548
2021-06-18 19:04:44 | INFO | train_inner | epoch 001: 138 / 3002 loss=3.404, ppl=10.59, wps=5904.9, ups=0.09, wpb=64865, bsz=128, num_updates=132, lr=1.32e-05, gnorm=2.954, loss_scale=2, train_wall=11, gb_free=2.8, wall=1559
2021-06-18 19:04:55 | INFO | train_inner | epoch 001: 139 / 3002 loss=3.435, ppl=10.82, wps=5783.1, ups=0.09, wpb=64787, bsz=128, num_updates=133, lr=1.33e-05, gnorm=3.084, loss_scale=2, train_wall=11, gb_free=2.8, wall=1570
2021-06-18 19:05:07 | INFO | train_inner | epoch 001: 140 / 3002 loss=3.447, ppl=10.91, wps=5799.7, ups=0.09, wpb=64787, bsz=128, num_updates=134, lr=1.34e-05, gnorm=3.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=1581
2021-06-18 19:05:18 | INFO | train_inner | epoch 001: 141 / 3002 loss=3.367, ppl=10.32, wps=5858.6, ups=0.09, wpb=64773, bsz=128, num_updates=135, lr=1.35e-05, gnorm=4.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=1592
2021-06-18 19:05:29 | INFO | train_inner | epoch 001: 142 / 3002 loss=3.391, ppl=10.49, wps=5766.8, ups=0.09, wpb=64821, bsz=128, num_updates=136, lr=1.36e-05, gnorm=2.888, loss_scale=2, train_wall=11, gb_free=2.8, wall=1603
2021-06-18 19:05:40 | INFO | train_inner | epoch 001: 143 / 3002 loss=3.413, ppl=10.65, wps=5856.9, ups=0.09, wpb=64828, bsz=128, num_updates=137, lr=1.37e-05, gnorm=3.111, loss_scale=2, train_wall=11, gb_free=2.8, wall=1614
2021-06-18 19:05:51 | INFO | train_inner | epoch 001: 144 / 3002 loss=3.643, ppl=12.5, wps=5819.1, ups=0.09, wpb=64898, bsz=128, num_updates=138, lr=1.38e-05, gnorm=3.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=1625
2021-06-18 19:06:02 | INFO | train_inner | epoch 001: 145 / 3002 loss=3.348, ppl=10.18, wps=5823.7, ups=0.09, wpb=64824, bsz=128, num_updates=139, lr=1.39e-05, gnorm=12.752, loss_scale=2, train_wall=11, gb_free=2.8, wall=1637
2021-06-18 19:06:13 | INFO | train_inner | epoch 001: 146 / 3002 loss=3.541, ppl=11.64, wps=5879.5, ups=0.09, wpb=64803, bsz=128, num_updates=140, lr=1.4e-05, gnorm=2.98, loss_scale=2, train_wall=11, gb_free=2.8, wall=1648
2021-06-18 19:06:24 | INFO | train_inner | epoch 001: 147 / 3002 loss=3.5, ppl=11.32, wps=5820.1, ups=0.09, wpb=64846, bsz=128, num_updates=141, lr=1.41e-05, gnorm=3.14, loss_scale=2, train_wall=11, gb_free=2.8, wall=1659
2021-06-18 19:06:35 | INFO | train_inner | epoch 001: 148 / 3002 loss=3.533, ppl=11.58, wps=5839.8, ups=0.09, wpb=64727, bsz=128, num_updates=142, lr=1.42e-05, gnorm=3.135, loss_scale=2, train_wall=11, gb_free=2.8, wall=1670
2021-06-18 19:06:47 | INFO | train_inner | epoch 001: 149 / 3002 loss=3.292, ppl=9.8, wps=5843, ups=0.09, wpb=64761, bsz=128, num_updates=143, lr=1.43e-05, gnorm=3.472, loss_scale=2, train_wall=11, gb_free=2.8, wall=1681
2021-06-18 19:06:57 | INFO | train_inner | epoch 001: 150 / 3002 loss=3.415, ppl=10.67, wps=6021.2, ups=0.09, wpb=64921, bsz=128, num_updates=144, lr=1.44e-05, gnorm=2.809, loss_scale=2, train_wall=10, gb_free=2.8, wall=1692
2021-06-18 19:07:08 | INFO | train_inner | epoch 001: 151 / 3002 loss=3.769, ppl=13.63, wps=5812.2, ups=0.09, wpb=64866, bsz=128, num_updates=145, lr=1.45e-05, gnorm=3.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=1703
2021-06-18 19:07:20 | INFO | train_inner | epoch 001: 152 / 3002 loss=3.233, ppl=9.4, wps=5868.6, ups=0.09, wpb=64826, bsz=128, num_updates=146, lr=1.46e-05, gnorm=6.437, loss_scale=2, train_wall=11, gb_free=2.8, wall=1714
2021-06-18 19:07:31 | INFO | train_inner | epoch 001: 153 / 3002 loss=3.38, ppl=10.41, wps=5822.9, ups=0.09, wpb=64818, bsz=128, num_updates=147, lr=1.47e-05, gnorm=3.188, loss_scale=2, train_wall=11, gb_free=2.8, wall=1725
2021-06-18 19:07:42 | INFO | train_inner | epoch 001: 154 / 3002 loss=3.429, ppl=10.77, wps=5942.2, ups=0.09, wpb=64760, bsz=128, num_updates=148, lr=1.48e-05, gnorm=3.248, loss_scale=2, train_wall=10, gb_free=2.8, wall=1736
2021-06-18 19:07:52 | INFO | train_inner | epoch 001: 155 / 3002 loss=3.559, ppl=11.79, wps=6051.6, ups=0.09, wpb=64943, bsz=128, num_updates=149, lr=1.49e-05, gnorm=3.4, loss_scale=2, train_wall=10, gb_free=2.8, wall=1747
2021-06-18 19:08:03 | INFO | train_inner | epoch 001: 156 / 3002 loss=3.588, ppl=12.03, wps=5874.2, ups=0.09, wpb=64801, bsz=128, num_updates=150, lr=1.5e-05, gnorm=11.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=1758
2021-06-18 19:08:14 | INFO | train_inner | epoch 001: 157 / 3002 loss=3.446, ppl=10.9, wps=5932.6, ups=0.09, wpb=64811, bsz=128, num_updates=151, lr=1.51e-05, gnorm=2.893, loss_scale=2, train_wall=10, gb_free=2.8, wall=1769
2021-06-18 19:08:25 | INFO | train_inner | epoch 001: 158 / 3002 loss=3.567, ppl=11.85, wps=5804.1, ups=0.09, wpb=64907, bsz=128, num_updates=152, lr=1.52e-05, gnorm=3.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=1780
2021-06-18 19:08:36 | INFO | train_inner | epoch 001: 159 / 3002 loss=3.389, ppl=10.47, wps=5901.7, ups=0.09, wpb=64862, bsz=128, num_updates=153, lr=1.53e-05, gnorm=3.414, loss_scale=2, train_wall=11, gb_free=2.8, wall=1791
2021-06-18 19:08:47 | INFO | train_inner | epoch 001: 160 / 3002 loss=3.366, ppl=10.31, wps=5898.9, ups=0.09, wpb=64898, bsz=128, num_updates=154, lr=1.54e-05, gnorm=5.62, loss_scale=2, train_wall=11, gb_free=2.8, wall=1802
2021-06-18 19:08:58 | INFO | train_inner | epoch 001: 161 / 3002 loss=3.262, ppl=9.59, wps=5873, ups=0.09, wpb=64909, bsz=128, num_updates=155, lr=1.55e-05, gnorm=3.032, loss_scale=2, train_wall=11, gb_free=2.8, wall=1813
2021-06-18 19:09:10 | INFO | train_inner | epoch 001: 162 / 3002 loss=3.356, ppl=10.24, wps=5772.7, ups=0.09, wpb=64855, bsz=128, num_updates=156, lr=1.56e-05, gnorm=3.294, loss_scale=2, train_wall=11, gb_free=2.8, wall=1824
2021-06-18 19:09:21 | INFO | train_inner | epoch 001: 163 / 3002 loss=3.42, ppl=10.71, wps=5863.4, ups=0.09, wpb=64873, bsz=128, num_updates=157, lr=1.57e-05, gnorm=3.039, loss_scale=2, train_wall=11, gb_free=2.8, wall=1835
2021-06-18 19:09:32 | INFO | train_inner | epoch 001: 164 / 3002 loss=3.529, ppl=11.54, wps=5804.4, ups=0.09, wpb=64882, bsz=128, num_updates=158, lr=1.58e-05, gnorm=2.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=1846
2021-06-18 19:09:43 | INFO | train_inner | epoch 001: 165 / 3002 loss=3.487, ppl=11.21, wps=5897.5, ups=0.09, wpb=64824, bsz=128, num_updates=159, lr=1.59e-05, gnorm=3.069, loss_scale=2, train_wall=11, gb_free=2.8, wall=1857
2021-06-18 19:09:54 | INFO | train_inner | epoch 001: 166 / 3002 loss=3.576, ppl=11.92, wps=5844.6, ups=0.09, wpb=64881, bsz=128, num_updates=160, lr=1.6e-05, gnorm=3.118, loss_scale=2, train_wall=11, gb_free=2.8, wall=1868
2021-06-18 19:10:05 | INFO | train_inner | epoch 001: 167 / 3002 loss=3.5, ppl=11.32, wps=5842.5, ups=0.09, wpb=64863, bsz=128, num_updates=161, lr=1.61e-05, gnorm=3.346, loss_scale=2, train_wall=11, gb_free=2.8, wall=1879
2021-06-18 19:10:16 | INFO | train_inner | epoch 001: 168 / 3002 loss=3.649, ppl=12.54, wps=5869, ups=0.09, wpb=64868, bsz=128, num_updates=162, lr=1.62e-05, gnorm=3.137, loss_scale=2, train_wall=11, gb_free=2.8, wall=1891
2021-06-18 19:10:27 | INFO | train_inner | epoch 001: 169 / 3002 loss=3.38, ppl=10.41, wps=5922, ups=0.09, wpb=64767, bsz=128, num_updates=163, lr=1.63e-05, gnorm=3.086, loss_scale=2, train_wall=10, gb_free=2.8, wall=1901
2021-06-18 19:10:38 | INFO | train_inner | epoch 001: 170 / 3002 loss=3.452, ppl=10.94, wps=6013.5, ups=0.09, wpb=64911, bsz=128, num_updates=164, lr=1.64e-05, gnorm=3.112, loss_scale=2, train_wall=10, gb_free=2.8, wall=1912
2021-06-18 19:10:49 | INFO | train_inner | epoch 001: 171 / 3002 loss=3.33, ppl=10.06, wps=5962.9, ups=0.09, wpb=64879, bsz=128, num_updates=165, lr=1.65e-05, gnorm=3.192, loss_scale=2, train_wall=10, gb_free=2.8, wall=1923
2021-06-18 19:11:00 | INFO | train_inner | epoch 001: 172 / 3002 loss=3.442, ppl=10.87, wps=5955.2, ups=0.09, wpb=64804, bsz=128, num_updates=166, lr=1.66e-05, gnorm=3.35, loss_scale=2, train_wall=10, gb_free=2.8, wall=1934
2021-06-18 19:11:11 | INFO | train_inner | epoch 001: 173 / 3002 loss=3.369, ppl=10.33, wps=5796.5, ups=0.09, wpb=64871, bsz=128, num_updates=167, lr=1.67e-05, gnorm=2.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=1945
2021-06-18 19:11:22 | INFO | train_inner | epoch 001: 174 / 3002 loss=3.391, ppl=10.49, wps=5966.1, ups=0.09, wpb=64751, bsz=128, num_updates=168, lr=1.68e-05, gnorm=3.05, loss_scale=2, train_wall=10, gb_free=2.8, wall=1956
2021-06-18 19:11:33 | INFO | train_inner | epoch 001: 175 / 3002 loss=3.51, ppl=11.39, wps=5939.7, ups=0.09, wpb=64854, bsz=128, num_updates=169, lr=1.69e-05, gnorm=2.916, loss_scale=2, train_wall=10, gb_free=2.8, wall=1967
2021-06-18 19:11:44 | INFO | train_inner | epoch 001: 176 / 3002 loss=3.422, ppl=10.72, wps=5854.5, ups=0.09, wpb=64754, bsz=128, num_updates=170, lr=1.7e-05, gnorm=3.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=1978
2021-06-18 19:11:55 | INFO | train_inner | epoch 001: 177 / 3002 loss=3.559, ppl=11.78, wps=5864.7, ups=0.09, wpb=64868, bsz=128, num_updates=171, lr=1.71e-05, gnorm=2.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=1989
2021-06-18 19:12:06 | INFO | train_inner | epoch 001: 178 / 3002 loss=3.482, ppl=11.17, wps=5986.7, ups=0.09, wpb=64891, bsz=128, num_updates=172, lr=1.72e-05, gnorm=3.149, loss_scale=2, train_wall=10, gb_free=2.8, wall=2000
2021-06-18 19:12:17 | INFO | train_inner | epoch 001: 179 / 3002 loss=3.358, ppl=10.26, wps=5867.3, ups=0.09, wpb=64836, bsz=128, num_updates=173, lr=1.73e-05, gnorm=3.106, loss_scale=2, train_wall=11, gb_free=2.8, wall=2011
2021-06-18 19:12:28 | INFO | train_inner | epoch 001: 180 / 3002 loss=3.508, ppl=11.38, wps=5832.6, ups=0.09, wpb=64859, bsz=128, num_updates=174, lr=1.74e-05, gnorm=3.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=2022
2021-06-18 19:12:39 | INFO | train_inner | epoch 001: 181 / 3002 loss=3.473, ppl=11.1, wps=5878.3, ups=0.09, wpb=64898, bsz=128, num_updates=175, lr=1.75e-05, gnorm=3.311, loss_scale=2, train_wall=11, gb_free=2.8, wall=2033
2021-06-18 19:12:50 | INFO | train_inner | epoch 001: 182 / 3002 loss=3.449, ppl=10.92, wps=5900.9, ups=0.09, wpb=64903, bsz=128, num_updates=176, lr=1.76e-05, gnorm=2.915, loss_scale=2, train_wall=11, gb_free=2.8, wall=2044
2021-06-18 19:13:01 | INFO | train_inner | epoch 001: 183 / 3002 loss=3.744, ppl=13.4, wps=5814.4, ups=0.09, wpb=64757, bsz=128, num_updates=177, lr=1.77e-05, gnorm=3.065, loss_scale=2, train_wall=11, gb_free=2.8, wall=2055
2021-06-18 19:13:12 | INFO | train_inner | epoch 001: 184 / 3002 loss=3.471, ppl=11.09, wps=5797.8, ups=0.09, wpb=64834, bsz=128, num_updates=178, lr=1.78e-05, gnorm=3.413, loss_scale=2, train_wall=11, gb_free=2.8, wall=2066
2021-06-18 19:13:23 | INFO | train_inner | epoch 001: 185 / 3002 loss=3.397, ppl=10.53, wps=5901.7, ups=0.09, wpb=64842, bsz=128, num_updates=179, lr=1.79e-05, gnorm=3.273, loss_scale=2, train_wall=11, gb_free=2.8, wall=2077
2021-06-18 19:13:34 | INFO | train_inner | epoch 001: 186 / 3002 loss=3.404, ppl=10.58, wps=5860.5, ups=0.09, wpb=64856, bsz=128, num_updates=180, lr=1.8e-05, gnorm=3.012, loss_scale=2, train_wall=11, gb_free=2.8, wall=2089
2021-06-18 19:13:45 | INFO | train_inner | epoch 001: 187 / 3002 loss=3.425, ppl=10.74, wps=5875.1, ups=0.09, wpb=64851, bsz=128, num_updates=181, lr=1.81e-05, gnorm=2.936, loss_scale=2, train_wall=11, gb_free=2.8, wall=2100
2021-06-18 19:13:56 | INFO | train_inner | epoch 001: 188 / 3002 loss=3.193, ppl=9.14, wps=5898.3, ups=0.09, wpb=64812, bsz=128, num_updates=182, lr=1.82e-05, gnorm=2.886, loss_scale=2, train_wall=11, gb_free=2.8, wall=2111
2021-06-18 19:14:07 | INFO | train_inner | epoch 001: 189 / 3002 loss=3.446, ppl=10.9, wps=5896.1, ups=0.09, wpb=64795, bsz=128, num_updates=183, lr=1.83e-05, gnorm=14.423, loss_scale=2, train_wall=11, gb_free=2.8, wall=2122
2021-06-18 19:14:18 | INFO | train_inner | epoch 001: 190 / 3002 loss=3.494, ppl=11.26, wps=5846.3, ups=0.09, wpb=64770, bsz=128, num_updates=184, lr=1.84e-05, gnorm=9.766, loss_scale=2, train_wall=11, gb_free=2.8, wall=2133
2021-06-18 19:14:30 | INFO | train_inner | epoch 001: 191 / 3002 loss=3.347, ppl=10.17, wps=5743.1, ups=0.09, wpb=64918, bsz=128, num_updates=185, lr=1.85e-05, gnorm=2.956, loss_scale=2, train_wall=11, gb_free=2.8, wall=2144
2021-06-18 19:14:41 | INFO | train_inner | epoch 001: 192 / 3002 loss=3.419, ppl=10.7, wps=5870.6, ups=0.09, wpb=64742, bsz=128, num_updates=186, lr=1.86e-05, gnorm=2.935, loss_scale=2, train_wall=11, gb_free=2.8, wall=2155
2021-06-18 19:14:52 | INFO | train_inner | epoch 001: 193 / 3002 loss=3.36, ppl=10.27, wps=5802.6, ups=0.09, wpb=64865, bsz=128, num_updates=187, lr=1.87e-05, gnorm=2.964, loss_scale=2, train_wall=11, gb_free=2.8, wall=2166
2021-06-18 19:15:03 | INFO | train_inner | epoch 001: 194 / 3002 loss=3.439, ppl=10.84, wps=5970.1, ups=0.09, wpb=64815, bsz=128, num_updates=188, lr=1.88e-05, gnorm=2.915, loss_scale=2, train_wall=10, gb_free=2.8, wall=2177
2021-06-18 19:15:14 | INFO | train_inner | epoch 001: 195 / 3002 loss=3.327, ppl=10.03, wps=5790.1, ups=0.09, wpb=64769, bsz=128, num_updates=189, lr=1.89e-05, gnorm=7.126, loss_scale=2, train_wall=11, gb_free=2.8, wall=2188
2021-06-18 19:15:25 | INFO | train_inner | epoch 001: 196 / 3002 loss=3.35, ppl=10.19, wps=5843.9, ups=0.09, wpb=64816, bsz=128, num_updates=190, lr=1.9e-05, gnorm=3.441, loss_scale=2, train_wall=11, gb_free=2.8, wall=2199
2021-06-18 19:15:36 | INFO | train_inner | epoch 001: 197 / 3002 loss=3.313, ppl=9.94, wps=5877.9, ups=0.09, wpb=64801, bsz=128, num_updates=191, lr=1.91e-05, gnorm=2.91, loss_scale=2, train_wall=11, gb_free=2.8, wall=2210
2021-06-18 19:15:47 | INFO | train_inner | epoch 001: 198 / 3002 loss=3.42, ppl=10.71, wps=5806.5, ups=0.09, wpb=64898, bsz=128, num_updates=192, lr=1.92e-05, gnorm=2.945, loss_scale=2, train_wall=11, gb_free=2.8, wall=2221
2021-06-18 19:15:58 | INFO | train_inner | epoch 001: 199 / 3002 loss=3.533, ppl=11.58, wps=5790.3, ups=0.09, wpb=64868, bsz=128, num_updates=193, lr=1.93e-05, gnorm=3.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=2233
2021-06-18 19:16:10 | INFO | train_inner | epoch 001: 200 / 3002 loss=3.446, ppl=10.9, wps=5795, ups=0.09, wpb=64915, bsz=128, num_updates=194, lr=1.94e-05, gnorm=4.842, loss_scale=2, train_wall=11, gb_free=2.8, wall=2244
2021-06-18 19:16:21 | INFO | train_inner | epoch 001: 201 / 3002 loss=3.439, ppl=10.85, wps=5874.6, ups=0.09, wpb=64750, bsz=128, num_updates=195, lr=1.95e-05, gnorm=3.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=2255
2021-06-18 19:16:32 | INFO | train_inner | epoch 001: 202 / 3002 loss=3.51, ppl=11.39, wps=5892.5, ups=0.09, wpb=64779, bsz=128, num_updates=196, lr=1.96e-05, gnorm=3.035, loss_scale=2, train_wall=11, gb_free=2.8, wall=2266
2021-06-18 19:16:43 | INFO | train_inner | epoch 001: 203 / 3002 loss=3.338, ppl=10.11, wps=5849.1, ups=0.09, wpb=64843, bsz=128, num_updates=197, lr=1.97e-05, gnorm=22.318, loss_scale=2, train_wall=11, gb_free=2.8, wall=2277
2021-06-18 19:16:54 | INFO | train_inner | epoch 001: 204 / 3002 loss=3.314, ppl=9.95, wps=5780.5, ups=0.09, wpb=64841, bsz=128, num_updates=198, lr=1.98e-05, gnorm=2.946, loss_scale=2, train_wall=11, gb_free=2.8, wall=2288
2021-06-18 19:17:05 | INFO | train_inner | epoch 001: 205 / 3002 loss=3.386, ppl=10.45, wps=5898.2, ups=0.09, wpb=64798, bsz=128, num_updates=199, lr=1.99e-05, gnorm=3.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=2299
2021-06-18 19:17:16 | INFO | train_inner | epoch 001: 206 / 3002 loss=3.462, ppl=11.02, wps=5921.7, ups=0.09, wpb=64887, bsz=128, num_updates=200, lr=2e-05, gnorm=2.987, loss_scale=2, train_wall=11, gb_free=2.8, wall=2310
2021-06-18 19:17:27 | INFO | train_inner | epoch 001: 207 / 3002 loss=3.57, ppl=11.88, wps=5985.1, ups=0.09, wpb=64901, bsz=128, num_updates=201, lr=2.01e-05, gnorm=3.097, loss_scale=2, train_wall=10, gb_free=2.8, wall=2321
2021-06-18 19:17:38 | INFO | train_inner | epoch 001: 208 / 3002 loss=3.482, ppl=11.18, wps=5895.1, ups=0.09, wpb=64826, bsz=128, num_updates=202, lr=2.02e-05, gnorm=3.143, loss_scale=2, train_wall=11, gb_free=2.8, wall=2332
2021-06-18 19:17:49 | INFO | train_inner | epoch 001: 209 / 3002 loss=3.588, ppl=12.03, wps=5776, ups=0.09, wpb=64763, bsz=128, num_updates=203, lr=2.03e-05, gnorm=3.086, loss_scale=2, train_wall=11, gb_free=2.8, wall=2343
2021-06-18 19:18:00 | INFO | train_inner | epoch 001: 210 / 3002 loss=3.174, ppl=9.02, wps=5890.1, ups=0.09, wpb=64907, bsz=128, num_updates=204, lr=2.04e-05, gnorm=3.306, loss_scale=2, train_wall=11, gb_free=2.8, wall=2354
2021-06-18 19:18:11 | INFO | train_inner | epoch 001: 211 / 3002 loss=3.432, ppl=10.79, wps=5893.5, ups=0.09, wpb=64823, bsz=128, num_updates=205, lr=2.05e-05, gnorm=3.057, loss_scale=4, train_wall=11, gb_free=2.8, wall=2365
2021-06-18 19:18:22 | INFO | train_inner | epoch 001: 212 / 3002 loss=3.2, ppl=9.19, wps=5888.4, ups=0.09, wpb=64864, bsz=128, num_updates=206, lr=2.06e-05, gnorm=3.072, loss_scale=4, train_wall=11, gb_free=2.8, wall=2376
2021-06-18 19:18:33 | INFO | train_inner | epoch 001: 213 / 3002 loss=3.423, ppl=10.73, wps=5870.3, ups=0.09, wpb=64827, bsz=128, num_updates=207, lr=2.07e-05, gnorm=2.898, loss_scale=4, train_wall=11, gb_free=2.8, wall=2387
2021-06-18 19:18:44 | INFO | train_inner | epoch 001: 214 / 3002 loss=3.449, ppl=10.92, wps=5776.5, ups=0.09, wpb=64749, bsz=128, num_updates=208, lr=2.08e-05, gnorm=2.983, loss_scale=4, train_wall=11, gb_free=2.8, wall=2398
2021-06-18 19:18:55 | INFO | train_inner | epoch 001: 215 / 3002 loss=3.231, ppl=9.39, wps=5916.7, ups=0.09, wpb=64911, bsz=128, num_updates=209, lr=2.09e-05, gnorm=2.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=2409
2021-06-18 19:19:06 | INFO | train_inner | epoch 001: 216 / 3002 loss=3.397, ppl=10.54, wps=5851.3, ups=0.09, wpb=64852, bsz=128, num_updates=210, lr=2.1e-05, gnorm=3.301, loss_scale=4, train_wall=11, gb_free=2.8, wall=2421
2021-06-18 19:19:17 | INFO | train_inner | epoch 001: 217 / 3002 loss=3.417, ppl=10.68, wps=5876.1, ups=0.09, wpb=64867, bsz=128, num_updates=211, lr=2.11e-05, gnorm=2.915, loss_scale=4, train_wall=11, gb_free=2.8, wall=2432
2021-06-18 19:19:28 | INFO | train_inner | epoch 001: 218 / 3002 loss=3.413, ppl=10.65, wps=5805.6, ups=0.09, wpb=64866, bsz=128, num_updates=212, lr=2.12e-05, gnorm=3.103, loss_scale=4, train_wall=11, gb_free=2.8, wall=2443
2021-06-18 19:19:40 | INFO | train_inner | epoch 001: 219 / 3002 loss=3.542, ppl=11.65, wps=5847.3, ups=0.09, wpb=64784, bsz=128, num_updates=213, lr=2.13e-05, gnorm=2.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=2454
2021-06-18 19:19:50 | INFO | train_inner | epoch 001: 220 / 3002 loss=3.353, ppl=10.22, wps=5953.6, ups=0.09, wpb=64809, bsz=128, num_updates=214, lr=2.14e-05, gnorm=3.491, loss_scale=4, train_wall=10, gb_free=2.8, wall=2465
2021-06-18 19:20:01 | INFO | train_inner | epoch 001: 221 / 3002 loss=3.545, ppl=11.67, wps=5865.9, ups=0.09, wpb=64836, bsz=128, num_updates=215, lr=2.15e-05, gnorm=3.048, loss_scale=4, train_wall=11, gb_free=2.8, wall=2476
2021-06-18 19:20:12 | INFO | train_inner | epoch 001: 222 / 3002 loss=3.55, ppl=11.72, wps=5867.6, ups=0.09, wpb=64693, bsz=128, num_updates=216, lr=2.16e-05, gnorm=2.93, loss_scale=4, train_wall=11, gb_free=2.8, wall=2487
2021-06-18 19:20:23 | INFO | train_inner | epoch 001: 223 / 3002 loss=3.473, ppl=11.1, wps=5886.7, ups=0.09, wpb=64800, bsz=128, num_updates=217, lr=2.17e-05, gnorm=3.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=2498
2021-06-18 19:20:34 | INFO | train_inner | epoch 001: 224 / 3002 loss=3.355, ppl=10.23, wps=5899.4, ups=0.09, wpb=64875, bsz=128, num_updates=218, lr=2.18e-05, gnorm=3.297, loss_scale=4, train_wall=11, gb_free=2.8, wall=2509
2021-06-18 19:20:46 | INFO | train_inner | epoch 001: 225 / 3002 loss=3.395, ppl=10.52, wps=5806.1, ups=0.09, wpb=64789, bsz=128, num_updates=219, lr=2.19e-05, gnorm=2.891, loss_scale=4, train_wall=11, gb_free=2.8, wall=2520
2021-06-18 19:20:57 | INFO | train_inner | epoch 001: 226 / 3002 loss=3.369, ppl=10.33, wps=5841.8, ups=0.09, wpb=64895, bsz=128, num_updates=220, lr=2.2e-05, gnorm=3.323, loss_scale=4, train_wall=11, gb_free=2.8, wall=2531
2021-06-18 19:21:08 | INFO | train_inner | epoch 001: 227 / 3002 loss=3.363, ppl=10.29, wps=5915.1, ups=0.09, wpb=64853, bsz=128, num_updates=221, lr=2.21e-05, gnorm=2.839, loss_scale=4, train_wall=11, gb_free=2.8, wall=2542
2021-06-18 19:21:19 | INFO | train_inner | epoch 001: 228 / 3002 loss=3.469, ppl=11.08, wps=5880.1, ups=0.09, wpb=64877, bsz=128, num_updates=222, lr=2.22e-05, gnorm=2.988, loss_scale=4, train_wall=11, gb_free=2.8, wall=2553
2021-06-18 19:21:30 | INFO | train_inner | epoch 001: 229 / 3002 loss=3.359, ppl=10.26, wps=5990.1, ups=0.09, wpb=64849, bsz=128, num_updates=223, lr=2.23e-05, gnorm=2.982, loss_scale=4, train_wall=10, gb_free=2.8, wall=2564
2021-06-18 19:21:41 | INFO | train_inner | epoch 001: 230 / 3002 loss=3.429, ppl=10.77, wps=5715.6, ups=0.09, wpb=64727, bsz=128, num_updates=224, lr=2.24e-05, gnorm=2.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=2575
2021-06-18 19:21:52 | INFO | train_inner | epoch 001: 231 / 3002 loss=3.252, ppl=9.53, wps=5897.5, ups=0.09, wpb=64851, bsz=128, num_updates=225, lr=2.25e-05, gnorm=2.81, loss_scale=4, train_wall=11, gb_free=2.8, wall=2586
2021-06-18 19:22:03 | INFO | train_inner | epoch 001: 232 / 3002 loss=3.692, ppl=12.92, wps=5750.8, ups=0.09, wpb=64680, bsz=128, num_updates=226, lr=2.26e-05, gnorm=2.957, loss_scale=4, train_wall=11, gb_free=2.8, wall=2597
2021-06-18 19:22:14 | INFO | train_inner | epoch 001: 233 / 3002 loss=3.341, ppl=10.13, wps=5903.4, ups=0.09, wpb=64815, bsz=128, num_updates=227, lr=2.27e-05, gnorm=2.896, loss_scale=4, train_wall=11, gb_free=2.8, wall=2608
2021-06-18 19:22:25 | INFO | train_inner | epoch 001: 234 / 3002 loss=3.277, ppl=9.69, wps=5826.4, ups=0.09, wpb=64887, bsz=128, num_updates=228, lr=2.28e-05, gnorm=3.032, loss_scale=4, train_wall=11, gb_free=2.8, wall=2620
2021-06-18 19:22:36 | INFO | train_inner | epoch 001: 235 / 3002 loss=3.479, ppl=11.15, wps=5845.9, ups=0.09, wpb=64817, bsz=128, num_updates=229, lr=2.29e-05, gnorm=3.12, loss_scale=4, train_wall=11, gb_free=2.8, wall=2631
2021-06-18 19:22:48 | INFO | train_inner | epoch 001: 236 / 3002 loss=3.306, ppl=9.89, wps=5801.8, ups=0.09, wpb=64827, bsz=128, num_updates=230, lr=2.3e-05, gnorm=2.929, loss_scale=4, train_wall=11, gb_free=2.8, wall=2642
2021-06-18 19:22:59 | INFO | train_inner | epoch 001: 237 / 3002 loss=3.467, ppl=11.06, wps=5798.7, ups=0.09, wpb=64740, bsz=128, num_updates=231, lr=2.31e-05, gnorm=3.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=2653
2021-06-18 19:23:10 | INFO | train_inner | epoch 001: 238 / 3002 loss=3.416, ppl=10.67, wps=5843.2, ups=0.09, wpb=64822, bsz=128, num_updates=232, lr=2.32e-05, gnorm=2.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=2664
2021-06-18 19:23:21 | INFO | train_inner | epoch 001: 239 / 3002 loss=3.419, ppl=10.7, wps=5810.9, ups=0.09, wpb=64838, bsz=128, num_updates=233, lr=2.33e-05, gnorm=3, loss_scale=4, train_wall=11, gb_free=2.8, wall=2675
2021-06-18 19:23:32 | INFO | train_inner | epoch 001: 240 / 3002 loss=3.375, ppl=10.37, wps=5817.8, ups=0.09, wpb=64845, bsz=128, num_updates=234, lr=2.34e-05, gnorm=3.042, loss_scale=4, train_wall=11, gb_free=2.8, wall=2686
2021-06-18 19:23:43 | INFO | train_inner | epoch 001: 241 / 3002 loss=3.419, ppl=10.7, wps=5815.9, ups=0.09, wpb=64891, bsz=128, num_updates=235, lr=2.35e-05, gnorm=3.407, loss_scale=4, train_wall=11, gb_free=2.8, wall=2698
2021-06-18 19:23:54 | INFO | train_inner | epoch 001: 242 / 3002 loss=3.355, ppl=10.23, wps=5766, ups=0.09, wpb=64780, bsz=128, num_updates=236, lr=2.36e-05, gnorm=4.515, loss_scale=4, train_wall=11, gb_free=2.8, wall=2709
2021-06-18 19:24:06 | INFO | train_inner | epoch 001: 243 / 3002 loss=3.469, ppl=11.07, wps=5783.3, ups=0.09, wpb=64775, bsz=128, num_updates=237, lr=2.37e-05, gnorm=3.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=2720
2021-06-18 19:24:17 | INFO | train_inner | epoch 001: 244 / 3002 loss=3.474, ppl=11.11, wps=5938.3, ups=0.09, wpb=64818, bsz=128, num_updates=238, lr=2.38e-05, gnorm=3.276, loss_scale=4, train_wall=10, gb_free=2.8, wall=2731
2021-06-18 19:24:27 | INFO | train_inner | epoch 001: 245 / 3002 loss=3.454, ppl=10.96, wps=5969.9, ups=0.09, wpb=64865, bsz=128, num_updates=239, lr=2.39e-05, gnorm=2.836, loss_scale=4, train_wall=10, gb_free=2.8, wall=2742
2021-06-18 19:24:39 | INFO | train_inner | epoch 001: 246 / 3002 loss=3.061, ppl=8.35, wps=5758.1, ups=0.09, wpb=64823, bsz=128, num_updates=240, lr=2.4e-05, gnorm=2.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=2753
2021-06-18 19:24:50 | INFO | train_inner | epoch 001: 247 / 3002 loss=3.247, ppl=9.49, wps=5908, ups=0.09, wpb=64891, bsz=128, num_updates=241, lr=2.41e-05, gnorm=2.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=2764
2021-06-18 19:25:01 | INFO | train_inner | epoch 001: 248 / 3002 loss=3.31, ppl=9.92, wps=5855.8, ups=0.09, wpb=64877, bsz=128, num_updates=242, lr=2.42e-05, gnorm=3.054, loss_scale=4, train_wall=11, gb_free=2.8, wall=2775
2021-06-18 19:25:12 | INFO | train_inner | epoch 001: 249 / 3002 loss=3.446, ppl=10.9, wps=5941.7, ups=0.09, wpb=64922, bsz=128, num_updates=243, lr=2.43e-05, gnorm=2.865, loss_scale=4, train_wall=10, gb_free=2.8, wall=2786
2021-06-18 19:25:23 | INFO | train_inner | epoch 001: 250 / 3002 loss=3.396, ppl=10.52, wps=5726.5, ups=0.09, wpb=64907, bsz=128, num_updates=244, lr=2.44e-05, gnorm=2.944, loss_scale=4, train_wall=11, gb_free=2.8, wall=2797
2021-06-18 19:25:34 | INFO | train_inner | epoch 001: 251 / 3002 loss=3.429, ppl=10.77, wps=5827.2, ups=0.09, wpb=64752, bsz=128, num_updates=245, lr=2.45e-05, gnorm=2.898, loss_scale=4, train_wall=11, gb_free=2.8, wall=2808
2021-06-18 19:25:45 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-18 19:25:56 | INFO | train_inner | epoch 001: 253 / 3002 loss=3.437, ppl=10.83, wps=2953, ups=0.05, wpb=64838, bsz=128, num_updates=246, lr=2.46e-05, gnorm=5.197, loss_scale=2, train_wall=21, gb_free=2.8, wall=2830
2021-06-18 19:26:07 | INFO | train_inner | epoch 001: 254 / 3002 loss=3.352, ppl=10.21, wps=5851.3, ups=0.09, wpb=64807, bsz=128, num_updates=247, lr=2.47e-05, gnorm=2.896, loss_scale=2, train_wall=11, gb_free=2.8, wall=2842
2021-06-18 19:26:18 | INFO | train_inner | epoch 001: 255 / 3002 loss=3.537, ppl=11.61, wps=5865.6, ups=0.09, wpb=64804, bsz=128, num_updates=248, lr=2.48e-05, gnorm=2.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=2853
2021-06-18 19:26:29 | INFO | train_inner | epoch 001: 256 / 3002 loss=3.209, ppl=9.25, wps=5852.5, ups=0.09, wpb=64761, bsz=128, num_updates=249, lr=2.49e-05, gnorm=3.827, loss_scale=2, train_wall=11, gb_free=2.8, wall=2864
2021-06-18 19:26:40 | INFO | train_inner | epoch 001: 257 / 3002 loss=3.41, ppl=10.63, wps=5974.3, ups=0.09, wpb=64776, bsz=128, num_updates=250, lr=2.5e-05, gnorm=15.947, loss_scale=2, train_wall=10, gb_free=2.8, wall=2874
2021-06-18 19:26:51 | INFO | train_inner | epoch 001: 258 / 3002 loss=3.335, ppl=10.09, wps=5879.5, ups=0.09, wpb=64796, bsz=128, num_updates=251, lr=2.51e-05, gnorm=2.979, loss_scale=2, train_wall=11, gb_free=2.8, wall=2885
2021-06-18 19:27:02 | INFO | train_inner | epoch 001: 259 / 3002 loss=3.333, ppl=10.08, wps=5803.6, ups=0.09, wpb=64868, bsz=128, num_updates=252, lr=2.52e-05, gnorm=3.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=2897
2021-06-18 19:27:13 | INFO | train_inner | epoch 001: 260 / 3002 loss=3.361, ppl=10.27, wps=5805.8, ups=0.09, wpb=64754, bsz=128, num_updates=253, lr=2.53e-05, gnorm=3.661, loss_scale=2, train_wall=11, gb_free=2.8, wall=2908
2021-06-18 19:27:25 | INFO | train_inner | epoch 001: 261 / 3002 loss=3.37, ppl=10.34, wps=5766.4, ups=0.09, wpb=64807, bsz=128, num_updates=254, lr=2.54e-05, gnorm=3.504, loss_scale=2, train_wall=11, gb_free=2.8, wall=2919
2021-06-18 19:27:36 | INFO | train_inner | epoch 001: 262 / 3002 loss=3.255, ppl=9.55, wps=5848.2, ups=0.09, wpb=64788, bsz=128, num_updates=255, lr=2.55e-05, gnorm=3.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=2930
2021-06-18 19:27:47 | INFO | train_inner | epoch 001: 263 / 3002 loss=3.538, ppl=11.61, wps=5846.5, ups=0.09, wpb=64892, bsz=128, num_updates=256, lr=2.56e-05, gnorm=3.047, loss_scale=2, train_wall=11, gb_free=2.8, wall=2941
2021-06-18 19:27:58 | INFO | train_inner | epoch 001: 264 / 3002 loss=3.329, ppl=10.05, wps=5858.8, ups=0.09, wpb=64745, bsz=128, num_updates=257, lr=2.57e-05, gnorm=3.02, loss_scale=2, train_wall=11, gb_free=2.8, wall=2952
2021-06-18 19:28:09 | INFO | train_inner | epoch 001: 265 / 3002 loss=3.27, ppl=9.65, wps=5811.4, ups=0.09, wpb=64794, bsz=128, num_updates=258, lr=2.58e-05, gnorm=2.907, loss_scale=2, train_wall=11, gb_free=2.8, wall=2963
2021-06-18 19:28:20 | INFO | train_inner | epoch 001: 266 / 3002 loss=3.25, ppl=9.51, wps=5862.7, ups=0.09, wpb=64925, bsz=128, num_updates=259, lr=2.59e-05, gnorm=2.978, loss_scale=2, train_wall=11, gb_free=2.8, wall=2975
2021-06-18 19:28:31 | INFO | train_inner | epoch 001: 267 / 3002 loss=3.367, ppl=10.32, wps=5917, ups=0.09, wpb=64824, bsz=128, num_updates=260, lr=2.6e-05, gnorm=3.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=2985
2021-06-18 19:28:42 | INFO | train_inner | epoch 001: 268 / 3002 loss=3.292, ppl=9.79, wps=5910.3, ups=0.09, wpb=64846, bsz=128, num_updates=261, lr=2.61e-05, gnorm=2.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=2996
2021-06-18 19:28:53 | INFO | train_inner | epoch 001: 269 / 3002 loss=3.464, ppl=11.04, wps=5927.7, ups=0.09, wpb=64909, bsz=128, num_updates=262, lr=2.62e-05, gnorm=2.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=3007
2021-06-18 19:29:04 | INFO | train_inner | epoch 001: 270 / 3002 loss=3.356, ppl=10.24, wps=5896.6, ups=0.09, wpb=64912, bsz=128, num_updates=263, lr=2.63e-05, gnorm=2.866, loss_scale=2, train_wall=11, gb_free=2.8, wall=3018
2021-06-18 19:29:15 | INFO | train_inner | epoch 001: 271 / 3002 loss=3.327, ppl=10.04, wps=5851.4, ups=0.09, wpb=64872, bsz=128, num_updates=264, lr=2.64e-05, gnorm=5.543, loss_scale=2, train_wall=11, gb_free=2.8, wall=3029
2021-06-18 19:29:26 | INFO | train_inner | epoch 001: 272 / 3002 loss=3.398, ppl=10.54, wps=5801.2, ups=0.09, wpb=64812, bsz=128, num_updates=265, lr=2.65e-05, gnorm=3.957, loss_scale=2, train_wall=11, gb_free=2.8, wall=3041
2021-06-18 19:29:37 | INFO | train_inner | epoch 001: 273 / 3002 loss=3.32, ppl=9.98, wps=5881.6, ups=0.09, wpb=64887, bsz=128, num_updates=266, lr=2.66e-05, gnorm=2.991, loss_scale=2, train_wall=11, gb_free=2.8, wall=3052
2021-06-18 19:29:48 | INFO | train_inner | epoch 001: 274 / 3002 loss=3.464, ppl=11.03, wps=5912.5, ups=0.09, wpb=64879, bsz=128, num_updates=267, lr=2.67e-05, gnorm=3.103, loss_scale=2, train_wall=11, gb_free=2.8, wall=3063
2021-06-18 19:29:59 | INFO | train_inner | epoch 001: 275 / 3002 loss=3.4, ppl=10.56, wps=6027.6, ups=0.09, wpb=64732, bsz=128, num_updates=268, lr=2.68e-05, gnorm=3.232, loss_scale=2, train_wall=10, gb_free=2.8, wall=3073
2021-06-18 19:30:10 | INFO | train_inner | epoch 001: 276 / 3002 loss=3.439, ppl=10.84, wps=5942.4, ups=0.09, wpb=64759, bsz=128, num_updates=269, lr=2.69e-05, gnorm=2.999, loss_scale=2, train_wall=10, gb_free=2.8, wall=3084
2021-06-18 19:30:21 | INFO | train_inner | epoch 001: 277 / 3002 loss=3.262, ppl=9.59, wps=5881.4, ups=0.09, wpb=64848, bsz=128, num_updates=270, lr=2.7e-05, gnorm=2.991, loss_scale=2, train_wall=11, gb_free=2.8, wall=3095
2021-06-18 19:30:32 | INFO | train_inner | epoch 001: 278 / 3002 loss=3.384, ppl=10.44, wps=5936.9, ups=0.09, wpb=64782, bsz=128, num_updates=271, lr=2.71e-05, gnorm=3.044, loss_scale=2, train_wall=10, gb_free=2.8, wall=3106
2021-06-18 19:30:43 | INFO | train_inner | epoch 001: 279 / 3002 loss=3.248, ppl=9.5, wps=5757.5, ups=0.09, wpb=64832, bsz=128, num_updates=272, lr=2.72e-05, gnorm=3.903, loss_scale=2, train_wall=11, gb_free=2.8, wall=3118
2021-06-18 19:30:54 | INFO | train_inner | epoch 001: 280 / 3002 loss=3.354, ppl=10.23, wps=5834.6, ups=0.09, wpb=64859, bsz=128, num_updates=273, lr=2.73e-05, gnorm=3.846, loss_scale=2, train_wall=11, gb_free=2.8, wall=3129
2021-06-18 19:31:06 | INFO | train_inner | epoch 001: 281 / 3002 loss=3.387, ppl=10.46, wps=5767.9, ups=0.09, wpb=64776, bsz=128, num_updates=274, lr=2.74e-05, gnorm=2.903, loss_scale=2, train_wall=11, gb_free=2.8, wall=3140
2021-06-18 19:31:17 | INFO | train_inner | epoch 001: 282 / 3002 loss=3.306, ppl=9.89, wps=5857.9, ups=0.09, wpb=64825, bsz=128, num_updates=275, lr=2.75e-05, gnorm=3.031, loss_scale=2, train_wall=11, gb_free=2.8, wall=3151
2021-06-18 19:31:28 | INFO | train_inner | epoch 001: 283 / 3002 loss=3.253, ppl=9.54, wps=5906.8, ups=0.09, wpb=64792, bsz=128, num_updates=276, lr=2.76e-05, gnorm=3.055, loss_scale=2, train_wall=11, gb_free=2.8, wall=3162
2021-06-18 19:31:39 | INFO | train_inner | epoch 001: 284 / 3002 loss=3.318, ppl=9.98, wps=5862.2, ups=0.09, wpb=64795, bsz=128, num_updates=277, lr=2.77e-05, gnorm=2.993, loss_scale=2, train_wall=11, gb_free=2.8, wall=3173
2021-06-18 19:31:50 | INFO | train_inner | epoch 001: 285 / 3002 loss=3.28, ppl=9.72, wps=5871.7, ups=0.09, wpb=64857, bsz=128, num_updates=278, lr=2.78e-05, gnorm=2.874, loss_scale=2, train_wall=11, gb_free=2.8, wall=3184
2021-06-18 19:32:01 | INFO | train_inner | epoch 001: 286 / 3002 loss=3.541, ppl=11.64, wps=5821.3, ups=0.09, wpb=64766, bsz=128, num_updates=279, lr=2.79e-05, gnorm=3.112, loss_scale=2, train_wall=11, gb_free=2.8, wall=3195
2021-06-18 19:32:12 | INFO | train_inner | epoch 001: 287 / 3002 loss=3.362, ppl=10.28, wps=5859.4, ups=0.09, wpb=64819, bsz=128, num_updates=280, lr=2.8e-05, gnorm=3.014, loss_scale=2, train_wall=11, gb_free=2.8, wall=3206
2021-06-18 19:32:23 | INFO | train_inner | epoch 001: 288 / 3002 loss=3.312, ppl=9.93, wps=5896.4, ups=0.09, wpb=64843, bsz=128, num_updates=281, lr=2.81e-05, gnorm=3.121, loss_scale=2, train_wall=11, gb_free=2.8, wall=3217
2021-06-18 19:32:34 | INFO | train_inner | epoch 001: 289 / 3002 loss=3.489, ppl=11.23, wps=5877.5, ups=0.09, wpb=64860, bsz=128, num_updates=282, lr=2.82e-05, gnorm=3.063, loss_scale=2, train_wall=11, gb_free=2.8, wall=3228
2021-06-18 19:32:45 | INFO | train_inner | epoch 001: 290 / 3002 loss=3.255, ppl=9.55, wps=5897, ups=0.09, wpb=64841, bsz=128, num_updates=283, lr=2.83e-05, gnorm=3.062, loss_scale=2, train_wall=11, gb_free=2.8, wall=3239
2021-06-18 19:32:56 | INFO | train_inner | epoch 001: 291 / 3002 loss=3.447, ppl=10.91, wps=5778.8, ups=0.09, wpb=64849, bsz=128, num_updates=284, lr=2.84e-05, gnorm=2.938, loss_scale=2, train_wall=11, gb_free=2.8, wall=3250
2021-06-18 19:33:07 | INFO | train_inner | epoch 001: 292 / 3002 loss=3.11, ppl=8.63, wps=5786, ups=0.09, wpb=64834, bsz=128, num_updates=285, lr=2.85e-05, gnorm=2.874, loss_scale=2, train_wall=11, gb_free=2.8, wall=3262
2021-06-18 19:33:18 | INFO | train_inner | epoch 001: 293 / 3002 loss=3.21, ppl=9.25, wps=5899.3, ups=0.09, wpb=64857, bsz=128, num_updates=286, lr=2.86e-05, gnorm=2.886, loss_scale=2, train_wall=11, gb_free=2.8, wall=3273
2021-06-18 19:33:29 | INFO | train_inner | epoch 001: 294 / 3002 loss=3.421, ppl=10.71, wps=5843.8, ups=0.09, wpb=64835, bsz=128, num_updates=287, lr=2.87e-05, gnorm=3.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=3284
2021-06-18 19:33:41 | INFO | train_inner | epoch 001: 295 / 3002 loss=3.382, ppl=10.43, wps=5761.9, ups=0.09, wpb=64796, bsz=128, num_updates=288, lr=2.88e-05, gnorm=3.081, loss_scale=2, train_wall=11, gb_free=2.8, wall=3295
2021-06-18 19:33:52 | INFO | train_inner | epoch 001: 296 / 3002 loss=3.344, ppl=10.15, wps=5824.4, ups=0.09, wpb=64775, bsz=128, num_updates=289, lr=2.89e-05, gnorm=2.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=3306
2021-06-18 19:34:03 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-18 19:34:14 | INFO | train_inner | epoch 001: 298 / 3002 loss=3.373, ppl=10.36, wps=2914.8, ups=0.04, wpb=64780, bsz=128, num_updates=290, lr=2.9e-05, gnorm=4.879, loss_scale=1, train_wall=21, gb_free=2.8, wall=3328
2021-06-18 19:34:25 | INFO | train_inner | epoch 001: 299 / 3002 loss=3.356, ppl=10.24, wps=5848.8, ups=0.09, wpb=64794, bsz=128, num_updates=291, lr=2.91e-05, gnorm=10.523, loss_scale=1, train_wall=11, gb_free=2.8, wall=3339
2021-06-18 19:34:36 | INFO | train_inner | epoch 001: 300 / 3002 loss=3.312, ppl=9.93, wps=5759.6, ups=0.09, wpb=64845, bsz=128, num_updates=292, lr=2.92e-05, gnorm=12.127, loss_scale=1, train_wall=11, gb_free=2.8, wall=3351
2021-06-18 19:34:47 | INFO | train_inner | epoch 001: 301 / 3002 loss=3.187, ppl=9.11, wps=5842.6, ups=0.09, wpb=64828, bsz=128, num_updates=293, lr=2.93e-05, gnorm=3.189, loss_scale=1, train_wall=11, gb_free=2.8, wall=3362
2021-06-18 19:34:58 | INFO | train_inner | epoch 001: 302 / 3002 loss=3.282, ppl=9.73, wps=5902.1, ups=0.09, wpb=64773, bsz=128, num_updates=294, lr=2.94e-05, gnorm=3.438, loss_scale=1, train_wall=11, gb_free=2.8, wall=3373
2021-06-18 19:35:09 | INFO | train_inner | epoch 001: 303 / 3002 loss=3.269, ppl=9.64, wps=5926, ups=0.09, wpb=64862, bsz=128, num_updates=295, lr=2.95e-05, gnorm=2.786, loss_scale=1, train_wall=10, gb_free=2.8, wall=3384
2021-06-18 19:35:20 | INFO | train_inner | epoch 001: 304 / 3002 loss=3.387, ppl=10.46, wps=5878.1, ups=0.09, wpb=64848, bsz=128, num_updates=296, lr=2.96e-05, gnorm=2.936, loss_scale=1, train_wall=11, gb_free=2.8, wall=3395
2021-06-18 19:35:31 | INFO | train_inner | epoch 001: 305 / 3002 loss=3.602, ppl=12.15, wps=5876.2, ups=0.09, wpb=64846, bsz=128, num_updates=297, lr=2.97e-05, gnorm=3.042, loss_scale=1, train_wall=11, gb_free=2.8, wall=3406
2021-06-18 19:35:43 | INFO | train_inner | epoch 001: 306 / 3002 loss=3.295, ppl=9.81, wps=5799.1, ups=0.09, wpb=64834, bsz=128, num_updates=298, lr=2.98e-05, gnorm=3.04, loss_scale=1, train_wall=11, gb_free=2.8, wall=3417
2021-06-18 19:35:54 | INFO | train_inner | epoch 001: 307 / 3002 loss=3.356, ppl=10.24, wps=5813.5, ups=0.09, wpb=64874, bsz=128, num_updates=299, lr=2.99e-05, gnorm=2.991, loss_scale=1, train_wall=11, gb_free=2.8, wall=3428
2021-06-18 19:36:05 | INFO | train_inner | epoch 001: 308 / 3002 loss=3.378, ppl=10.4, wps=5848.9, ups=0.09, wpb=64836, bsz=128, num_updates=300, lr=3e-05, gnorm=2.82, loss_scale=1, train_wall=11, gb_free=2.8, wall=3439
2021-06-18 19:36:16 | INFO | train_inner | epoch 001: 309 / 3002 loss=3.34, ppl=10.12, wps=5932.5, ups=0.09, wpb=64893, bsz=128, num_updates=301, lr=3.01e-05, gnorm=3.001, loss_scale=1, train_wall=10, gb_free=2.8, wall=3450
2021-06-18 19:36:27 | INFO | train_inner | epoch 001: 310 / 3002 loss=3.227, ppl=9.37, wps=5827.5, ups=0.09, wpb=64853, bsz=128, num_updates=302, lr=3.02e-05, gnorm=2.987, loss_scale=1, train_wall=11, gb_free=2.8, wall=3461
2021-06-18 19:36:38 | INFO | train_inner | epoch 001: 311 / 3002 loss=3.311, ppl=9.93, wps=5902.6, ups=0.09, wpb=64786, bsz=128, num_updates=303, lr=3.03e-05, gnorm=3.005, loss_scale=1, train_wall=11, gb_free=2.8, wall=3472
2021-06-18 19:36:49 | INFO | train_inner | epoch 001: 312 / 3002 loss=3.245, ppl=9.48, wps=5913.2, ups=0.09, wpb=64880, bsz=128, num_updates=304, lr=3.04e-05, gnorm=2.95, loss_scale=1, train_wall=10, gb_free=2.8, wall=3483
2021-06-18 19:37:00 | INFO | train_inner | epoch 001: 313 / 3002 loss=3.301, ppl=9.86, wps=5816, ups=0.09, wpb=64773, bsz=128, num_updates=305, lr=3.05e-05, gnorm=2.867, loss_scale=1, train_wall=11, gb_free=2.8, wall=3494
2021-06-18 19:37:11 | INFO | train_inner | epoch 001: 314 / 3002 loss=3.345, ppl=10.16, wps=5922.4, ups=0.09, wpb=64874, bsz=128, num_updates=306, lr=3.06e-05, gnorm=2.909, loss_scale=1, train_wall=11, gb_free=2.8, wall=3505
2021-06-18 19:37:22 | INFO | train_inner | epoch 001: 315 / 3002 loss=3.219, ppl=9.31, wps=5905.9, ups=0.09, wpb=64838, bsz=128, num_updates=307, lr=3.07e-05, gnorm=2.935, loss_scale=1, train_wall=11, gb_free=2.8, wall=3516
2021-06-18 19:37:33 | INFO | train_inner | epoch 001: 316 / 3002 loss=3.443, ppl=10.87, wps=5838.9, ups=0.09, wpb=64803, bsz=128, num_updates=308, lr=3.08e-05, gnorm=2.843, loss_scale=1, train_wall=11, gb_free=2.8, wall=3527
2021-06-18 19:37:44 | INFO | train_inner | epoch 001: 317 / 3002 loss=3.431, ppl=10.79, wps=6022.8, ups=0.09, wpb=64864, bsz=128, num_updates=309, lr=3.09e-05, gnorm=2.947, loss_scale=1, train_wall=10, gb_free=2.8, wall=3538
2021-06-18 19:37:55 | INFO | train_inner | epoch 001: 318 / 3002 loss=3.317, ppl=9.96, wps=5765.3, ups=0.09, wpb=64851, bsz=128, num_updates=310, lr=3.1e-05, gnorm=3.02, loss_scale=1, train_wall=11, gb_free=2.8, wall=3549
2021-06-18 19:38:06 | INFO | train_inner | epoch 001: 319 / 3002 loss=3.268, ppl=9.63, wps=5852.4, ups=0.09, wpb=64830, bsz=128, num_updates=311, lr=3.11e-05, gnorm=2.969, loss_scale=1, train_wall=11, gb_free=2.8, wall=3560
2021-06-18 19:38:17 | INFO | train_inner | epoch 001: 320 / 3002 loss=3.16, ppl=8.94, wps=5849, ups=0.09, wpb=64848, bsz=128, num_updates=312, lr=3.12e-05, gnorm=2.788, loss_scale=1, train_wall=11, gb_free=2.8, wall=3572
2021-06-18 19:38:28 | INFO | train_inner | epoch 001: 321 / 3002 loss=3.216, ppl=9.29, wps=5840.5, ups=0.09, wpb=64795, bsz=128, num_updates=313, lr=3.13e-05, gnorm=2.889, loss_scale=1, train_wall=11, gb_free=2.8, wall=3583
2021-06-18 19:38:39 | INFO | train_inner | epoch 001: 322 / 3002 loss=3.474, ppl=11.11, wps=6006.8, ups=0.09, wpb=64869, bsz=128, num_updates=314, lr=3.14e-05, gnorm=2.923, loss_scale=1, train_wall=10, gb_free=2.8, wall=3593
2021-06-18 19:38:50 | INFO | train_inner | epoch 001: 323 / 3002 loss=3.361, ppl=10.27, wps=5874.5, ups=0.09, wpb=64844, bsz=128, num_updates=315, lr=3.15e-05, gnorm=3.07, loss_scale=1, train_wall=11, gb_free=2.8, wall=3604
2021-06-18 19:39:01 | INFO | train_inner | epoch 001: 324 / 3002 loss=3.439, ppl=10.85, wps=5914.1, ups=0.09, wpb=64837, bsz=128, num_updates=316, lr=3.16e-05, gnorm=3.163, loss_scale=1, train_wall=11, gb_free=2.8, wall=3615
2021-06-18 19:39:12 | INFO | train_inner | epoch 001: 325 / 3002 loss=3.335, ppl=10.09, wps=5808.9, ups=0.09, wpb=64896, bsz=128, num_updates=317, lr=3.17e-05, gnorm=3.182, loss_scale=1, train_wall=11, gb_free=2.8, wall=3627
2021-06-18 19:39:23 | INFO | train_inner | epoch 001: 326 / 3002 loss=3.479, ppl=11.15, wps=5817, ups=0.09, wpb=64851, bsz=128, num_updates=318, lr=3.18e-05, gnorm=2.893, loss_scale=1, train_wall=11, gb_free=2.8, wall=3638
2021-06-18 19:39:34 | INFO | train_inner | epoch 001: 327 / 3002 loss=3.264, ppl=9.61, wps=5872.4, ups=0.09, wpb=64871, bsz=128, num_updates=319, lr=3.19e-05, gnorm=3.16, loss_scale=1, train_wall=11, gb_free=2.8, wall=3649
2021-06-18 19:39:45 | INFO | train_inner | epoch 001: 328 / 3002 loss=3.298, ppl=9.83, wps=5923.4, ups=0.09, wpb=64899, bsz=128, num_updates=320, lr=3.2e-05, gnorm=3.117, loss_scale=1, train_wall=10, gb_free=2.8, wall=3660
2021-06-18 19:39:56 | INFO | train_inner | epoch 001: 329 / 3002 loss=3.441, ppl=10.86, wps=5864, ups=0.09, wpb=64889, bsz=128, num_updates=321, lr=3.21e-05, gnorm=4.425, loss_scale=1, train_wall=11, gb_free=2.8, wall=3671
2021-06-18 19:40:07 | INFO | train_inner | epoch 001: 330 / 3002 loss=3.36, ppl=10.27, wps=5890.7, ups=0.09, wpb=64872, bsz=128, num_updates=322, lr=3.22e-05, gnorm=2.909, loss_scale=1, train_wall=11, gb_free=2.8, wall=3682
2021-06-18 19:40:19 | INFO | train_inner | epoch 001: 331 / 3002 loss=3.346, ppl=10.17, wps=5828.2, ups=0.09, wpb=64843, bsz=128, num_updates=323, lr=3.23e-05, gnorm=3.209, loss_scale=1, train_wall=11, gb_free=2.8, wall=3693
2021-06-18 19:40:30 | INFO | train_inner | epoch 001: 332 / 3002 loss=3.253, ppl=9.54, wps=5829.7, ups=0.09, wpb=64846, bsz=128, num_updates=324, lr=3.24e-05, gnorm=3.185, loss_scale=1, train_wall=11, gb_free=2.8, wall=3704
2021-06-18 19:40:41 | INFO | train_inner | epoch 001: 333 / 3002 loss=3.275, ppl=9.68, wps=5772, ups=0.09, wpb=64832, bsz=128, num_updates=325, lr=3.25e-05, gnorm=2.937, loss_scale=1, train_wall=11, gb_free=2.8, wall=3715
2021-06-18 19:40:52 | INFO | train_inner | epoch 001: 334 / 3002 loss=3.25, ppl=9.51, wps=5780.3, ups=0.09, wpb=64833, bsz=128, num_updates=326, lr=3.26e-05, gnorm=3.496, loss_scale=1, train_wall=11, gb_free=2.8, wall=3727
2021-06-18 19:41:03 | INFO | train_inner | epoch 001: 335 / 3002 loss=3.37, ppl=10.34, wps=5856.9, ups=0.09, wpb=64846, bsz=128, num_updates=327, lr=3.27e-05, gnorm=3.112, loss_scale=1, train_wall=11, gb_free=2.8, wall=3738
2021-06-18 19:41:14 | INFO | train_inner | epoch 001: 336 / 3002 loss=3.32, ppl=9.98, wps=5793.4, ups=0.09, wpb=64751, bsz=128, num_updates=328, lr=3.28e-05, gnorm=3, loss_scale=1, train_wall=11, gb_free=2.8, wall=3749
2021-06-18 19:41:26 | INFO | train_inner | epoch 001: 337 / 3002 loss=3.157, ppl=8.92, wps=5787.6, ups=0.09, wpb=64828, bsz=128, num_updates=329, lr=3.29e-05, gnorm=3.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=3760
2021-06-18 19:41:37 | INFO | train_inner | epoch 001: 338 / 3002 loss=3.336, ppl=10.1, wps=5847.6, ups=0.09, wpb=64828, bsz=128, num_updates=330, lr=3.3e-05, gnorm=4.822, loss_scale=1, train_wall=11, gb_free=2.8, wall=3771
2021-06-18 19:41:48 | INFO | train_inner | epoch 001: 339 / 3002 loss=3.246, ppl=9.49, wps=5753.9, ups=0.09, wpb=64826, bsz=128, num_updates=331, lr=3.31e-05, gnorm=3.174, loss_scale=1, train_wall=11, gb_free=2.8, wall=3782
2021-06-18 19:41:59 | INFO | train_inner | epoch 001: 340 / 3002 loss=3.175, ppl=9.03, wps=5922.8, ups=0.09, wpb=64825, bsz=128, num_updates=332, lr=3.32e-05, gnorm=2.868, loss_scale=1, train_wall=10, gb_free=2.8, wall=3793
2021-06-18 19:42:10 | INFO | train_inner | epoch 001: 341 / 3002 loss=3.175, ppl=9.03, wps=5921.1, ups=0.09, wpb=64857, bsz=128, num_updates=333, lr=3.33e-05, gnorm=3.288, loss_scale=1, train_wall=11, gb_free=2.8, wall=3804
2021-06-18 19:42:21 | INFO | train_inner | epoch 001: 342 / 3002 loss=3.196, ppl=9.17, wps=5941.9, ups=0.09, wpb=64829, bsz=128, num_updates=334, lr=3.34e-05, gnorm=3.071, loss_scale=1, train_wall=10, gb_free=2.8, wall=3815
2021-06-18 19:42:32 | INFO | train_inner | epoch 001: 343 / 3002 loss=3.307, ppl=9.9, wps=5775.3, ups=0.09, wpb=64839, bsz=128, num_updates=335, lr=3.35e-05, gnorm=11.528, loss_scale=1, train_wall=11, gb_free=2.8, wall=3826
2021-06-18 19:42:43 | INFO | train_inner | epoch 001: 344 / 3002 loss=3.433, ppl=10.8, wps=5986.9, ups=0.09, wpb=64873, bsz=128, num_updates=336, lr=3.36e-05, gnorm=2.967, loss_scale=1, train_wall=10, gb_free=2.8, wall=3837
2021-06-18 19:42:54 | INFO | train_inner | epoch 001: 345 / 3002 loss=3.352, ppl=10.21, wps=5768.5, ups=0.09, wpb=64864, bsz=128, num_updates=337, lr=3.37e-05, gnorm=3.198, loss_scale=1, train_wall=11, gb_free=2.8, wall=3848
2021-06-18 19:43:05 | INFO | train_inner | epoch 001: 346 / 3002 loss=3.354, ppl=10.23, wps=5823, ups=0.09, wpb=64785, bsz=128, num_updates=338, lr=3.38e-05, gnorm=3.173, loss_scale=1, train_wall=11, gb_free=2.8, wall=3860
2021-06-18 19:43:17 | INFO | train_inner | epoch 001: 347 / 3002 loss=3.23, ppl=9.38, wps=5735.2, ups=0.09, wpb=64880, bsz=128, num_updates=339, lr=3.39e-05, gnorm=2.94, loss_scale=1, train_wall=11, gb_free=2.8, wall=3871
2021-06-18 19:43:28 | INFO | train_inner | epoch 001: 348 / 3002 loss=3.242, ppl=9.46, wps=5855.6, ups=0.09, wpb=64834, bsz=128, num_updates=340, lr=3.4e-05, gnorm=3.228, loss_scale=1, train_wall=11, gb_free=2.8, wall=3882
2021-06-18 19:43:39 | INFO | train_inner | epoch 001: 349 / 3002 loss=3.331, ppl=10.06, wps=5803.7, ups=0.09, wpb=64787, bsz=128, num_updates=341, lr=3.41e-05, gnorm=2.794, loss_scale=1, train_wall=11, gb_free=2.8, wall=3893
2021-06-18 19:43:50 | INFO | train_inner | epoch 001: 350 / 3002 loss=3.371, ppl=10.35, wps=5907.9, ups=0.09, wpb=64786, bsz=128, num_updates=342, lr=3.42e-05, gnorm=3.007, loss_scale=1, train_wall=11, gb_free=2.8, wall=3904
2021-06-18 19:44:01 | INFO | train_inner | epoch 001: 351 / 3002 loss=3.155, ppl=8.91, wps=5788.2, ups=0.09, wpb=64811, bsz=128, num_updates=343, lr=3.43e-05, gnorm=2.887, loss_scale=1, train_wall=11, gb_free=2.8, wall=3915
2021-06-18 19:44:12 | INFO | train_inner | epoch 001: 352 / 3002 loss=3.103, ppl=8.59, wps=6073.2, ups=0.09, wpb=64887, bsz=128, num_updates=344, lr=3.44e-05, gnorm=2.917, loss_scale=1, train_wall=10, gb_free=2.8, wall=3926
2021-06-18 19:44:23 | INFO | train_inner | epoch 001: 353 / 3002 loss=3.388, ppl=10.47, wps=5927, ups=0.09, wpb=64877, bsz=128, num_updates=345, lr=3.45e-05, gnorm=3.169, loss_scale=1, train_wall=10, gb_free=2.8, wall=3937
2021-06-18 19:44:34 | INFO | train_inner | epoch 001: 354 / 3002 loss=3.254, ppl=9.54, wps=5891.9, ups=0.09, wpb=64843, bsz=128, num_updates=346, lr=3.46e-05, gnorm=3.303, loss_scale=1, train_wall=11, gb_free=2.8, wall=3948
2021-06-18 19:44:45 | INFO | train_inner | epoch 001: 355 / 3002 loss=3.348, ppl=10.18, wps=5917.7, ups=0.09, wpb=64838, bsz=128, num_updates=347, lr=3.47e-05, gnorm=3.182, loss_scale=1, train_wall=11, gb_free=2.8, wall=3959
2021-06-18 19:44:56 | INFO | train_inner | epoch 001: 356 / 3002 loss=3.32, ppl=9.99, wps=5762.9, ups=0.09, wpb=64773, bsz=128, num_updates=348, lr=3.48e-05, gnorm=2.985, loss_scale=1, train_wall=11, gb_free=2.8, wall=3970
2021-06-18 19:45:07 | INFO | train_inner | epoch 001: 357 / 3002 loss=3.191, ppl=9.13, wps=5773.7, ups=0.09, wpb=64866, bsz=128, num_updates=349, lr=3.49e-05, gnorm=2.971, loss_scale=1, train_wall=11, gb_free=2.8, wall=3981
2021-06-18 19:45:18 | INFO | train_inner | epoch 001: 358 / 3002 loss=3.165, ppl=8.97, wps=5768.1, ups=0.09, wpb=64843, bsz=128, num_updates=350, lr=3.5e-05, gnorm=2.83, loss_scale=1, train_wall=11, gb_free=2.8, wall=3993
2021-06-18 19:45:29 | INFO | train_inner | epoch 001: 359 / 3002 loss=3.231, ppl=9.39, wps=5788, ups=0.09, wpb=64782, bsz=128, num_updates=351, lr=3.51e-05, gnorm=3.038, loss_scale=1, train_wall=11, gb_free=2.8, wall=4004
2021-06-18 19:45:41 | INFO | train_inner | epoch 001: 360 / 3002 loss=3.077, ppl=8.44, wps=5819, ups=0.09, wpb=64846, bsz=128, num_updates=352, lr=3.52e-05, gnorm=2.913, loss_scale=1, train_wall=11, gb_free=2.8, wall=4015
2021-06-18 19:45:52 | INFO | train_inner | epoch 001: 361 / 3002 loss=3.392, ppl=10.5, wps=5806.4, ups=0.09, wpb=64824, bsz=128, num_updates=353, lr=3.53e-05, gnorm=3.132, loss_scale=1, train_wall=11, gb_free=2.8, wall=4026
2021-06-18 19:46:03 | INFO | train_inner | epoch 001: 362 / 3002 loss=3.575, ppl=11.91, wps=5768.4, ups=0.09, wpb=64721, bsz=128, num_updates=354, lr=3.54e-05, gnorm=2.946, loss_scale=1, train_wall=11, gb_free=2.8, wall=4037
2021-06-18 19:46:14 | INFO | train_inner | epoch 001: 363 / 3002 loss=3.4, ppl=10.55, wps=5855.7, ups=0.09, wpb=64849, bsz=128, num_updates=355, lr=3.55e-05, gnorm=3.204, loss_scale=1, train_wall=11, gb_free=2.8, wall=4048
2021-06-18 19:46:25 | INFO | train_inner | epoch 001: 364 / 3002 loss=3.254, ppl=9.54, wps=5905, ups=0.09, wpb=64834, bsz=128, num_updates=356, lr=3.56e-05, gnorm=3.187, loss_scale=1, train_wall=11, gb_free=2.8, wall=4059
2021-06-18 19:46:36 | INFO | train_inner | epoch 001: 365 / 3002 loss=3.356, ppl=10.24, wps=5826.5, ups=0.09, wpb=64845, bsz=128, num_updates=357, lr=3.57e-05, gnorm=3.292, loss_scale=1, train_wall=11, gb_free=2.8, wall=4071
2021-06-18 19:46:47 | INFO | train_inner | epoch 001: 366 / 3002 loss=3.261, ppl=9.58, wps=5924.6, ups=0.09, wpb=64788, bsz=128, num_updates=358, lr=3.58e-05, gnorm=2.876, loss_scale=1, train_wall=10, gb_free=2.8, wall=4081
2021-06-18 19:46:58 | INFO | train_inner | epoch 001: 367 / 3002 loss=3.158, ppl=8.93, wps=5892.5, ups=0.09, wpb=64837, bsz=128, num_updates=359, lr=3.59e-05, gnorm=3.195, loss_scale=1, train_wall=11, gb_free=2.8, wall=4092
2021-06-18 19:47:09 | INFO | train_inner | epoch 001: 368 / 3002 loss=3.359, ppl=10.26, wps=5921.1, ups=0.09, wpb=64822, bsz=128, num_updates=360, lr=3.6e-05, gnorm=3.045, loss_scale=1, train_wall=10, gb_free=2.8, wall=4103
2021-06-18 19:47:20 | INFO | train_inner | epoch 001: 369 / 3002 loss=3.354, ppl=10.22, wps=5844.6, ups=0.09, wpb=64867, bsz=128, num_updates=361, lr=3.61e-05, gnorm=3.082, loss_scale=1, train_wall=11, gb_free=2.8, wall=4114
2021-06-18 19:47:31 | INFO | train_inner | epoch 001: 370 / 3002 loss=3.391, ppl=10.49, wps=5921.5, ups=0.09, wpb=64884, bsz=128, num_updates=362, lr=3.62e-05, gnorm=2.954, loss_scale=1, train_wall=10, gb_free=2.8, wall=4125
2021-06-18 19:47:42 | INFO | train_inner | epoch 001: 371 / 3002 loss=3.385, ppl=10.44, wps=5743.6, ups=0.09, wpb=64859, bsz=128, num_updates=363, lr=3.63e-05, gnorm=2.94, loss_scale=1, train_wall=11, gb_free=2.8, wall=4137
2021-06-18 19:47:54 | INFO | train_inner | epoch 001: 372 / 3002 loss=3.258, ppl=9.57, wps=5763.3, ups=0.09, wpb=64828, bsz=128, num_updates=364, lr=3.64e-05, gnorm=2.765, loss_scale=1, train_wall=11, gb_free=2.8, wall=4148
2021-06-18 19:48:05 | INFO | train_inner | epoch 001: 373 / 3002 loss=3.284, ppl=9.74, wps=5764.4, ups=0.09, wpb=64718, bsz=128, num_updates=365, lr=3.65e-05, gnorm=2.945, loss_scale=1, train_wall=11, gb_free=2.8, wall=4159
2021-06-18 19:48:16 | INFO | train_inner | epoch 001: 374 / 3002 loss=3.268, ppl=9.63, wps=5778.3, ups=0.09, wpb=64867, bsz=128, num_updates=366, lr=3.66e-05, gnorm=3.403, loss_scale=1, train_wall=11, gb_free=2.8, wall=4170
2021-06-18 19:48:27 | INFO | train_inner | epoch 001: 375 / 3002 loss=3.248, ppl=9.5, wps=5974.1, ups=0.09, wpb=64765, bsz=128, num_updates=367, lr=3.67e-05, gnorm=2.942, loss_scale=1, train_wall=10, gb_free=2.8, wall=4181
2021-06-18 19:48:38 | INFO | train_inner | epoch 001: 376 / 3002 loss=3.012, ppl=8.07, wps=5887.7, ups=0.09, wpb=64873, bsz=128, num_updates=368, lr=3.68e-05, gnorm=2.971, loss_scale=1, train_wall=11, gb_free=2.8, wall=4192
2021-06-18 19:48:49 | INFO | train_inner | epoch 001: 377 / 3002 loss=3.247, ppl=9.5, wps=5806.3, ups=0.09, wpb=64841, bsz=128, num_updates=369, lr=3.69e-05, gnorm=2.824, loss_scale=1, train_wall=11, gb_free=2.8, wall=4203
2021-06-18 19:49:00 | INFO | train_inner | epoch 001: 378 / 3002 loss=3.242, ppl=9.46, wps=5839.4, ups=0.09, wpb=64828, bsz=128, num_updates=370, lr=3.7e-05, gnorm=3.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=4215
2021-06-18 19:49:11 | INFO | train_inner | epoch 001: 379 / 3002 loss=3.162, ppl=8.95, wps=5829.1, ups=0.09, wpb=64904, bsz=128, num_updates=371, lr=3.71e-05, gnorm=3.21, loss_scale=1, train_wall=11, gb_free=2.8, wall=4226
2021-06-18 19:49:23 | INFO | train_inner | epoch 001: 380 / 3002 loss=3.164, ppl=8.96, wps=5792.4, ups=0.09, wpb=64914, bsz=128, num_updates=372, lr=3.72e-05, gnorm=3.167, loss_scale=1, train_wall=11, gb_free=2.8, wall=4237
2021-06-18 19:49:34 | INFO | train_inner | epoch 001: 381 / 3002 loss=3.29, ppl=9.78, wps=5794.4, ups=0.09, wpb=64854, bsz=128, num_updates=373, lr=3.73e-05, gnorm=3.208, loss_scale=1, train_wall=11, gb_free=2.8, wall=4248
2021-06-18 19:49:45 | INFO | train_inner | epoch 001: 382 / 3002 loss=3.308, ppl=9.91, wps=5905.4, ups=0.09, wpb=64805, bsz=128, num_updates=374, lr=3.74e-05, gnorm=2.908, loss_scale=1, train_wall=11, gb_free=2.8, wall=4259
2021-06-18 19:49:56 | INFO | train_inner | epoch 001: 383 / 3002 loss=3.119, ppl=8.69, wps=5832.7, ups=0.09, wpb=64894, bsz=128, num_updates=375, lr=3.75e-05, gnorm=3.05, loss_scale=1, train_wall=11, gb_free=2.8, wall=4270
2021-06-18 19:50:07 | INFO | train_inner | epoch 001: 384 / 3002 loss=3.362, ppl=10.28, wps=5891.4, ups=0.09, wpb=64800, bsz=128, num_updates=376, lr=3.76e-05, gnorm=2.807, loss_scale=1, train_wall=11, gb_free=2.8, wall=4281
2021-06-18 19:50:18 | INFO | train_inner | epoch 001: 385 / 3002 loss=3.302, ppl=9.86, wps=5746.7, ups=0.09, wpb=64794, bsz=128, num_updates=377, lr=3.77e-05, gnorm=3.191, loss_scale=1, train_wall=11, gb_free=2.8, wall=4292
2021-06-18 19:50:29 | INFO | train_inner | epoch 001: 386 / 3002 loss=3.358, ppl=10.26, wps=5876.4, ups=0.09, wpb=64809, bsz=128, num_updates=378, lr=3.78e-05, gnorm=2.964, loss_scale=1, train_wall=11, gb_free=2.8, wall=4304
2021-06-18 19:50:40 | INFO | train_inner | epoch 001: 387 / 3002 loss=3.355, ppl=10.23, wps=5812.1, ups=0.09, wpb=64827, bsz=128, num_updates=379, lr=3.79e-05, gnorm=2.952, loss_scale=1, train_wall=11, gb_free=2.8, wall=4315
2021-06-18 19:50:51 | INFO | train_inner | epoch 001: 388 / 3002 loss=3.325, ppl=10.02, wps=5968.7, ups=0.09, wpb=64897, bsz=128, num_updates=380, lr=3.8e-05, gnorm=3.124, loss_scale=1, train_wall=10, gb_free=2.8, wall=4326
2021-06-18 19:51:02 | INFO | train_inner | epoch 001: 389 / 3002 loss=3.366, ppl=10.31, wps=5820.3, ups=0.09, wpb=64887, bsz=128, num_updates=381, lr=3.81e-05, gnorm=2.979, loss_scale=1, train_wall=11, gb_free=2.8, wall=4337
2021-06-18 19:51:14 | INFO | train_inner | epoch 001: 390 / 3002 loss=3.244, ppl=9.48, wps=5752.4, ups=0.09, wpb=64746, bsz=128, num_updates=382, lr=3.82e-05, gnorm=3.031, loss_scale=1, train_wall=11, gb_free=2.8, wall=4348
2021-06-18 19:51:25 | INFO | train_inner | epoch 001: 391 / 3002 loss=3.346, ppl=10.17, wps=5872.8, ups=0.09, wpb=64805, bsz=128, num_updates=383, lr=3.83e-05, gnorm=2.985, loss_scale=1, train_wall=11, gb_free=2.8, wall=4359
2021-06-18 19:51:36 | INFO | train_inner | epoch 001: 392 / 3002 loss=3.11, ppl=8.63, wps=5897.1, ups=0.09, wpb=64792, bsz=128, num_updates=384, lr=3.84e-05, gnorm=2.911, loss_scale=1, train_wall=11, gb_free=2.8, wall=4370
2021-06-18 19:51:47 | INFO | train_inner | epoch 001: 393 / 3002 loss=3.24, ppl=9.45, wps=5857.4, ups=0.09, wpb=64803, bsz=128, num_updates=385, lr=3.85e-05, gnorm=2.884, loss_scale=1, train_wall=11, gb_free=2.8, wall=4381
2021-06-18 19:51:58 | INFO | train_inner | epoch 001: 394 / 3002 loss=3.137, ppl=8.8, wps=5880.6, ups=0.09, wpb=64844, bsz=128, num_updates=386, lr=3.86e-05, gnorm=3.076, loss_scale=1, train_wall=11, gb_free=2.8, wall=4392
2021-06-18 19:52:09 | INFO | train_inner | epoch 001: 395 / 3002 loss=3.325, ppl=10.02, wps=5783.8, ups=0.09, wpb=64874, bsz=128, num_updates=387, lr=3.87e-05, gnorm=3.036, loss_scale=1, train_wall=11, gb_free=2.8, wall=4403
2021-06-18 19:52:20 | INFO | train_inner | epoch 001: 396 / 3002 loss=3.226, ppl=9.36, wps=5801.8, ups=0.09, wpb=64901, bsz=128, num_updates=388, lr=3.88e-05, gnorm=2.966, loss_scale=1, train_wall=11, gb_free=2.8, wall=4414
2021-06-18 19:52:31 | INFO | train_inner | epoch 001: 397 / 3002 loss=3.101, ppl=8.58, wps=5872.5, ups=0.09, wpb=64802, bsz=128, num_updates=389, lr=3.89e-05, gnorm=8.115, loss_scale=1, train_wall=11, gb_free=2.8, wall=4426
2021-06-18 19:52:42 | INFO | train_inner | epoch 001: 398 / 3002 loss=3.258, ppl=9.57, wps=5845.6, ups=0.09, wpb=64852, bsz=128, num_updates=390, lr=3.9e-05, gnorm=3.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=4437
2021-06-18 19:52:54 | INFO | train_inner | epoch 001: 399 / 3002 loss=3.39, ppl=10.49, wps=5730, ups=0.09, wpb=64826, bsz=128, num_updates=391, lr=3.91e-05, gnorm=2.923, loss_scale=1, train_wall=11, gb_free=2.8, wall=4448
2021-06-18 19:53:05 | INFO | train_inner | epoch 001: 400 / 3002 loss=3.446, ppl=10.9, wps=5881.8, ups=0.09, wpb=64791, bsz=128, num_updates=392, lr=3.92e-05, gnorm=70.911, loss_scale=1, train_wall=11, gb_free=2.8, wall=4459
2021-06-18 19:53:16 | INFO | train_inner | epoch 001: 401 / 3002 loss=3.349, ppl=10.19, wps=5841.4, ups=0.09, wpb=64834, bsz=128, num_updates=393, lr=3.93e-05, gnorm=8.969, loss_scale=1, train_wall=11, gb_free=2.8, wall=4470
2021-06-18 19:53:27 | INFO | train_inner | epoch 001: 402 / 3002 loss=3.389, ppl=10.47, wps=5822.1, ups=0.09, wpb=64828, bsz=128, num_updates=394, lr=3.94e-05, gnorm=2.976, loss_scale=1, train_wall=11, gb_free=2.8, wall=4481
2021-06-18 19:53:38 | INFO | train_inner | epoch 001: 403 / 3002 loss=3.196, ppl=9.16, wps=5883.9, ups=0.09, wpb=64800, bsz=128, num_updates=395, lr=3.95e-05, gnorm=3.105, loss_scale=1, train_wall=11, gb_free=2.8, wall=4492
2021-06-18 19:53:49 | INFO | train_inner | epoch 001: 404 / 3002 loss=3.081, ppl=8.46, wps=5794.8, ups=0.09, wpb=64790, bsz=128, num_updates=396, lr=3.96e-05, gnorm=2.827, loss_scale=1, train_wall=11, gb_free=2.8, wall=4503
2021-06-18 19:54:00 | INFO | train_inner | epoch 001: 405 / 3002 loss=3.315, ppl=9.95, wps=5856.6, ups=0.09, wpb=64767, bsz=128, num_updates=397, lr=3.97e-05, gnorm=3.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=4514
2021-06-18 19:54:11 | INFO | train_inner | epoch 001: 406 / 3002 loss=3.066, ppl=8.37, wps=5975.4, ups=0.09, wpb=64812, bsz=128, num_updates=398, lr=3.98e-05, gnorm=2.952, loss_scale=1, train_wall=10, gb_free=2.8, wall=4525
2021-06-18 19:54:22 | INFO | train_inner | epoch 001: 407 / 3002 loss=3.221, ppl=9.32, wps=5790.7, ups=0.09, wpb=64896, bsz=128, num_updates=399, lr=3.99e-05, gnorm=2.841, loss_scale=1, train_wall=11, gb_free=2.8, wall=4536
2021-06-18 19:54:33 | INFO | train_inner | epoch 001: 408 / 3002 loss=3.26, ppl=9.58, wps=5781.7, ups=0.09, wpb=64833, bsz=128, num_updates=400, lr=4e-05, gnorm=3.227, loss_scale=1, train_wall=11, gb_free=2.8, wall=4548
2021-06-18 19:54:44 | INFO | train_inner | epoch 001: 409 / 3002 loss=3.303, ppl=9.87, wps=5857.3, ups=0.09, wpb=64777, bsz=128, num_updates=401, lr=4.01e-05, gnorm=3.148, loss_scale=1, train_wall=11, gb_free=2.8, wall=4559
2021-06-18 19:54:56 | INFO | train_inner | epoch 001: 410 / 3002 loss=3.26, ppl=9.58, wps=5801.1, ups=0.09, wpb=64842, bsz=128, num_updates=402, lr=4.02e-05, gnorm=3.087, loss_scale=1, train_wall=11, gb_free=2.8, wall=4570
2021-06-18 19:55:07 | INFO | train_inner | epoch 001: 411 / 3002 loss=3.311, ppl=9.93, wps=5842, ups=0.09, wpb=64827, bsz=128, num_updates=403, lr=4.03e-05, gnorm=2.876, loss_scale=1, train_wall=11, gb_free=2.8, wall=4581
2021-06-18 19:55:18 | INFO | train_inner | epoch 001: 412 / 3002 loss=3.45, ppl=10.93, wps=5855.5, ups=0.09, wpb=64790, bsz=128, num_updates=404, lr=4.04e-05, gnorm=3.124, loss_scale=1, train_wall=11, gb_free=2.8, wall=4592
2021-06-18 19:55:29 | INFO | train_inner | epoch 001: 413 / 3002 loss=3.285, ppl=9.75, wps=5858.8, ups=0.09, wpb=64777, bsz=128, num_updates=405, lr=4.05e-05, gnorm=2.97, loss_scale=1, train_wall=11, gb_free=2.8, wall=4603
2021-06-18 19:55:40 | INFO | train_inner | epoch 001: 414 / 3002 loss=3.433, ppl=10.8, wps=5859, ups=0.09, wpb=64784, bsz=128, num_updates=406, lr=4.06e-05, gnorm=2.965, loss_scale=1, train_wall=11, gb_free=2.8, wall=4614
2021-06-18 19:55:51 | INFO | train_inner | epoch 001: 415 / 3002 loss=3.402, ppl=10.57, wps=5868.8, ups=0.09, wpb=64848, bsz=128, num_updates=407, lr=4.07e-05, gnorm=2.842, loss_scale=1, train_wall=11, gb_free=2.8, wall=4625
2021-06-18 19:56:02 | INFO | train_inner | epoch 001: 416 / 3002 loss=3.373, ppl=10.36, wps=5904.8, ups=0.09, wpb=64825, bsz=128, num_updates=408, lr=4.08e-05, gnorm=3.507, loss_scale=1, train_wall=11, gb_free=2.8, wall=4636
2021-06-18 19:56:13 | INFO | train_inner | epoch 001: 417 / 3002 loss=3.372, ppl=10.35, wps=5901.9, ups=0.09, wpb=64814, bsz=128, num_updates=409, lr=4.09e-05, gnorm=3.123, loss_scale=1, train_wall=10, gb_free=2.8, wall=4647
2021-06-18 19:56:24 | INFO | train_inner | epoch 001: 418 / 3002 loss=3.243, ppl=9.47, wps=5899.4, ups=0.09, wpb=64884, bsz=128, num_updates=410, lr=4.1e-05, gnorm=2.842, loss_scale=1, train_wall=11, gb_free=2.8, wall=4658
2021-06-18 19:56:35 | INFO | train_inner | epoch 001: 419 / 3002 loss=3.198, ppl=9.18, wps=5798.8, ups=0.09, wpb=64896, bsz=128, num_updates=411, lr=4.11e-05, gnorm=4.738, loss_scale=1, train_wall=11, gb_free=2.8, wall=4669
2021-06-18 19:56:46 | INFO | train_inner | epoch 001: 420 / 3002 loss=3.275, ppl=9.68, wps=5861, ups=0.09, wpb=64824, bsz=128, num_updates=412, lr=4.12e-05, gnorm=2.926, loss_scale=1, train_wall=11, gb_free=2.8, wall=4680
2021-06-18 19:56:57 | INFO | train_inner | epoch 001: 421 / 3002 loss=3.339, ppl=10.12, wps=5830.7, ups=0.09, wpb=64813, bsz=128, num_updates=413, lr=4.13e-05, gnorm=2.911, loss_scale=1, train_wall=11, gb_free=2.8, wall=4692
2021-06-18 19:57:08 | INFO | train_inner | epoch 001: 422 / 3002 loss=3.135, ppl=8.78, wps=5880.7, ups=0.09, wpb=64825, bsz=128, num_updates=414, lr=4.14e-05, gnorm=2.772, loss_scale=1, train_wall=11, gb_free=2.8, wall=4703
2021-06-18 19:57:19 | INFO | train_inner | epoch 001: 423 / 3002 loss=3.16, ppl=8.94, wps=5808.3, ups=0.09, wpb=64786, bsz=128, num_updates=415, lr=4.15e-05, gnorm=2.849, loss_scale=1, train_wall=11, gb_free=2.8, wall=4714
2021-06-18 19:57:30 | INFO | train_inner | epoch 001: 424 / 3002 loss=3.286, ppl=9.75, wps=5945, ups=0.09, wpb=64858, bsz=128, num_updates=416, lr=4.16e-05, gnorm=2.863, loss_scale=1, train_wall=10, gb_free=2.8, wall=4725
2021-06-18 19:57:41 | INFO | train_inner | epoch 001: 425 / 3002 loss=3.119, ppl=8.69, wps=5818.7, ups=0.09, wpb=64849, bsz=128, num_updates=417, lr=4.17e-05, gnorm=2.989, loss_scale=2, train_wall=11, gb_free=2.8, wall=4736
2021-06-18 19:57:53 | INFO | train_inner | epoch 001: 426 / 3002 loss=3.232, ppl=9.39, wps=5796.5, ups=0.09, wpb=64816, bsz=128, num_updates=418, lr=4.18e-05, gnorm=2.961, loss_scale=2, train_wall=11, gb_free=2.8, wall=4747
2021-06-18 19:58:04 | INFO | train_inner | epoch 001: 427 / 3002 loss=3.292, ppl=9.79, wps=5787.3, ups=0.09, wpb=64877, bsz=128, num_updates=419, lr=4.19e-05, gnorm=2.862, loss_scale=2, train_wall=11, gb_free=2.8, wall=4758
2021-06-18 19:58:15 | INFO | train_inner | epoch 001: 428 / 3002 loss=3.114, ppl=8.66, wps=5906.9, ups=0.09, wpb=64842, bsz=128, num_updates=420, lr=4.2e-05, gnorm=2.849, loss_scale=2, train_wall=11, gb_free=2.8, wall=4769
2021-06-18 19:58:26 | INFO | train_inner | epoch 001: 429 / 3002 loss=3.393, ppl=10.5, wps=5944.2, ups=0.09, wpb=64837, bsz=128, num_updates=421, lr=4.21e-05, gnorm=3.653, loss_scale=2, train_wall=10, gb_free=2.8, wall=4780
2021-06-18 19:58:37 | INFO | train_inner | epoch 001: 430 / 3002 loss=3.287, ppl=9.76, wps=5844.2, ups=0.09, wpb=64768, bsz=128, num_updates=422, lr=4.22e-05, gnorm=2.778, loss_scale=2, train_wall=11, gb_free=2.8, wall=4791
2021-06-18 19:58:48 | INFO | train_inner | epoch 001: 431 / 3002 loss=3.449, ppl=10.92, wps=5855.5, ups=0.09, wpb=64813, bsz=128, num_updates=423, lr=4.23e-05, gnorm=3.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=4802
2021-06-18 19:58:59 | INFO | train_inner | epoch 001: 432 / 3002 loss=3.156, ppl=8.92, wps=5837.3, ups=0.09, wpb=64842, bsz=128, num_updates=424, lr=4.24e-05, gnorm=2.829, loss_scale=2, train_wall=11, gb_free=2.8, wall=4813
2021-06-18 19:59:10 | INFO | train_inner | epoch 001: 433 / 3002 loss=3.29, ppl=9.78, wps=5878.5, ups=0.09, wpb=64890, bsz=128, num_updates=425, lr=4.25e-05, gnorm=2.951, loss_scale=2, train_wall=11, gb_free=2.8, wall=4824
2021-06-18 19:59:21 | INFO | train_inner | epoch 001: 434 / 3002 loss=3.162, ppl=8.95, wps=5750, ups=0.09, wpb=64800, bsz=128, num_updates=426, lr=4.26e-05, gnorm=3.227, loss_scale=2, train_wall=11, gb_free=2.8, wall=4836
2021-06-18 19:59:32 | INFO | train_inner | epoch 001: 435 / 3002 loss=3.299, ppl=9.84, wps=5855.8, ups=0.09, wpb=64867, bsz=128, num_updates=427, lr=4.27e-05, gnorm=2.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=4847
2021-06-18 19:59:43 | INFO | train_inner | epoch 001: 436 / 3002 loss=3.07, ppl=8.4, wps=5918.6, ups=0.09, wpb=64927, bsz=128, num_updates=428, lr=4.28e-05, gnorm=3.179, loss_scale=2, train_wall=11, gb_free=2.8, wall=4858
2021-06-18 19:59:54 | INFO | train_inner | epoch 001: 437 / 3002 loss=3.216, ppl=9.3, wps=5878, ups=0.09, wpb=64843, bsz=128, num_updates=429, lr=4.29e-05, gnorm=2.851, loss_scale=2, train_wall=11, gb_free=2.8, wall=4869
2021-06-18 20:00:05 | INFO | train_inner | epoch 001: 438 / 3002 loss=3.289, ppl=9.78, wps=5889.7, ups=0.09, wpb=64773, bsz=128, num_updates=430, lr=4.3e-05, gnorm=2.989, loss_scale=2, train_wall=11, gb_free=2.8, wall=4880
2021-06-18 20:00:16 | INFO | train_inner | epoch 001: 439 / 3002 loss=3.272, ppl=9.66, wps=5869.4, ups=0.09, wpb=64970, bsz=128, num_updates=431, lr=4.31e-05, gnorm=7.882, loss_scale=2, train_wall=11, gb_free=2.8, wall=4891
2021-06-18 20:00:28 | INFO | train_inner | epoch 001: 440 / 3002 loss=3.114, ppl=8.66, wps=5811.7, ups=0.09, wpb=64781, bsz=128, num_updates=432, lr=4.32e-05, gnorm=2.8, loss_scale=2, train_wall=11, gb_free=2.8, wall=4902
2021-06-18 20:00:39 | INFO | train_inner | epoch 001: 441 / 3002 loss=3.1, ppl=8.58, wps=5845.4, ups=0.09, wpb=64887, bsz=128, num_updates=433, lr=4.33e-05, gnorm=2.842, loss_scale=2, train_wall=11, gb_free=2.8, wall=4913
2021-06-18 20:00:50 | INFO | train_inner | epoch 001: 442 / 3002 loss=3.276, ppl=9.69, wps=5872.2, ups=0.09, wpb=64827, bsz=128, num_updates=434, lr=4.34e-05, gnorm=2.927, loss_scale=2, train_wall=11, gb_free=2.8, wall=4924
2021-06-18 20:01:01 | INFO | train_inner | epoch 001: 443 / 3002 loss=3.25, ppl=9.51, wps=5860, ups=0.09, wpb=64863, bsz=128, num_updates=435, lr=4.35e-05, gnorm=2.924, loss_scale=2, train_wall=11, gb_free=2.8, wall=4935
2021-06-18 20:01:12 | INFO | train_inner | epoch 001: 444 / 3002 loss=3.31, ppl=9.92, wps=5906.8, ups=0.09, wpb=64859, bsz=128, num_updates=436, lr=4.36e-05, gnorm=2.711, loss_scale=2, train_wall=11, gb_free=2.8, wall=4946
2021-06-18 20:01:23 | INFO | train_inner | epoch 001: 445 / 3002 loss=3.192, ppl=9.14, wps=5815.3, ups=0.09, wpb=64839, bsz=128, num_updates=437, lr=4.37e-05, gnorm=3, loss_scale=2, train_wall=11, gb_free=2.8, wall=4957
2021-06-18 20:01:34 | INFO | train_inner | epoch 001: 446 / 3002 loss=3.417, ppl=10.68, wps=5803.2, ups=0.09, wpb=64807, bsz=128, num_updates=438, lr=4.38e-05, gnorm=2.816, loss_scale=2, train_wall=11, gb_free=2.8, wall=4968
2021-06-18 20:01:45 | INFO | train_inner | epoch 001: 447 / 3002 loss=3.457, ppl=10.98, wps=5792.7, ups=0.09, wpb=64849, bsz=128, num_updates=439, lr=4.39e-05, gnorm=2.925, loss_scale=2, train_wall=11, gb_free=2.8, wall=4980
2021-06-18 20:01:56 | INFO | train_inner | epoch 001: 448 / 3002 loss=3.404, ppl=10.59, wps=5816, ups=0.09, wpb=64821, bsz=128, num_updates=440, lr=4.4e-05, gnorm=2.938, loss_scale=2, train_wall=11, gb_free=2.8, wall=4991
2021-06-18 20:02:08 | INFO | train_inner | epoch 001: 449 / 3002 loss=3.366, ppl=10.31, wps=5834.8, ups=0.09, wpb=64825, bsz=128, num_updates=441, lr=4.41e-05, gnorm=2.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=5002
2021-06-18 20:02:19 | INFO | train_inner | epoch 001: 450 / 3002 loss=3.302, ppl=9.86, wps=5830.5, ups=0.09, wpb=64764, bsz=128, num_updates=442, lr=4.42e-05, gnorm=2.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=5013
2021-06-18 20:02:30 | INFO | train_inner | epoch 001: 451 / 3002 loss=3.275, ppl=9.68, wps=5814.3, ups=0.09, wpb=64803, bsz=128, num_updates=443, lr=4.43e-05, gnorm=2.877, loss_scale=2, train_wall=11, gb_free=2.8, wall=5024
2021-06-18 20:02:41 | INFO | train_inner | epoch 001: 452 / 3002 loss=3.199, ppl=9.18, wps=5854.4, ups=0.09, wpb=64795, bsz=128, num_updates=444, lr=4.44e-05, gnorm=2.821, loss_scale=2, train_wall=11, gb_free=2.8, wall=5035
2021-06-18 20:02:52 | INFO | train_inner | epoch 001: 453 / 3002 loss=3.395, ppl=10.52, wps=5784.1, ups=0.09, wpb=64840, bsz=128, num_updates=445, lr=4.45e-05, gnorm=3.115, loss_scale=2, train_wall=11, gb_free=2.8, wall=5046
2021-06-18 20:03:03 | INFO | train_inner | epoch 001: 454 / 3002 loss=3.329, ppl=10.05, wps=5825.5, ups=0.09, wpb=64839, bsz=128, num_updates=446, lr=4.46e-05, gnorm=2.955, loss_scale=2, train_wall=11, gb_free=2.8, wall=5058
2021-06-18 20:03:14 | INFO | train_inner | epoch 001: 455 / 3002 loss=3.284, ppl=9.74, wps=5778.8, ups=0.09, wpb=64774, bsz=128, num_updates=447, lr=4.47e-05, gnorm=2.938, loss_scale=2, train_wall=11, gb_free=2.8, wall=5069
2021-06-18 20:03:26 | INFO | train_inner | epoch 001: 456 / 3002 loss=3.444, ppl=10.88, wps=5787, ups=0.09, wpb=64799, bsz=128, num_updates=448, lr=4.48e-05, gnorm=3.072, loss_scale=2, train_wall=11, gb_free=2.8, wall=5080
2021-06-18 20:03:37 | INFO | train_inner | epoch 001: 457 / 3002 loss=3.307, ppl=9.9, wps=5811.9, ups=0.09, wpb=64787, bsz=128, num_updates=449, lr=4.49e-05, gnorm=3.12, loss_scale=2, train_wall=11, gb_free=2.8, wall=5091
2021-06-18 20:03:48 | INFO | train_inner | epoch 001: 458 / 3002 loss=3.266, ppl=9.62, wps=5949.4, ups=0.09, wpb=64879, bsz=128, num_updates=450, lr=4.5e-05, gnorm=3.118, loss_scale=2, train_wall=10, gb_free=2.8, wall=5102
2021-06-18 20:03:59 | INFO | train_inner | epoch 001: 459 / 3002 loss=3.315, ppl=9.95, wps=5925.4, ups=0.09, wpb=64932, bsz=128, num_updates=451, lr=4.51e-05, gnorm=3.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=5113
2021-06-18 20:04:10 | INFO | train_inner | epoch 001: 460 / 3002 loss=3.308, ppl=9.91, wps=5820.5, ups=0.09, wpb=64851, bsz=128, num_updates=452, lr=4.52e-05, gnorm=3.004, loss_scale=2, train_wall=11, gb_free=2.8, wall=5124
2021-06-18 20:04:21 | INFO | train_inner | epoch 001: 461 / 3002 loss=3.32, ppl=9.99, wps=5825.5, ups=0.09, wpb=64845, bsz=128, num_updates=453, lr=4.53e-05, gnorm=2.929, loss_scale=2, train_wall=11, gb_free=2.8, wall=5135
2021-06-18 20:04:32 | INFO | train_inner | epoch 001: 462 / 3002 loss=3.192, ppl=9.14, wps=5825, ups=0.09, wpb=64895, bsz=128, num_updates=454, lr=4.54e-05, gnorm=3.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=5146
2021-06-18 20:04:43 | INFO | train_inner | epoch 001: 463 / 3002 loss=3.208, ppl=9.24, wps=5711.9, ups=0.09, wpb=64802, bsz=128, num_updates=455, lr=4.55e-05, gnorm=3.383, loss_scale=2, train_wall=11, gb_free=2.8, wall=5158
2021-06-18 20:04:54 | INFO | train_inner | epoch 001: 464 / 3002 loss=3.132, ppl=8.77, wps=5886.8, ups=0.09, wpb=64881, bsz=128, num_updates=456, lr=4.56e-05, gnorm=2.898, loss_scale=2, train_wall=11, gb_free=2.8, wall=5169
2021-06-18 20:05:05 | INFO | train_inner | epoch 001: 465 / 3002 loss=3.319, ppl=9.98, wps=5908, ups=0.09, wpb=64811, bsz=128, num_updates=457, lr=4.57e-05, gnorm=3.152, loss_scale=2, train_wall=11, gb_free=2.8, wall=5180
2021-06-18 20:05:17 | INFO | train_inner | epoch 001: 466 / 3002 loss=3.156, ppl=8.91, wps=5786.1, ups=0.09, wpb=64767, bsz=128, num_updates=458, lr=4.58e-05, gnorm=3.194, loss_scale=2, train_wall=11, gb_free=2.8, wall=5191
2021-06-18 20:05:28 | INFO | train_inner | epoch 001: 467 / 3002 loss=3.236, ppl=9.42, wps=5903.1, ups=0.09, wpb=64872, bsz=128, num_updates=459, lr=4.59e-05, gnorm=3.446, loss_scale=2, train_wall=11, gb_free=2.8, wall=5202
2021-06-18 20:05:39 | INFO | train_inner | epoch 001: 468 / 3002 loss=3.192, ppl=9.14, wps=5923.3, ups=0.09, wpb=64797, bsz=128, num_updates=460, lr=4.6e-05, gnorm=2.894, loss_scale=2, train_wall=10, gb_free=2.8, wall=5213
2021-06-18 20:05:50 | INFO | train_inner | epoch 001: 469 / 3002 loss=3.296, ppl=9.82, wps=5812.4, ups=0.09, wpb=64786, bsz=128, num_updates=461, lr=4.61e-05, gnorm=3.036, loss_scale=2, train_wall=11, gb_free=2.8, wall=5224
2021-06-18 20:06:01 | INFO | train_inner | epoch 001: 470 / 3002 loss=3.201, ppl=9.19, wps=5851.4, ups=0.09, wpb=64832, bsz=128, num_updates=462, lr=4.62e-05, gnorm=2.849, loss_scale=2, train_wall=11, gb_free=2.8, wall=5235
2021-06-18 20:06:12 | INFO | train_inner | epoch 001: 471 / 3002 loss=3.309, ppl=9.91, wps=5865.4, ups=0.09, wpb=64786, bsz=128, num_updates=463, lr=4.63e-05, gnorm=3.089, loss_scale=2, train_wall=11, gb_free=2.8, wall=5246
2021-06-18 20:06:23 | INFO | train_inner | epoch 001: 472 / 3002 loss=3.166, ppl=8.97, wps=5888, ups=0.09, wpb=64825, bsz=128, num_updates=464, lr=4.64e-05, gnorm=2.907, loss_scale=2, train_wall=11, gb_free=2.8, wall=5257
2021-06-18 20:06:34 | INFO | train_inner | epoch 001: 473 / 3002 loss=3.186, ppl=9.1, wps=5821.9, ups=0.09, wpb=64772, bsz=128, num_updates=465, lr=4.65e-05, gnorm=2.766, loss_scale=2, train_wall=11, gb_free=2.8, wall=5268
2021-06-18 20:06:45 | INFO | train_inner | epoch 001: 474 / 3002 loss=3.178, ppl=9.05, wps=5932.3, ups=0.09, wpb=64808, bsz=128, num_updates=466, lr=4.66e-05, gnorm=2.954, loss_scale=2, train_wall=10, gb_free=2.8, wall=5279
2021-06-18 20:06:56 | INFO | train_inner | epoch 001: 475 / 3002 loss=3.226, ppl=9.35, wps=5855.5, ups=0.09, wpb=64842, bsz=128, num_updates=467, lr=4.67e-05, gnorm=3.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=5290
2021-06-18 20:07:07 | INFO | train_inner | epoch 001: 476 / 3002 loss=3.29, ppl=9.78, wps=5853.5, ups=0.09, wpb=64876, bsz=128, num_updates=468, lr=4.68e-05, gnorm=2.888, loss_scale=2, train_wall=11, gb_free=2.8, wall=5301
2021-06-18 20:07:18 | INFO | train_inner | epoch 001: 477 / 3002 loss=3.212, ppl=9.27, wps=5824.2, ups=0.09, wpb=64887, bsz=128, num_updates=469, lr=4.69e-05, gnorm=3.471, loss_scale=2, train_wall=11, gb_free=2.8, wall=5312
2021-06-18 20:07:29 | INFO | train_inner | epoch 001: 478 / 3002 loss=3.24, ppl=9.45, wps=5853.6, ups=0.09, wpb=64806, bsz=128, num_updates=470, lr=4.7e-05, gnorm=2.742, loss_scale=2, train_wall=11, gb_free=2.8, wall=5324
2021-06-18 20:07:40 | INFO | train_inner | epoch 001: 479 / 3002 loss=3.097, ppl=8.55, wps=5840.7, ups=0.09, wpb=64836, bsz=128, num_updates=471, lr=4.71e-05, gnorm=2.789, loss_scale=2, train_wall=11, gb_free=2.8, wall=5335
2021-06-18 20:07:52 | INFO | train_inner | epoch 001: 480 / 3002 loss=3.233, ppl=9.4, wps=5755, ups=0.09, wpb=64786, bsz=128, num_updates=472, lr=4.72e-05, gnorm=2.778, loss_scale=2, train_wall=11, gb_free=2.8, wall=5346
2021-06-18 20:08:03 | INFO | train_inner | epoch 001: 481 / 3002 loss=3.165, ppl=8.97, wps=5926.1, ups=0.09, wpb=64801, bsz=128, num_updates=473, lr=4.73e-05, gnorm=3.068, loss_scale=2, train_wall=10, gb_free=2.8, wall=5357
2021-06-18 20:08:13 | INFO | train_inner | epoch 001: 482 / 3002 loss=3.242, ppl=9.46, wps=5922.9, ups=0.09, wpb=64901, bsz=128, num_updates=474, lr=4.74e-05, gnorm=3.214, loss_scale=2, train_wall=11, gb_free=2.8, wall=5368
2021-06-18 20:08:25 | INFO | train_inner | epoch 001: 483 / 3002 loss=3.005, ppl=8.03, wps=5816.4, ups=0.09, wpb=64867, bsz=128, num_updates=475, lr=4.75e-05, gnorm=3.709, loss_scale=2, train_wall=11, gb_free=2.8, wall=5379
2021-06-18 20:08:36 | INFO | train_inner | epoch 001: 484 / 3002 loss=3.082, ppl=8.47, wps=5724.3, ups=0.09, wpb=64864, bsz=128, num_updates=476, lr=4.76e-05, gnorm=3.071, loss_scale=2, train_wall=11, gb_free=2.8, wall=5390
2021-06-18 20:08:47 | INFO | train_inner | epoch 001: 485 / 3002 loss=3.1, ppl=8.57, wps=5877.2, ups=0.09, wpb=64819, bsz=128, num_updates=477, lr=4.77e-05, gnorm=2.825, loss_scale=2, train_wall=11, gb_free=2.8, wall=5401
2021-06-18 20:08:58 | INFO | train_inner | epoch 001: 486 / 3002 loss=3.064, ppl=8.36, wps=5769.2, ups=0.09, wpb=64898, bsz=128, num_updates=478, lr=4.78e-05, gnorm=2.797, loss_scale=2, train_wall=11, gb_free=2.8, wall=5413
2021-06-18 20:09:09 | INFO | train_inner | epoch 001: 487 / 3002 loss=3.192, ppl=9.14, wps=5894.4, ups=0.09, wpb=64779, bsz=128, num_updates=479, lr=4.79e-05, gnorm=14.104, loss_scale=2, train_wall=11, gb_free=2.8, wall=5424
2021-06-18 20:09:20 | INFO | train_inner | epoch 001: 488 / 3002 loss=3.161, ppl=8.94, wps=5881.6, ups=0.09, wpb=64822, bsz=128, num_updates=480, lr=4.8e-05, gnorm=2.883, loss_scale=2, train_wall=11, gb_free=2.8, wall=5435
2021-06-18 20:09:31 | INFO | train_inner | epoch 001: 489 / 3002 loss=3.041, ppl=8.23, wps=5859.4, ups=0.09, wpb=64873, bsz=128, num_updates=481, lr=4.81e-05, gnorm=2.759, loss_scale=2, train_wall=11, gb_free=2.8, wall=5446
2021-06-18 20:09:42 | INFO | train_inner | epoch 001: 490 / 3002 loss=3.448, ppl=10.91, wps=5897.8, ups=0.09, wpb=64814, bsz=128, num_updates=482, lr=4.82e-05, gnorm=3.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=5457
2021-06-18 20:09:53 | INFO | train_inner | epoch 001: 491 / 3002 loss=3.139, ppl=8.81, wps=5911.7, ups=0.09, wpb=64878, bsz=128, num_updates=483, lr=4.83e-05, gnorm=3.61, loss_scale=2, train_wall=11, gb_free=2.8, wall=5468
2021-06-18 20:10:04 | INFO | train_inner | epoch 001: 492 / 3002 loss=3.334, ppl=10.08, wps=5887.3, ups=0.09, wpb=64771, bsz=128, num_updates=484, lr=4.84e-05, gnorm=2.881, loss_scale=2, train_wall=11, gb_free=2.8, wall=5479
2021-06-18 20:10:15 | INFO | train_inner | epoch 001: 493 / 3002 loss=3.175, ppl=9.03, wps=5849, ups=0.09, wpb=64829, bsz=128, num_updates=485, lr=4.85e-05, gnorm=2.883, loss_scale=2, train_wall=11, gb_free=2.8, wall=5490
2021-06-18 20:10:27 | INFO | train_inner | epoch 001: 494 / 3002 loss=3.232, ppl=9.4, wps=5706.9, ups=0.09, wpb=64922, bsz=128, num_updates=486, lr=4.86e-05, gnorm=2.913, loss_scale=2, train_wall=11, gb_free=2.8, wall=5501
2021-06-18 20:10:38 | INFO | train_inner | epoch 001: 495 / 3002 loss=3.267, ppl=9.63, wps=5801, ups=0.09, wpb=64844, bsz=128, num_updates=487, lr=4.87e-05, gnorm=8.15, loss_scale=2, train_wall=11, gb_free=2.8, wall=5512
2021-06-18 20:10:49 | INFO | train_inner | epoch 001: 496 / 3002 loss=3.249, ppl=9.51, wps=5896.8, ups=0.09, wpb=64769, bsz=128, num_updates=488, lr=4.88e-05, gnorm=2.819, loss_scale=2, train_wall=11, gb_free=2.8, wall=5523
2021-06-18 20:11:00 | INFO | train_inner | epoch 001: 497 / 3002 loss=3.182, ppl=9.08, wps=5969.9, ups=0.09, wpb=64927, bsz=128, num_updates=489, lr=4.89e-05, gnorm=3.124, loss_scale=2, train_wall=10, gb_free=2.8, wall=5534
2021-06-18 20:11:11 | INFO | train_inner | epoch 001: 498 / 3002 loss=3.037, ppl=8.21, wps=5928.4, ups=0.09, wpb=64880, bsz=128, num_updates=490, lr=4.9e-05, gnorm=2.945, loss_scale=2, train_wall=10, gb_free=2.8, wall=5545
2021-06-18 20:11:22 | INFO | train_inner | epoch 001: 499 / 3002 loss=3.218, ppl=9.31, wps=5858, ups=0.09, wpb=64906, bsz=128, num_updates=491, lr=4.91e-05, gnorm=3.344, loss_scale=2, train_wall=11, gb_free=2.8, wall=5556
2021-06-18 20:11:33 | INFO | train_inner | epoch 001: 500 / 3002 loss=3.314, ppl=9.95, wps=5884.2, ups=0.09, wpb=64816, bsz=128, num_updates=492, lr=4.92e-05, gnorm=2.869, loss_scale=2, train_wall=11, gb_free=2.8, wall=5567
2021-06-18 20:11:44 | INFO | train_inner | epoch 001: 501 / 3002 loss=3.322, ppl=10, wps=5862.9, ups=0.09, wpb=64727, bsz=128, num_updates=493, lr=4.93e-05, gnorm=2.815, loss_scale=2, train_wall=11, gb_free=2.8, wall=5578
2021-06-18 20:11:55 | INFO | train_inner | epoch 001: 502 / 3002 loss=3.175, ppl=9.03, wps=5768.8, ups=0.09, wpb=64788, bsz=128, num_updates=494, lr=4.94e-05, gnorm=2.859, loss_scale=2, train_wall=11, gb_free=2.8, wall=5589
2021-06-18 20:12:06 | INFO | train_inner | epoch 001: 503 / 3002 loss=3.417, ppl=10.68, wps=5832.7, ups=0.09, wpb=64782, bsz=128, num_updates=495, lr=4.95e-05, gnorm=2.981, loss_scale=2, train_wall=11, gb_free=2.8, wall=5601
2021-06-18 20:12:17 | INFO | train_inner | epoch 001: 504 / 3002 loss=3.31, ppl=9.92, wps=5781.5, ups=0.09, wpb=64912, bsz=128, num_updates=496, lr=4.96e-05, gnorm=2.829, loss_scale=2, train_wall=11, gb_free=2.8, wall=5612
2021-06-18 20:12:29 | INFO | train_inner | epoch 001: 505 / 3002 loss=3.175, ppl=9.03, wps=5809.5, ups=0.09, wpb=64912, bsz=128, num_updates=497, lr=4.97e-05, gnorm=2.882, loss_scale=2, train_wall=11, gb_free=2.8, wall=5623
2021-06-18 20:12:40 | INFO | train_inner | epoch 001: 506 / 3002 loss=3.112, ppl=8.64, wps=5816.8, ups=0.09, wpb=64838, bsz=128, num_updates=498, lr=4.98e-05, gnorm=2.81, loss_scale=2, train_wall=11, gb_free=2.8, wall=5634
2021-06-18 20:12:51 | INFO | train_inner | epoch 001: 507 / 3002 loss=3.276, ppl=9.68, wps=5758.9, ups=0.09, wpb=64780, bsz=128, num_updates=499, lr=4.99e-05, gnorm=2.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=5645
2021-06-18 20:13:02 | INFO | train_inner | epoch 001: 508 / 3002 loss=3.179, ppl=9.05, wps=5882.4, ups=0.09, wpb=64816, bsz=128, num_updates=500, lr=5e-05, gnorm=2.835, loss_scale=2, train_wall=11, gb_free=2.8, wall=5656
2021-06-18 20:13:13 | INFO | train_inner | epoch 001: 509 / 3002 loss=3.221, ppl=9.33, wps=5791.5, ups=0.09, wpb=64803, bsz=128, num_updates=501, lr=5.01e-05, gnorm=3.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=5668
2021-06-18 20:13:24 | INFO | train_inner | epoch 001: 510 / 3002 loss=3.156, ppl=8.91, wps=5900.4, ups=0.09, wpb=64827, bsz=128, num_updates=502, lr=5.02e-05, gnorm=2.705, loss_scale=2, train_wall=11, gb_free=2.8, wall=5679
2021-06-18 20:13:35 | INFO | train_inner | epoch 001: 511 / 3002 loss=3.249, ppl=9.51, wps=5809.4, ups=0.09, wpb=64747, bsz=128, num_updates=503, lr=5.03e-05, gnorm=14.574, loss_scale=2, train_wall=11, gb_free=2.8, wall=5690
2021-06-18 20:13:46 | INFO | train_inner | epoch 001: 512 / 3002 loss=3.316, ppl=9.96, wps=5856.9, ups=0.09, wpb=64841, bsz=128, num_updates=504, lr=5.04e-05, gnorm=2.853, loss_scale=2, train_wall=11, gb_free=2.8, wall=5701
2021-06-18 20:13:58 | INFO | train_inner | epoch 001: 513 / 3002 loss=3.406, ppl=10.6, wps=5809.8, ups=0.09, wpb=64850, bsz=128, num_updates=505, lr=5.05e-05, gnorm=3.878, loss_scale=2, train_wall=11, gb_free=2.8, wall=5712
2021-06-18 20:14:09 | INFO | train_inner | epoch 001: 514 / 3002 loss=3.394, ppl=10.51, wps=5816.2, ups=0.09, wpb=64788, bsz=128, num_updates=506, lr=5.06e-05, gnorm=8.85, loss_scale=2, train_wall=11, gb_free=2.8, wall=5723
2021-06-18 20:14:20 | INFO | train_inner | epoch 001: 515 / 3002 loss=3.115, ppl=8.67, wps=5872.2, ups=0.09, wpb=64827, bsz=128, num_updates=507, lr=5.07e-05, gnorm=3.178, loss_scale=2, train_wall=11, gb_free=2.8, wall=5734
2021-06-18 20:14:31 | INFO | train_inner | epoch 001: 516 / 3002 loss=3.193, ppl=9.15, wps=5918.9, ups=0.09, wpb=64853, bsz=128, num_updates=508, lr=5.08e-05, gnorm=2.97, loss_scale=2, train_wall=10, gb_free=2.8, wall=5745
2021-06-18 20:14:42 | INFO | train_inner | epoch 001: 517 / 3002 loss=3.267, ppl=9.63, wps=5901.5, ups=0.09, wpb=64892, bsz=128, num_updates=509, lr=5.09e-05, gnorm=3.14, loss_scale=2, train_wall=11, gb_free=2.8, wall=5756
2021-06-18 20:14:53 | INFO | train_inner | epoch 001: 518 / 3002 loss=3.277, ppl=9.7, wps=5751.3, ups=0.09, wpb=64813, bsz=128, num_updates=510, lr=5.1e-05, gnorm=2.96, loss_scale=2, train_wall=11, gb_free=2.8, wall=5767
2021-06-18 20:15:04 | INFO | train_inner | epoch 001: 519 / 3002 loss=3.232, ppl=9.39, wps=5758.7, ups=0.09, wpb=64781, bsz=128, num_updates=511, lr=5.11e-05, gnorm=3.077, loss_scale=2, train_wall=11, gb_free=2.8, wall=5779
2021-06-18 20:15:15 | INFO | train_inner | epoch 001: 520 / 3002 loss=3.053, ppl=8.3, wps=5998.9, ups=0.09, wpb=64870, bsz=128, num_updates=512, lr=5.12e-05, gnorm=16.632, loss_scale=2, train_wall=10, gb_free=2.8, wall=5789
2021-06-18 20:15:26 | INFO | train_inner | epoch 001: 521 / 3002 loss=3.146, ppl=8.85, wps=5830.4, ups=0.09, wpb=64898, bsz=128, num_updates=513, lr=5.13e-05, gnorm=2.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=5801
2021-06-18 20:15:37 | INFO | train_inner | epoch 001: 522 / 3002 loss=3.294, ppl=9.81, wps=5809.8, ups=0.09, wpb=64858, bsz=128, num_updates=514, lr=5.14e-05, gnorm=3.026, loss_scale=2, train_wall=11, gb_free=2.8, wall=5812
2021-06-18 20:15:48 | INFO | train_inner | epoch 001: 523 / 3002 loss=3.316, ppl=9.96, wps=5861.5, ups=0.09, wpb=64844, bsz=128, num_updates=515, lr=5.15e-05, gnorm=3.289, loss_scale=2, train_wall=11, gb_free=2.8, wall=5823
2021-06-18 20:16:00 | INFO | train_inner | epoch 001: 524 / 3002 loss=3.493, ppl=11.26, wps=5762.6, ups=0.09, wpb=64878, bsz=128, num_updates=516, lr=5.16e-05, gnorm=3.441, loss_scale=2, train_wall=11, gb_free=2.8, wall=5834
2021-06-18 20:16:11 | INFO | train_inner | epoch 001: 525 / 3002 loss=3.187, ppl=9.1, wps=5843.5, ups=0.09, wpb=64792, bsz=128, num_updates=517, lr=5.17e-05, gnorm=3.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=5845
2021-06-18 20:16:22 | INFO | train_inner | epoch 001: 526 / 3002 loss=3.269, ppl=9.64, wps=5908.8, ups=0.09, wpb=64861, bsz=128, num_updates=518, lr=5.18e-05, gnorm=3.689, loss_scale=2, train_wall=11, gb_free=2.8, wall=5856
2021-06-18 20:16:33 | INFO | train_inner | epoch 001: 527 / 3002 loss=3.109, ppl=8.63, wps=5904.2, ups=0.09, wpb=64772, bsz=128, num_updates=519, lr=5.19e-05, gnorm=3.609, loss_scale=2, train_wall=11, gb_free=2.8, wall=5867
2021-06-18 20:16:44 | INFO | train_inner | epoch 001: 528 / 3002 loss=3.188, ppl=9.11, wps=5886.5, ups=0.09, wpb=64797, bsz=128, num_updates=520, lr=5.2e-05, gnorm=2.875, loss_scale=2, train_wall=11, gb_free=2.8, wall=5878
2021-06-18 20:16:55 | INFO | train_inner | epoch 001: 529 / 3002 loss=3.187, ppl=9.11, wps=5761.1, ups=0.09, wpb=64822, bsz=128, num_updates=521, lr=5.21e-05, gnorm=4.894, loss_scale=2, train_wall=11, gb_free=2.8, wall=5889
2021-06-18 20:17:06 | INFO | train_inner | epoch 001: 530 / 3002 loss=3.25, ppl=9.52, wps=6036.9, ups=0.09, wpb=64859, bsz=128, num_updates=522, lr=5.22e-05, gnorm=2.886, loss_scale=2, train_wall=10, gb_free=2.8, wall=5900
2021-06-18 20:17:17 | INFO | train_inner | epoch 001: 531 / 3002 loss=3.153, ppl=8.9, wps=5837, ups=0.09, wpb=64819, bsz=128, num_updates=523, lr=5.23e-05, gnorm=3.111, loss_scale=2, train_wall=11, gb_free=2.8, wall=5911
2021-06-18 20:17:28 | INFO | train_inner | epoch 001: 532 / 3002 loss=3.376, ppl=10.38, wps=5807.3, ups=0.09, wpb=64876, bsz=128, num_updates=524, lr=5.24e-05, gnorm=3.212, loss_scale=2, train_wall=11, gb_free=2.8, wall=5922
2021-06-18 20:17:39 | INFO | train_inner | epoch 001: 533 / 3002 loss=3.303, ppl=9.87, wps=5857.7, ups=0.09, wpb=64775, bsz=128, num_updates=525, lr=5.25e-05, gnorm=3.166, loss_scale=2, train_wall=11, gb_free=2.8, wall=5933
2021-06-18 20:17:50 | INFO | train_inner | epoch 001: 534 / 3002 loss=3.157, ppl=8.92, wps=5947.9, ups=0.09, wpb=64870, bsz=128, num_updates=526, lr=5.26e-05, gnorm=2.896, loss_scale=2, train_wall=10, gb_free=2.8, wall=5944
2021-06-18 20:18:01 | INFO | train_inner | epoch 001: 535 / 3002 loss=3.419, ppl=10.7, wps=5901.9, ups=0.09, wpb=64855, bsz=128, num_updates=527, lr=5.27e-05, gnorm=3.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=5955
2021-06-18 20:18:12 | INFO | train_inner | epoch 001: 536 / 3002 loss=3.17, ppl=9, wps=5728.1, ups=0.09, wpb=64854, bsz=128, num_updates=528, lr=5.28e-05, gnorm=3.124, loss_scale=2, train_wall=11, gb_free=2.8, wall=5967
2021-06-18 20:18:23 | INFO | train_inner | epoch 001: 537 / 3002 loss=3.263, ppl=9.6, wps=5888.2, ups=0.09, wpb=64819, bsz=128, num_updates=529, lr=5.29e-05, gnorm=3.958, loss_scale=2, train_wall=11, gb_free=2.8, wall=5978
2021-06-18 20:18:34 | INFO | train_inner | epoch 001: 538 / 3002 loss=3.347, ppl=10.17, wps=5877.1, ups=0.09, wpb=64814, bsz=128, num_updates=530, lr=5.3e-05, gnorm=3.115, loss_scale=2, train_wall=11, gb_free=2.8, wall=5989
2021-06-18 20:18:45 | INFO | train_inner | epoch 001: 539 / 3002 loss=3.065, ppl=8.37, wps=5886.4, ups=0.09, wpb=64778, bsz=128, num_updates=531, lr=5.31e-05, gnorm=2.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=6000
2021-06-18 20:18:56 | INFO | train_inner | epoch 001: 540 / 3002 loss=3.235, ppl=9.42, wps=5854.9, ups=0.09, wpb=64732, bsz=128, num_updates=532, lr=5.32e-05, gnorm=3.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=6011
2021-06-18 20:19:07 | INFO | train_inner | epoch 001: 541 / 3002 loss=3.025, ppl=8.14, wps=5828.5, ups=0.09, wpb=64856, bsz=128, num_updates=533, lr=5.33e-05, gnorm=10.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=6022
2021-06-18 20:19:18 | INFO | train_inner | epoch 001: 542 / 3002 loss=2.996, ppl=7.98, wps=5967.4, ups=0.09, wpb=64890, bsz=128, num_updates=534, lr=5.34e-05, gnorm=7.986, loss_scale=2, train_wall=10, gb_free=2.8, wall=6033
2021-06-18 20:19:29 | INFO | train_inner | epoch 001: 543 / 3002 loss=3.208, ppl=9.24, wps=5945.8, ups=0.09, wpb=64790, bsz=128, num_updates=535, lr=5.35e-05, gnorm=2.896, loss_scale=2, train_wall=10, gb_free=2.8, wall=6044
2021-06-18 20:19:40 | INFO | train_inner | epoch 001: 544 / 3002 loss=3.387, ppl=10.46, wps=5922.3, ups=0.09, wpb=64819, bsz=128, num_updates=536, lr=5.36e-05, gnorm=3.156, loss_scale=2, train_wall=10, gb_free=2.8, wall=6055
2021-06-18 20:19:51 | INFO | train_inner | epoch 001: 545 / 3002 loss=3.398, ppl=10.54, wps=5900.8, ups=0.09, wpb=64813, bsz=128, num_updates=537, lr=5.37e-05, gnorm=3.065, loss_scale=2, train_wall=11, gb_free=2.8, wall=6066
2021-06-18 20:20:02 | INFO | train_inner | epoch 001: 546 / 3002 loss=3.294, ppl=9.81, wps=6007.1, ups=0.09, wpb=64897, bsz=128, num_updates=538, lr=5.38e-05, gnorm=3.037, loss_scale=2, train_wall=10, gb_free=2.8, wall=6076
2021-06-18 20:20:13 | INFO | train_inner | epoch 001: 547 / 3002 loss=3.27, ppl=9.64, wps=5869, ups=0.09, wpb=64846, bsz=128, num_updates=539, lr=5.39e-05, gnorm=14.902, loss_scale=2, train_wall=11, gb_free=2.8, wall=6087
2021-06-18 20:20:24 | INFO | train_inner | epoch 001: 548 / 3002 loss=3.301, ppl=9.86, wps=5767, ups=0.09, wpb=64825, bsz=128, num_updates=540, lr=5.4e-05, gnorm=3.099, loss_scale=2, train_wall=11, gb_free=2.8, wall=6099
2021-06-18 20:20:35 | INFO | train_inner | epoch 001: 549 / 3002 loss=3.034, ppl=8.19, wps=5927.1, ups=0.09, wpb=64844, bsz=128, num_updates=541, lr=5.41e-05, gnorm=5.075, loss_scale=2, train_wall=10, gb_free=2.8, wall=6110
2021-06-18 20:20:46 | INFO | train_inner | epoch 001: 550 / 3002 loss=3.223, ppl=9.34, wps=5915.9, ups=0.09, wpb=64870, bsz=128, num_updates=542, lr=5.42e-05, gnorm=3.035, loss_scale=2, train_wall=11, gb_free=2.8, wall=6121
2021-06-18 20:20:57 | INFO | train_inner | epoch 001: 551 / 3002 loss=3.377, ppl=10.39, wps=5839, ups=0.09, wpb=64756, bsz=128, num_updates=543, lr=5.43e-05, gnorm=2.929, loss_scale=2, train_wall=11, gb_free=2.8, wall=6132
2021-06-18 20:21:09 | INFO | train_inner | epoch 001: 552 / 3002 loss=3.188, ppl=9.11, wps=5773, ups=0.09, wpb=64778, bsz=128, num_updates=544, lr=5.44e-05, gnorm=3.057, loss_scale=2, train_wall=11, gb_free=2.8, wall=6143
2021-06-18 20:21:20 | INFO | train_inner | epoch 001: 553 / 3002 loss=3.217, ppl=9.3, wps=5765.1, ups=0.09, wpb=64838, bsz=128, num_updates=545, lr=5.45e-05, gnorm=3.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=6154
2021-06-18 20:21:31 | INFO | train_inner | epoch 001: 554 / 3002 loss=3.288, ppl=9.77, wps=5802.9, ups=0.09, wpb=64849, bsz=128, num_updates=546, lr=5.46e-05, gnorm=3.052, loss_scale=4, train_wall=11, gb_free=2.8, wall=6165
2021-06-18 20:21:42 | INFO | train_inner | epoch 001: 555 / 3002 loss=3.157, ppl=8.92, wps=5840.7, ups=0.09, wpb=64820, bsz=128, num_updates=547, lr=5.47e-05, gnorm=3.442, loss_scale=4, train_wall=11, gb_free=2.8, wall=6176
2021-06-18 20:21:53 | INFO | train_inner | epoch 001: 556 / 3002 loss=3.136, ppl=8.79, wps=5838.9, ups=0.09, wpb=64771, bsz=128, num_updates=548, lr=5.48e-05, gnorm=2.907, loss_scale=4, train_wall=11, gb_free=2.8, wall=6187
2021-06-18 20:22:04 | INFO | train_inner | epoch 001: 557 / 3002 loss=3.162, ppl=8.95, wps=5854.7, ups=0.09, wpb=64817, bsz=128, num_updates=549, lr=5.49e-05, gnorm=3.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=6199
2021-06-18 20:22:15 | INFO | train_inner | epoch 001: 558 / 3002 loss=3.261, ppl=9.58, wps=5940.7, ups=0.09, wpb=64868, bsz=128, num_updates=550, lr=5.5e-05, gnorm=6.265, loss_scale=4, train_wall=10, gb_free=2.8, wall=6209
2021-06-18 20:22:26 | INFO | train_inner | epoch 001: 559 / 3002 loss=3.227, ppl=9.36, wps=5943.1, ups=0.09, wpb=64861, bsz=128, num_updates=551, lr=5.51e-05, gnorm=3.628, loss_scale=4, train_wall=10, gb_free=2.8, wall=6220
2021-06-18 20:22:37 | INFO | train_inner | epoch 001: 560 / 3002 loss=3.071, ppl=8.41, wps=5878.5, ups=0.09, wpb=64846, bsz=128, num_updates=552, lr=5.52e-05, gnorm=2.761, loss_scale=4, train_wall=11, gb_free=2.8, wall=6231
2021-06-18 20:22:48 | INFO | train_inner | epoch 001: 561 / 3002 loss=3.243, ppl=9.46, wps=5922.6, ups=0.09, wpb=64899, bsz=128, num_updates=553, lr=5.53e-05, gnorm=8.329, loss_scale=4, train_wall=10, gb_free=2.8, wall=6242
2021-06-18 20:22:59 | INFO | train_inner | epoch 001: 562 / 3002 loss=3.151, ppl=8.88, wps=5865.3, ups=0.09, wpb=64865, bsz=128, num_updates=554, lr=5.54e-05, gnorm=2.81, loss_scale=4, train_wall=11, gb_free=2.8, wall=6253
2021-06-18 20:23:10 | INFO | train_inner | epoch 001: 563 / 3002 loss=3.375, ppl=10.38, wps=6051.9, ups=0.09, wpb=64839, bsz=128, num_updates=555, lr=5.55e-05, gnorm=2.838, loss_scale=4, train_wall=10, gb_free=2.8, wall=6264
2021-06-18 20:23:21 | INFO | train_inner | epoch 001: 564 / 3002 loss=3.358, ppl=10.25, wps=5812, ups=0.09, wpb=64812, bsz=128, num_updates=556, lr=5.56e-05, gnorm=3.056, loss_scale=4, train_wall=11, gb_free=2.8, wall=6275
2021-06-18 20:23:32 | INFO | train_inner | epoch 001: 565 / 3002 loss=3.439, ppl=10.84, wps=5804.3, ups=0.09, wpb=64792, bsz=128, num_updates=557, lr=5.57e-05, gnorm=3.908, loss_scale=4, train_wall=11, gb_free=2.8, wall=6286
2021-06-18 20:23:43 | INFO | train_inner | epoch 001: 566 / 3002 loss=3.249, ppl=9.51, wps=5882.8, ups=0.09, wpb=64826, bsz=128, num_updates=558, lr=5.58e-05, gnorm=3.835, loss_scale=4, train_wall=11, gb_free=2.8, wall=6297
2021-06-18 20:23:54 | INFO | train_inner | epoch 001: 567 / 3002 loss=3.294, ppl=9.81, wps=6011.5, ups=0.09, wpb=64950, bsz=128, num_updates=559, lr=5.59e-05, gnorm=2.798, loss_scale=4, train_wall=10, gb_free=2.8, wall=6308
2021-06-18 20:24:05 | INFO | train_inner | epoch 001: 568 / 3002 loss=3.177, ppl=9.04, wps=5771, ups=0.09, wpb=64750, bsz=128, num_updates=560, lr=5.6e-05, gnorm=2.867, loss_scale=4, train_wall=11, gb_free=2.8, wall=6319
2021-06-18 20:24:16 | INFO | train_inner | epoch 001: 569 / 3002 loss=3.262, ppl=9.59, wps=5838.2, ups=0.09, wpb=64919, bsz=128, num_updates=561, lr=5.61e-05, gnorm=2.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=6331
2021-06-18 20:24:27 | INFO | train_inner | epoch 001: 570 / 3002 loss=3.061, ppl=8.35, wps=5838, ups=0.09, wpb=64797, bsz=128, num_updates=562, lr=5.62e-05, gnorm=3.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=6342
2021-06-18 20:24:38 | INFO | train_inner | epoch 001: 571 / 3002 loss=3.259, ppl=9.58, wps=5879.3, ups=0.09, wpb=64901, bsz=128, num_updates=563, lr=5.63e-05, gnorm=3.041, loss_scale=4, train_wall=11, gb_free=2.8, wall=6353
2021-06-18 20:24:49 | INFO | train_inner | epoch 001: 572 / 3002 loss=3.112, ppl=8.65, wps=5921.2, ups=0.09, wpb=64794, bsz=128, num_updates=564, lr=5.64e-05, gnorm=2.768, loss_scale=4, train_wall=10, gb_free=2.8, wall=6364
2021-06-18 20:25:00 | INFO | train_inner | epoch 001: 573 / 3002 loss=3.238, ppl=9.44, wps=5953.9, ups=0.09, wpb=64900, bsz=128, num_updates=565, lr=5.65e-05, gnorm=2.937, loss_scale=4, train_wall=10, gb_free=2.8, wall=6375
2021-06-18 20:25:11 | INFO | train_inner | epoch 001: 574 / 3002 loss=3.243, ppl=9.47, wps=5772.2, ups=0.09, wpb=64849, bsz=128, num_updates=566, lr=5.66e-05, gnorm=2.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=6386
2021-06-18 20:25:22 | INFO | train_inner | epoch 001: 575 / 3002 loss=3.413, ppl=10.65, wps=5882.7, ups=0.09, wpb=64779, bsz=128, num_updates=567, lr=5.67e-05, gnorm=2.976, loss_scale=4, train_wall=11, gb_free=2.8, wall=6397
2021-06-18 20:25:33 | INFO | train_inner | epoch 001: 576 / 3002 loss=3.272, ppl=9.66, wps=5963.7, ups=0.09, wpb=64811, bsz=128, num_updates=568, lr=5.68e-05, gnorm=2.837, loss_scale=4, train_wall=10, gb_free=2.8, wall=6408
2021-06-18 20:25:44 | INFO | train_inner | epoch 001: 577 / 3002 loss=3.217, ppl=9.3, wps=5909.3, ups=0.09, wpb=64817, bsz=128, num_updates=569, lr=5.69e-05, gnorm=3.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=6419
2021-06-18 20:25:56 | INFO | train_inner | epoch 001: 578 / 3002 loss=3.044, ppl=8.25, wps=5762, ups=0.09, wpb=64821, bsz=128, num_updates=570, lr=5.7e-05, gnorm=2.857, loss_scale=4, train_wall=11, gb_free=2.8, wall=6430
2021-06-18 20:26:07 | INFO | train_inner | epoch 001: 579 / 3002 loss=3.225, ppl=9.35, wps=5902.5, ups=0.09, wpb=64763, bsz=128, num_updates=571, lr=5.71e-05, gnorm=5.052, loss_scale=4, train_wall=11, gb_free=2.8, wall=6441
2021-06-18 20:26:17 | INFO | train_inner | epoch 001: 580 / 3002 loss=3.294, ppl=9.81, wps=6051, ups=0.09, wpb=64890, bsz=128, num_updates=572, lr=5.72e-05, gnorm=2.871, loss_scale=4, train_wall=10, gb_free=2.8, wall=6452
2021-06-18 20:26:28 | INFO | train_inner | epoch 001: 581 / 3002 loss=3.231, ppl=9.39, wps=5911.5, ups=0.09, wpb=64832, bsz=128, num_updates=573, lr=5.73e-05, gnorm=2.763, loss_scale=4, train_wall=11, gb_free=2.8, wall=6463
2021-06-18 20:26:39 | INFO | train_inner | epoch 001: 582 / 3002 loss=3.197, ppl=9.17, wps=5834.6, ups=0.09, wpb=64865, bsz=128, num_updates=574, lr=5.74e-05, gnorm=2.927, loss_scale=4, train_wall=11, gb_free=2.8, wall=6474
2021-06-18 20:26:51 | INFO | train_inner | epoch 001: 583 / 3002 loss=3.339, ppl=10.12, wps=5748.7, ups=0.09, wpb=64888, bsz=128, num_updates=575, lr=5.75e-05, gnorm=2.713, loss_scale=4, train_wall=11, gb_free=2.8, wall=6485
2021-06-18 20:27:02 | INFO | train_inner | epoch 001: 584 / 3002 loss=3.172, ppl=9.01, wps=5839.6, ups=0.09, wpb=64877, bsz=128, num_updates=576, lr=5.76e-05, gnorm=2.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=6496
2021-06-18 20:27:13 | INFO | train_inner | epoch 001: 585 / 3002 loss=3.207, ppl=9.23, wps=5857.8, ups=0.09, wpb=64884, bsz=128, num_updates=577, lr=5.77e-05, gnorm=3.035, loss_scale=4, train_wall=11, gb_free=2.8, wall=6507
2021-06-18 20:27:24 | INFO | train_inner | epoch 001: 586 / 3002 loss=3.187, ppl=9.11, wps=5915.3, ups=0.09, wpb=64884, bsz=128, num_updates=578, lr=5.78e-05, gnorm=3.26, loss_scale=4, train_wall=10, gb_free=2.8, wall=6518
2021-06-18 20:27:35 | INFO | train_inner | epoch 001: 587 / 3002 loss=3.236, ppl=9.42, wps=5965.1, ups=0.09, wpb=64899, bsz=128, num_updates=579, lr=5.79e-05, gnorm=2.958, loss_scale=4, train_wall=10, gb_free=2.8, wall=6529
2021-06-18 20:27:46 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-18 20:27:57 | INFO | train_inner | epoch 001: 589 / 3002 loss=3.149, ppl=8.87, wps=2919, ups=0.04, wpb=64881, bsz=128, num_updates=580, lr=5.8e-05, gnorm=2.987, loss_scale=2, train_wall=21, gb_free=2.8, wall=6551
2021-06-18 20:28:08 | INFO | train_inner | epoch 001: 590 / 3002 loss=3.171, ppl=9.01, wps=5729.7, ups=0.09, wpb=64868, bsz=128, num_updates=581, lr=5.81e-05, gnorm=2.803, loss_scale=2, train_wall=11, gb_free=2.8, wall=6563
2021-06-18 20:28:19 | INFO | train_inner | epoch 001: 591 / 3002 loss=3.116, ppl=8.67, wps=5985.5, ups=0.09, wpb=64813, bsz=128, num_updates=582, lr=5.82e-05, gnorm=3.839, loss_scale=2, train_wall=10, gb_free=2.8, wall=6573
2021-06-18 20:28:30 | INFO | train_inner | epoch 001: 592 / 3002 loss=3.401, ppl=10.57, wps=5846, ups=0.09, wpb=64806, bsz=128, num_updates=583, lr=5.83e-05, gnorm=3.548, loss_scale=2, train_wall=11, gb_free=2.8, wall=6584
2021-06-18 20:28:41 | INFO | train_inner | epoch 001: 593 / 3002 loss=3.292, ppl=9.8, wps=5832.1, ups=0.09, wpb=64898, bsz=128, num_updates=584, lr=5.84e-05, gnorm=2.936, loss_scale=2, train_wall=11, gb_free=2.8, wall=6596
2021-06-18 20:28:52 | INFO | train_inner | epoch 001: 594 / 3002 loss=3.202, ppl=9.2, wps=5809.9, ups=0.09, wpb=64828, bsz=128, num_updates=585, lr=5.85e-05, gnorm=8.991, loss_scale=2, train_wall=11, gb_free=2.8, wall=6607
2021-06-18 20:29:03 | INFO | train_inner | epoch 001: 595 / 3002 loss=3.373, ppl=10.36, wps=5868, ups=0.09, wpb=64871, bsz=128, num_updates=586, lr=5.86e-05, gnorm=3.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=6618
2021-06-18 20:29:15 | INFO | train_inner | epoch 001: 596 / 3002 loss=3.246, ppl=9.49, wps=5879.4, ups=0.09, wpb=64838, bsz=128, num_updates=587, lr=5.87e-05, gnorm=2.918, loss_scale=2, train_wall=11, gb_free=2.8, wall=6629
2021-06-18 20:29:26 | INFO | train_inner | epoch 001: 597 / 3002 loss=3.314, ppl=9.95, wps=5876.7, ups=0.09, wpb=64844, bsz=128, num_updates=588, lr=5.88e-05, gnorm=2.865, loss_scale=2, train_wall=11, gb_free=2.8, wall=6640
2021-06-18 20:29:37 | INFO | train_inner | epoch 001: 598 / 3002 loss=3.482, ppl=11.17, wps=5744.1, ups=0.09, wpb=64847, bsz=128, num_updates=589, lr=5.89e-05, gnorm=2.981, loss_scale=2, train_wall=11, gb_free=2.8, wall=6651
2021-06-18 20:29:48 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-18 20:29:59 | INFO | train_inner | epoch 001: 600 / 3002 loss=3.201, ppl=9.19, wps=2931.2, ups=0.05, wpb=64767, bsz=128, num_updates=590, lr=5.9e-05, gnorm=3.393, loss_scale=1, train_wall=21, gb_free=2.8, wall=6673
2021-06-18 20:30:10 | INFO | train_inner | epoch 001: 601 / 3002 loss=2.959, ppl=7.78, wps=5843.6, ups=0.09, wpb=64847, bsz=128, num_updates=591, lr=5.91e-05, gnorm=21.991, loss_scale=1, train_wall=11, gb_free=2.8, wall=6684
2021-06-18 20:30:21 | INFO | train_inner | epoch 001: 602 / 3002 loss=3.355, ppl=10.23, wps=5917.9, ups=0.09, wpb=64796, bsz=128, num_updates=592, lr=5.92e-05, gnorm=3.072, loss_scale=1, train_wall=10, gb_free=2.8, wall=6695
2021-06-18 20:30:32 | INFO | train_inner | epoch 001: 603 / 3002 loss=3.154, ppl=8.9, wps=5924.1, ups=0.09, wpb=64903, bsz=128, num_updates=593, lr=5.93e-05, gnorm=2.748, loss_scale=1, train_wall=10, gb_free=2.8, wall=6706
2021-06-18 20:30:43 | INFO | train_inner | epoch 001: 604 / 3002 loss=3.237, ppl=9.43, wps=5975.5, ups=0.09, wpb=64812, bsz=128, num_updates=594, lr=5.94e-05, gnorm=2.769, loss_scale=1, train_wall=10, gb_free=2.8, wall=6717
2021-06-18 20:30:54 | INFO | train_inner | epoch 001: 605 / 3002 loss=3.132, ppl=8.77, wps=5839.5, ups=0.09, wpb=64888, bsz=128, num_updates=595, lr=5.95e-05, gnorm=2.706, loss_scale=1, train_wall=11, gb_free=2.8, wall=6728
2021-06-18 20:31:05 | INFO | train_inner | epoch 001: 606 / 3002 loss=3.174, ppl=9.02, wps=5788.6, ups=0.09, wpb=64842, bsz=128, num_updates=596, lr=5.96e-05, gnorm=2.918, loss_scale=1, train_wall=11, gb_free=2.8, wall=6739
2021-06-18 20:31:16 | INFO | train_inner | epoch 001: 607 / 3002 loss=3.051, ppl=8.29, wps=5879.4, ups=0.09, wpb=64868, bsz=128, num_updates=597, lr=5.97e-05, gnorm=3.023, loss_scale=1, train_wall=11, gb_free=2.8, wall=6750
2021-06-18 20:31:27 | INFO | train_inner | epoch 001: 608 / 3002 loss=3.061, ppl=8.35, wps=5855.4, ups=0.09, wpb=64791, bsz=128, num_updates=598, lr=5.98e-05, gnorm=2.822, loss_scale=1, train_wall=11, gb_free=2.8, wall=6762
2021-06-18 20:31:38 | INFO | train_inner | epoch 001: 609 / 3002 loss=3.226, ppl=9.36, wps=5861.9, ups=0.09, wpb=64855, bsz=128, num_updates=599, lr=5.99e-05, gnorm=2.868, loss_scale=1, train_wall=11, gb_free=2.8, wall=6773
2021-06-18 20:31:49 | INFO | train_inner | epoch 001: 610 / 3002 loss=3.145, ppl=8.84, wps=5817.7, ups=0.09, wpb=64800, bsz=128, num_updates=600, lr=6e-05, gnorm=4.338, loss_scale=1, train_wall=11, gb_free=2.8, wall=6784
2021-06-18 20:32:01 | INFO | train_inner | epoch 001: 611 / 3002 loss=3.231, ppl=9.39, wps=5775.8, ups=0.09, wpb=64805, bsz=128, num_updates=601, lr=6.01e-05, gnorm=2.892, loss_scale=1, train_wall=11, gb_free=2.8, wall=6795
2021-06-18 20:32:12 | INFO | train_inner | epoch 001: 612 / 3002 loss=3.196, ppl=9.17, wps=5787.7, ups=0.09, wpb=64814, bsz=128, num_updates=602, lr=6.02e-05, gnorm=3.278, loss_scale=1, train_wall=11, gb_free=2.8, wall=6806
2021-06-18 20:32:23 | INFO | train_inner | epoch 001: 613 / 3002 loss=3.149, ppl=8.87, wps=5918, ups=0.09, wpb=64838, bsz=128, num_updates=603, lr=6.03e-05, gnorm=2.912, loss_scale=1, train_wall=11, gb_free=2.8, wall=6817
2021-06-18 20:32:34 | INFO | train_inner | epoch 001: 614 / 3002 loss=3.17, ppl=9, wps=5890.3, ups=0.09, wpb=64935, bsz=128, num_updates=604, lr=6.04e-05, gnorm=3.102, loss_scale=1, train_wall=11, gb_free=2.8, wall=6828
2021-06-18 20:32:45 | INFO | train_inner | epoch 001: 615 / 3002 loss=3.262, ppl=9.59, wps=5942.9, ups=0.09, wpb=64808, bsz=128, num_updates=605, lr=6.05e-05, gnorm=3.583, loss_scale=1, train_wall=10, gb_free=2.8, wall=6839
2021-06-18 20:32:56 | INFO | train_inner | epoch 001: 616 / 3002 loss=3.358, ppl=10.25, wps=5930, ups=0.09, wpb=64843, bsz=128, num_updates=606, lr=6.06e-05, gnorm=2.972, loss_scale=1, train_wall=11, gb_free=2.8, wall=6850
2021-06-18 20:33:07 | INFO | train_inner | epoch 001: 617 / 3002 loss=3.273, ppl=9.67, wps=5765.3, ups=0.09, wpb=64797, bsz=128, num_updates=607, lr=6.07e-05, gnorm=2.805, loss_scale=1, train_wall=11, gb_free=2.8, wall=6861
2021-06-18 20:33:18 | INFO | train_inner | epoch 001: 618 / 3002 loss=3.158, ppl=8.92, wps=5888.8, ups=0.09, wpb=64959, bsz=128, num_updates=608, lr=6.08e-05, gnorm=9.323, loss_scale=1, train_wall=11, gb_free=2.8, wall=6872
2021-06-18 20:33:29 | INFO | train_inner | epoch 001: 619 / 3002 loss=3.071, ppl=8.4, wps=5769.5, ups=0.09, wpb=64907, bsz=128, num_updates=609, lr=6.09e-05, gnorm=3.385, loss_scale=1, train_wall=11, gb_free=2.8, wall=6884
2021-06-18 20:33:40 | INFO | train_inner | epoch 001: 620 / 3002 loss=3.172, ppl=9.01, wps=5866.8, ups=0.09, wpb=64857, bsz=128, num_updates=610, lr=6.1e-05, gnorm=3.007, loss_scale=1, train_wall=11, gb_free=2.8, wall=6895
2021-06-18 20:33:51 | INFO | train_inner | epoch 001: 621 / 3002 loss=3.061, ppl=8.34, wps=5786.6, ups=0.09, wpb=64817, bsz=128, num_updates=611, lr=6.11e-05, gnorm=3.173, loss_scale=1, train_wall=11, gb_free=2.8, wall=6906
2021-06-18 20:34:03 | INFO | train_inner | epoch 001: 622 / 3002 loss=3.093, ppl=8.53, wps=5846.3, ups=0.09, wpb=64870, bsz=128, num_updates=612, lr=6.12e-05, gnorm=2.729, loss_scale=1, train_wall=11, gb_free=2.8, wall=6917
2021-06-18 20:34:14 | INFO | train_inner | epoch 001: 623 / 3002 loss=3.284, ppl=9.74, wps=5839.1, ups=0.09, wpb=64845, bsz=128, num_updates=613, lr=6.13e-05, gnorm=4.951, loss_scale=1, train_wall=11, gb_free=2.8, wall=6928
2021-06-18 20:34:25 | INFO | train_inner | epoch 001: 624 / 3002 loss=3.173, ppl=9.02, wps=5941.4, ups=0.09, wpb=64839, bsz=128, num_updates=614, lr=6.14e-05, gnorm=5.23, loss_scale=1, train_wall=10, gb_free=2.8, wall=6939
2021-06-18 20:34:36 | INFO | train_inner | epoch 001: 625 / 3002 loss=3.094, ppl=8.54, wps=5888.2, ups=0.09, wpb=64843, bsz=128, num_updates=615, lr=6.15e-05, gnorm=4.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=6950
2021-06-18 20:34:47 | INFO | train_inner | epoch 001: 626 / 3002 loss=3.223, ppl=9.34, wps=5831.4, ups=0.09, wpb=64761, bsz=128, num_updates=616, lr=6.16e-05, gnorm=2.962, loss_scale=1, train_wall=11, gb_free=2.8, wall=6961
2021-06-18 20:34:58 | INFO | train_inner | epoch 001: 627 / 3002 loss=3.23, ppl=9.38, wps=5675.9, ups=0.09, wpb=64744, bsz=128, num_updates=617, lr=6.17e-05, gnorm=3.173, loss_scale=1, train_wall=11, gb_free=2.8, wall=6972
2021-06-18 20:35:09 | INFO | train_inner | epoch 001: 628 / 3002 loss=3.15, ppl=8.88, wps=5985.1, ups=0.09, wpb=64768, bsz=128, num_updates=618, lr=6.18e-05, gnorm=2.837, loss_scale=1, train_wall=10, gb_free=2.8, wall=6983
2021-06-18 20:35:20 | INFO | train_inner | epoch 001: 629 / 3002 loss=3.269, ppl=9.64, wps=5906.5, ups=0.09, wpb=64790, bsz=128, num_updates=619, lr=6.19e-05, gnorm=2.875, loss_scale=1, train_wall=11, gb_free=2.8, wall=6994
2021-06-18 20:35:31 | INFO | train_inner | epoch 001: 630 / 3002 loss=3.226, ppl=9.35, wps=5851.9, ups=0.09, wpb=64806, bsz=128, num_updates=620, lr=6.2e-05, gnorm=3.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=7005
2021-06-18 20:35:42 | INFO | train_inner | epoch 001: 631 / 3002 loss=3.003, ppl=8.02, wps=5901.9, ups=0.09, wpb=64890, bsz=128, num_updates=621, lr=6.21e-05, gnorm=3.313, loss_scale=1, train_wall=11, gb_free=2.8, wall=7016
2021-06-18 20:35:53 | INFO | train_inner | epoch 001: 632 / 3002 loss=3.102, ppl=8.58, wps=5838.7, ups=0.09, wpb=64826, bsz=128, num_updates=622, lr=6.22e-05, gnorm=2.706, loss_scale=1, train_wall=11, gb_free=2.8, wall=7027
2021-06-18 20:36:04 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-18 20:36:15 | INFO | train_inner | epoch 001: 634 / 3002 loss=3.032, ppl=8.18, wps=2926.2, ups=0.05, wpb=64876, bsz=128, num_updates=623, lr=6.23e-05, gnorm=3.166, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=7050
2021-06-18 20:36:26 | INFO | train_inner | epoch 001: 635 / 3002 loss=3.372, ppl=10.35, wps=5874.7, ups=0.09, wpb=64879, bsz=128, num_updates=624, lr=6.24e-05, gnorm=2.928, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7061
2021-06-18 20:36:37 | INFO | train_inner | epoch 001: 636 / 3002 loss=3.108, ppl=8.62, wps=5944.3, ups=0.09, wpb=64879, bsz=128, num_updates=625, lr=6.25e-05, gnorm=2.847, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7071
2021-06-18 20:36:48 | INFO | train_inner | epoch 001: 637 / 3002 loss=3.262, ppl=9.6, wps=5736.4, ups=0.09, wpb=64880, bsz=128, num_updates=626, lr=6.26e-05, gnorm=2.948, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7083
2021-06-18 20:36:59 | INFO | train_inner | epoch 001: 638 / 3002 loss=3.01, ppl=8.06, wps=5941.2, ups=0.09, wpb=64871, bsz=128, num_updates=627, lr=6.27e-05, gnorm=4.344, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7094
2021-06-18 20:37:11 | INFO | train_inner | epoch 001: 639 / 3002 loss=3.179, ppl=9.06, wps=5829.3, ups=0.09, wpb=64913, bsz=128, num_updates=628, lr=6.28e-05, gnorm=3.03, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7105
2021-06-18 20:37:22 | INFO | train_inner | epoch 001: 640 / 3002 loss=3.394, ppl=10.51, wps=5870, ups=0.09, wpb=64826, bsz=128, num_updates=629, lr=6.29e-05, gnorm=2.897, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7116
2021-06-18 20:37:33 | INFO | train_inner | epoch 001: 641 / 3002 loss=3.352, ppl=10.21, wps=5860.7, ups=0.09, wpb=64752, bsz=128, num_updates=630, lr=6.3e-05, gnorm=2.924, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7127
2021-06-18 20:37:44 | INFO | train_inner | epoch 001: 642 / 3002 loss=3.18, ppl=9.07, wps=5945.2, ups=0.09, wpb=64823, bsz=128, num_updates=631, lr=6.31e-05, gnorm=2.878, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7138
2021-06-18 20:37:54 | INFO | train_inner | epoch 001: 643 / 3002 loss=3.164, ppl=8.96, wps=5912.6, ups=0.09, wpb=64875, bsz=128, num_updates=632, lr=6.32e-05, gnorm=2.868, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7149
2021-06-18 20:38:06 | INFO | train_inner | epoch 001: 644 / 3002 loss=3.242, ppl=9.46, wps=5824.1, ups=0.09, wpb=64807, bsz=128, num_updates=633, lr=6.33e-05, gnorm=2.889, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7160
2021-06-18 20:38:17 | INFO | train_inner | epoch 001: 645 / 3002 loss=3.236, ppl=9.42, wps=5892.6, ups=0.09, wpb=64854, bsz=128, num_updates=634, lr=6.34e-05, gnorm=4.238, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7171
2021-06-18 20:38:28 | INFO | train_inner | epoch 001: 646 / 3002 loss=3.112, ppl=8.65, wps=5840.4, ups=0.09, wpb=64845, bsz=128, num_updates=635, lr=6.35e-05, gnorm=2.886, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7182
2021-06-18 20:38:39 | INFO | train_inner | epoch 001: 647 / 3002 loss=3.22, ppl=9.32, wps=5758.4, ups=0.09, wpb=64829, bsz=128, num_updates=636, lr=6.36e-05, gnorm=2.931, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7193
2021-06-18 20:38:50 | INFO | train_inner | epoch 001: 648 / 3002 loss=3.16, ppl=8.94, wps=5866.7, ups=0.09, wpb=64761, bsz=128, num_updates=637, lr=6.37e-05, gnorm=3.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7204
2021-06-18 20:39:01 | INFO | train_inner | epoch 001: 649 / 3002 loss=3.277, ppl=9.69, wps=5765.6, ups=0.09, wpb=64786, bsz=128, num_updates=638, lr=6.38e-05, gnorm=3.102, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7216
2021-06-18 20:39:12 | INFO | train_inner | epoch 001: 650 / 3002 loss=3.217, ppl=9.3, wps=5894.6, ups=0.09, wpb=64793, bsz=128, num_updates=639, lr=6.39e-05, gnorm=2.841, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7227
2021-06-18 20:39:23 | INFO | train_inner | epoch 001: 651 / 3002 loss=3.298, ppl=9.84, wps=5826.2, ups=0.09, wpb=64807, bsz=128, num_updates=640, lr=6.4e-05, gnorm=2.756, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7238
2021-06-18 20:39:34 | INFO | train_inner | epoch 001: 652 / 3002 loss=3.238, ppl=9.44, wps=5918.9, ups=0.09, wpb=64868, bsz=128, num_updates=641, lr=6.41e-05, gnorm=3.006, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7249
2021-06-18 20:39:45 | INFO | train_inner | epoch 001: 653 / 3002 loss=3.141, ppl=8.82, wps=6007.8, ups=0.09, wpb=64868, bsz=128, num_updates=642, lr=6.42e-05, gnorm=3.279, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7259
2021-06-18 20:39:56 | INFO | train_inner | epoch 001: 654 / 3002 loss=3.261, ppl=9.58, wps=5859.1, ups=0.09, wpb=64825, bsz=128, num_updates=643, lr=6.43e-05, gnorm=2.918, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7271
2021-06-18 20:40:07 | INFO | train_inner | epoch 001: 655 / 3002 loss=3.163, ppl=8.96, wps=5889.8, ups=0.09, wpb=64883, bsz=128, num_updates=644, lr=6.44e-05, gnorm=2.802, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7282
2021-06-18 20:40:18 | INFO | train_inner | epoch 001: 656 / 3002 loss=3.145, ppl=8.85, wps=5867.1, ups=0.09, wpb=64765, bsz=128, num_updates=645, lr=6.45e-05, gnorm=2.828, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7293
2021-06-18 20:40:29 | INFO | train_inner | epoch 001: 657 / 3002 loss=3.235, ppl=9.41, wps=5865.8, ups=0.09, wpb=64876, bsz=128, num_updates=646, lr=6.46e-05, gnorm=3.04, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7304
2021-06-18 20:40:41 | INFO | train_inner | epoch 001: 658 / 3002 loss=3.271, ppl=9.65, wps=5725.3, ups=0.09, wpb=64839, bsz=128, num_updates=647, lr=6.47e-05, gnorm=2.954, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7315
2021-06-18 20:40:52 | INFO | train_inner | epoch 001: 659 / 3002 loss=3.368, ppl=10.32, wps=5867.1, ups=0.09, wpb=64847, bsz=128, num_updates=648, lr=6.48e-05, gnorm=2.81, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7326
2021-06-18 20:41:03 | INFO | train_inner | epoch 001: 660 / 3002 loss=2.982, ppl=7.9, wps=5802.6, ups=0.09, wpb=64790, bsz=128, num_updates=649, lr=6.49e-05, gnorm=3.077, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7337
2021-06-18 20:41:14 | INFO | train_inner | epoch 001: 661 / 3002 loss=3.242, ppl=9.46, wps=5782.4, ups=0.09, wpb=64737, bsz=128, num_updates=650, lr=6.5e-05, gnorm=3.166, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7348
2021-06-18 20:41:25 | INFO | train_inner | epoch 001: 662 / 3002 loss=3.128, ppl=8.74, wps=5860.5, ups=0.09, wpb=64831, bsz=128, num_updates=651, lr=6.51e-05, gnorm=2.799, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7359
2021-06-18 20:41:36 | INFO | train_inner | epoch 001: 663 / 3002 loss=3.258, ppl=9.56, wps=5863.8, ups=0.09, wpb=64850, bsz=128, num_updates=652, lr=6.52e-05, gnorm=2.941, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7371
2021-06-18 20:41:47 | INFO | train_inner | epoch 001: 664 / 3002 loss=3.25, ppl=9.51, wps=5887.3, ups=0.09, wpb=64731, bsz=128, num_updates=653, lr=6.53e-05, gnorm=6.886, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7382
2021-06-18 20:41:58 | INFO | train_inner | epoch 001: 665 / 3002 loss=3.227, ppl=9.36, wps=5856.4, ups=0.09, wpb=64858, bsz=128, num_updates=654, lr=6.54e-05, gnorm=2.908, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7393
2021-06-18 20:42:09 | INFO | train_inner | epoch 001: 666 / 3002 loss=3.123, ppl=8.71, wps=5787.8, ups=0.09, wpb=64776, bsz=128, num_updates=655, lr=6.55e-05, gnorm=2.911, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7404
2021-06-18 20:42:20 | INFO | train_inner | epoch 001: 667 / 3002 loss=3.239, ppl=9.44, wps=5955.9, ups=0.09, wpb=64871, bsz=128, num_updates=656, lr=6.56e-05, gnorm=2.853, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7415
2021-06-18 20:42:32 | INFO | train_inner | epoch 001: 668 / 3002 loss=3.219, ppl=9.31, wps=5792.4, ups=0.09, wpb=64817, bsz=128, num_updates=657, lr=6.57e-05, gnorm=2.992, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7426
2021-06-18 20:42:43 | INFO | train_inner | epoch 001: 669 / 3002 loss=3.061, ppl=8.34, wps=5880.2, ups=0.09, wpb=64851, bsz=128, num_updates=658, lr=6.58e-05, gnorm=2.803, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7437
2021-06-18 20:42:53 | INFO | train_inner | epoch 001: 670 / 3002 loss=3.154, ppl=8.9, wps=6013.9, ups=0.09, wpb=64864, bsz=128, num_updates=659, lr=6.59e-05, gnorm=2.828, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7448
2021-06-18 20:43:05 | INFO | train_inner | epoch 001: 671 / 3002 loss=3.096, ppl=8.55, wps=5783.7, ups=0.09, wpb=64790, bsz=128, num_updates=660, lr=6.6e-05, gnorm=4.359, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7459
2021-06-18 20:43:16 | INFO | train_inner | epoch 001: 672 / 3002 loss=3.129, ppl=8.75, wps=5852.2, ups=0.09, wpb=64841, bsz=128, num_updates=661, lr=6.61e-05, gnorm=3.983, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7470
2021-06-18 20:43:27 | INFO | train_inner | epoch 001: 673 / 3002 loss=3.207, ppl=9.23, wps=5870.5, ups=0.09, wpb=64869, bsz=128, num_updates=662, lr=6.62e-05, gnorm=4.59, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7481
2021-06-18 20:43:38 | INFO | train_inner | epoch 001: 674 / 3002 loss=3.064, ppl=8.36, wps=5834.9, ups=0.09, wpb=64786, bsz=128, num_updates=663, lr=6.63e-05, gnorm=2.899, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7492
2021-06-18 20:43:49 | INFO | train_inner | epoch 001: 675 / 3002 loss=3.234, ppl=9.41, wps=5888.1, ups=0.09, wpb=64803, bsz=128, num_updates=664, lr=6.64e-05, gnorm=2.998, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7503
2021-06-18 20:44:00 | INFO | train_inner | epoch 001: 676 / 3002 loss=3.3, ppl=9.85, wps=5756, ups=0.09, wpb=64795, bsz=128, num_updates=665, lr=6.65e-05, gnorm=2.846, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7514
2021-06-18 20:44:11 | INFO | train_inner | epoch 001: 677 / 3002 loss=3.069, ppl=8.39, wps=5925.3, ups=0.09, wpb=64797, bsz=128, num_updates=666, lr=6.66e-05, gnorm=2.82, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7525
2021-06-18 20:44:22 | INFO | train_inner | epoch 001: 678 / 3002 loss=3.05, ppl=8.28, wps=5877.2, ups=0.09, wpb=64859, bsz=128, num_updates=667, lr=6.67e-05, gnorm=2.787, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7536
2021-06-18 20:44:33 | INFO | train_inner | epoch 001: 679 / 3002 loss=3.141, ppl=8.82, wps=5942.5, ups=0.09, wpb=64905, bsz=128, num_updates=668, lr=6.68e-05, gnorm=2.813, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7547
2021-06-18 20:44:44 | INFO | train_inner | epoch 001: 680 / 3002 loss=3.082, ppl=8.47, wps=5953.7, ups=0.09, wpb=64874, bsz=128, num_updates=669, lr=6.69e-05, gnorm=3.867, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7558
2021-06-18 20:44:55 | INFO | train_inner | epoch 001: 681 / 3002 loss=3.199, ppl=9.18, wps=5767.7, ups=0.09, wpb=64778, bsz=128, num_updates=670, lr=6.7e-05, gnorm=2.848, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7569
2021-06-18 20:45:06 | INFO | train_inner | epoch 001: 682 / 3002 loss=3.172, ppl=9.01, wps=5734.4, ups=0.09, wpb=64882, bsz=128, num_updates=671, lr=6.71e-05, gnorm=2.841, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7581
2021-06-18 20:45:17 | INFO | train_inner | epoch 001: 683 / 3002 loss=3.213, ppl=9.27, wps=5959.8, ups=0.09, wpb=64883, bsz=128, num_updates=672, lr=6.72e-05, gnorm=3.05, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7592
2021-06-18 20:45:28 | INFO | train_inner | epoch 001: 684 / 3002 loss=3.144, ppl=8.84, wps=5786.3, ups=0.09, wpb=64774, bsz=128, num_updates=673, lr=6.73e-05, gnorm=2.834, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7603
2021-06-18 20:45:39 | INFO | train_inner | epoch 001: 685 / 3002 loss=3.224, ppl=9.34, wps=5877.9, ups=0.09, wpb=64769, bsz=128, num_updates=674, lr=6.74e-05, gnorm=2.829, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7614
2021-06-18 20:45:51 | INFO | train_inner | epoch 001: 686 / 3002 loss=3.102, ppl=8.58, wps=5771.3, ups=0.09, wpb=64840, bsz=128, num_updates=675, lr=6.75e-05, gnorm=2.865, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7625
2021-06-18 20:46:02 | INFO | train_inner | epoch 001: 687 / 3002 loss=3.068, ppl=8.39, wps=5946.3, ups=0.09, wpb=64863, bsz=128, num_updates=676, lr=6.76e-05, gnorm=3.233, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7636
2021-06-18 20:46:13 | INFO | train_inner | epoch 001: 688 / 3002 loss=3.149, ppl=8.87, wps=5758, ups=0.09, wpb=64697, bsz=128, num_updates=677, lr=6.77e-05, gnorm=2.712, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7647
2021-06-18 20:46:24 | INFO | train_inner | epoch 001: 689 / 3002 loss=3.171, ppl=9.01, wps=5906.1, ups=0.09, wpb=64806, bsz=128, num_updates=678, lr=6.78e-05, gnorm=2.878, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7658
2021-06-18 20:46:35 | INFO | train_inner | epoch 001: 690 / 3002 loss=3.216, ppl=9.29, wps=5941.4, ups=0.09, wpb=64902, bsz=128, num_updates=679, lr=6.79e-05, gnorm=3.439, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7669
2021-06-18 20:46:46 | INFO | train_inner | epoch 001: 691 / 3002 loss=3.161, ppl=8.95, wps=5810.1, ups=0.09, wpb=64833, bsz=128, num_updates=680, lr=6.8e-05, gnorm=2.952, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7680
2021-06-18 20:46:57 | INFO | train_inner | epoch 001: 692 / 3002 loss=3.125, ppl=8.73, wps=5949.5, ups=0.09, wpb=64774, bsz=128, num_updates=681, lr=6.81e-05, gnorm=2.81, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7691
2021-06-18 20:47:08 | INFO | train_inner | epoch 001: 693 / 3002 loss=3.163, ppl=8.95, wps=5909, ups=0.09, wpb=64911, bsz=128, num_updates=682, lr=6.82e-05, gnorm=2.831, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7702
2021-06-18 20:47:19 | INFO | train_inner | epoch 001: 694 / 3002 loss=3.308, ppl=9.91, wps=5923.1, ups=0.09, wpb=64785, bsz=128, num_updates=683, lr=6.83e-05, gnorm=3.142, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7713
2021-06-18 20:47:30 | INFO | train_inner | epoch 001: 695 / 3002 loss=3.111, ppl=8.64, wps=5802.6, ups=0.09, wpb=64869, bsz=128, num_updates=684, lr=6.84e-05, gnorm=2.987, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7724
2021-06-18 20:47:41 | INFO | train_inner | epoch 001: 696 / 3002 loss=3.068, ppl=8.39, wps=5952.3, ups=0.09, wpb=64817, bsz=128, num_updates=685, lr=6.85e-05, gnorm=2.761, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7735
2021-06-18 20:47:52 | INFO | train_inner | epoch 001: 697 / 3002 loss=3.057, ppl=8.32, wps=5850.6, ups=0.09, wpb=64796, bsz=128, num_updates=686, lr=6.86e-05, gnorm=31.392, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7746
2021-06-18 20:48:03 | INFO | train_inner | epoch 001: 698 / 3002 loss=3.213, ppl=9.28, wps=5781.1, ups=0.09, wpb=64851, bsz=128, num_updates=687, lr=6.87e-05, gnorm=3.07, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7757
2021-06-18 20:48:14 | INFO | train_inner | epoch 001: 699 / 3002 loss=3.167, ppl=8.98, wps=5791.4, ups=0.09, wpb=64815, bsz=128, num_updates=688, lr=6.88e-05, gnorm=7.673, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7769
2021-06-18 20:48:25 | INFO | train_inner | epoch 001: 700 / 3002 loss=3.156, ppl=8.91, wps=5812.8, ups=0.09, wpb=64876, bsz=128, num_updates=689, lr=6.89e-05, gnorm=2.828, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7780
2021-06-18 20:48:37 | INFO | train_inner | epoch 001: 701 / 3002 loss=3.066, ppl=8.38, wps=5841.2, ups=0.09, wpb=64801, bsz=128, num_updates=690, lr=6.9e-05, gnorm=3.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7791
2021-06-18 20:48:48 | INFO | train_inner | epoch 001: 702 / 3002 loss=3.086, ppl=8.49, wps=5828.1, ups=0.09, wpb=64781, bsz=128, num_updates=691, lr=6.91e-05, gnorm=2.915, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7802
2021-06-18 20:48:59 | INFO | train_inner | epoch 001: 703 / 3002 loss=3.129, ppl=8.75, wps=5935.5, ups=0.09, wpb=64874, bsz=128, num_updates=692, lr=6.92e-05, gnorm=2.908, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7813
2021-06-18 20:49:09 | INFO | train_inner | epoch 001: 704 / 3002 loss=3.186, ppl=9.1, wps=5965.1, ups=0.09, wpb=64780, bsz=128, num_updates=693, lr=6.93e-05, gnorm=2.941, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7824
2021-06-18 20:49:21 | INFO | train_inner | epoch 001: 705 / 3002 loss=3.326, ppl=10.03, wps=5852.5, ups=0.09, wpb=64856, bsz=128, num_updates=694, lr=6.94e-05, gnorm=4.869, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7835
2021-06-18 20:49:32 | INFO | train_inner | epoch 001: 706 / 3002 loss=3.174, ppl=9.02, wps=5902.8, ups=0.09, wpb=64892, bsz=128, num_updates=695, lr=6.95e-05, gnorm=4.206, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7846
2021-06-18 20:49:43 | INFO | train_inner | epoch 001: 707 / 3002 loss=3.328, ppl=10.04, wps=5865.9, ups=0.09, wpb=64802, bsz=128, num_updates=696, lr=6.96e-05, gnorm=3.16, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7857
2021-06-18 20:49:54 | INFO | train_inner | epoch 001: 708 / 3002 loss=3.121, ppl=8.7, wps=5907.7, ups=0.09, wpb=64854, bsz=128, num_updates=697, lr=6.97e-05, gnorm=2.975, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7868
2021-06-18 20:50:05 | INFO | train_inner | epoch 001: 709 / 3002 loss=3.306, ppl=9.89, wps=5874.5, ups=0.09, wpb=64871, bsz=128, num_updates=698, lr=6.98e-05, gnorm=2.894, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7879
2021-06-18 20:50:16 | INFO | train_inner | epoch 001: 710 / 3002 loss=3.227, ppl=9.36, wps=5810.7, ups=0.09, wpb=64884, bsz=128, num_updates=699, lr=6.99e-05, gnorm=6.945, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7890
2021-06-18 20:50:27 | INFO | train_inner | epoch 001: 711 / 3002 loss=3.237, ppl=9.43, wps=5921.9, ups=0.09, wpb=64827, bsz=128, num_updates=700, lr=7e-05, gnorm=2.993, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7901
2021-06-18 20:50:38 | INFO | train_inner | epoch 001: 712 / 3002 loss=3.31, ppl=9.92, wps=5878, ups=0.09, wpb=64851, bsz=128, num_updates=701, lr=7.01e-05, gnorm=3.111, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7912
2021-06-18 20:50:49 | INFO | train_inner | epoch 001: 713 / 3002 loss=3.351, ppl=10.2, wps=5929.9, ups=0.09, wpb=64805, bsz=128, num_updates=702, lr=7.02e-05, gnorm=2.923, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=7923
2021-06-18 20:51:00 | INFO | train_inner | epoch 001: 714 / 3002 loss=3.022, ppl=8.12, wps=5872.7, ups=0.09, wpb=64865, bsz=128, num_updates=703, lr=7.03e-05, gnorm=3.013, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7934
2021-06-18 20:51:11 | INFO | train_inner | epoch 001: 715 / 3002 loss=3.319, ppl=9.98, wps=5870.2, ups=0.09, wpb=64787, bsz=128, num_updates=704, lr=7.04e-05, gnorm=2.994, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7945
2021-06-18 20:51:22 | INFO | train_inner | epoch 001: 716 / 3002 loss=3.2, ppl=9.19, wps=5918.9, ups=0.09, wpb=64902, bsz=128, num_updates=705, lr=7.05e-05, gnorm=2.839, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7956
2021-06-18 20:51:33 | INFO | train_inner | epoch 001: 717 / 3002 loss=3.247, ppl=9.5, wps=5787.4, ups=0.09, wpb=64749, bsz=128, num_updates=706, lr=7.06e-05, gnorm=3.924, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7967
2021-06-18 20:51:44 | INFO | train_inner | epoch 001: 718 / 3002 loss=3.423, ppl=10.73, wps=5863.6, ups=0.09, wpb=64767, bsz=128, num_updates=707, lr=7.07e-05, gnorm=52.345, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7978
2021-06-18 20:51:55 | INFO | train_inner | epoch 001: 719 / 3002 loss=3.446, ppl=10.9, wps=5864.6, ups=0.09, wpb=64761, bsz=128, num_updates=708, lr=7.08e-05, gnorm=3.014, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=7989
2021-06-18 20:52:06 | INFO | train_inner | epoch 001: 720 / 3002 loss=3.237, ppl=9.43, wps=5868, ups=0.09, wpb=64844, bsz=128, num_updates=709, lr=7.09e-05, gnorm=2.908, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8000
2021-06-18 20:52:17 | INFO | train_inner | epoch 001: 721 / 3002 loss=3.195, ppl=9.16, wps=5772.3, ups=0.09, wpb=64852, bsz=128, num_updates=710, lr=7.1e-05, gnorm=2.891, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8012
2021-06-18 20:52:28 | INFO | train_inner | epoch 001: 722 / 3002 loss=3.128, ppl=8.74, wps=5844.3, ups=0.09, wpb=64848, bsz=128, num_updates=711, lr=7.11e-05, gnorm=3.158, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8023
2021-06-18 20:52:39 | INFO | train_inner | epoch 001: 723 / 3002 loss=3.224, ppl=9.34, wps=5997, ups=0.09, wpb=64876, bsz=128, num_updates=712, lr=7.12e-05, gnorm=2.799, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8034
2021-06-18 20:52:50 | INFO | train_inner | epoch 001: 724 / 3002 loss=3.295, ppl=9.81, wps=5863.6, ups=0.09, wpb=64819, bsz=128, num_updates=713, lr=7.13e-05, gnorm=2.761, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8045
2021-06-18 20:53:01 | INFO | train_inner | epoch 001: 725 / 3002 loss=3.089, ppl=8.51, wps=5934.2, ups=0.09, wpb=64868, bsz=128, num_updates=714, lr=7.14e-05, gnorm=2.7, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8056
2021-06-18 20:53:12 | INFO | train_inner | epoch 001: 726 / 3002 loss=3.214, ppl=9.28, wps=5787.3, ups=0.09, wpb=64791, bsz=128, num_updates=715, lr=7.15e-05, gnorm=3.467, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8067
2021-06-18 20:53:23 | INFO | train_inner | epoch 001: 727 / 3002 loss=3.134, ppl=8.78, wps=5857.7, ups=0.09, wpb=64826, bsz=128, num_updates=716, lr=7.16e-05, gnorm=2.804, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8078
2021-06-18 20:53:35 | INFO | train_inner | epoch 001: 728 / 3002 loss=3.085, ppl=8.48, wps=5760.7, ups=0.09, wpb=64790, bsz=128, num_updates=717, lr=7.17e-05, gnorm=2.987, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8089
2021-06-18 20:53:46 | INFO | train_inner | epoch 001: 729 / 3002 loss=3.2, ppl=9.19, wps=5843.9, ups=0.09, wpb=64856, bsz=128, num_updates=718, lr=7.18e-05, gnorm=2.901, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8100
2021-06-18 20:53:57 | INFO | train_inner | epoch 001: 730 / 3002 loss=3.286, ppl=9.76, wps=5923, ups=0.09, wpb=64868, bsz=128, num_updates=719, lr=7.19e-05, gnorm=2.782, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8111
2021-06-18 20:54:07 | INFO | train_inner | epoch 001: 731 / 3002 loss=3.116, ppl=8.67, wps=6021.3, ups=0.09, wpb=64808, bsz=128, num_updates=720, lr=7.2e-05, gnorm=3.261, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8122
2021-06-18 20:54:18 | INFO | train_inner | epoch 001: 732 / 3002 loss=3.053, ppl=8.3, wps=5983.7, ups=0.09, wpb=64817, bsz=128, num_updates=721, lr=7.21e-05, gnorm=2.855, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8133
2021-06-18 20:54:29 | INFO | train_inner | epoch 001: 733 / 3002 loss=3.016, ppl=8.09, wps=5886.2, ups=0.09, wpb=64749, bsz=128, num_updates=722, lr=7.22e-05, gnorm=5.082, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8144
2021-06-18 20:54:40 | INFO | train_inner | epoch 001: 734 / 3002 loss=3.221, ppl=9.32, wps=5970.6, ups=0.09, wpb=64807, bsz=128, num_updates=723, lr=7.23e-05, gnorm=2.893, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8155
2021-06-18 20:54:51 | INFO | train_inner | epoch 001: 735 / 3002 loss=3.302, ppl=9.86, wps=5874.2, ups=0.09, wpb=64844, bsz=128, num_updates=724, lr=7.24e-05, gnorm=3.024, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8166
2021-06-18 20:55:02 | INFO | train_inner | epoch 001: 736 / 3002 loss=3.245, ppl=9.48, wps=5843.2, ups=0.09, wpb=64844, bsz=128, num_updates=725, lr=7.25e-05, gnorm=6.746, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8177
2021-06-18 20:55:13 | INFO | train_inner | epoch 001: 737 / 3002 loss=3.015, ppl=8.08, wps=5803.9, ups=0.09, wpb=64852, bsz=128, num_updates=726, lr=7.26e-05, gnorm=3.44, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8188
2021-06-18 20:55:25 | INFO | train_inner | epoch 001: 738 / 3002 loss=3.033, ppl=8.19, wps=5890.2, ups=0.09, wpb=64925, bsz=128, num_updates=727, lr=7.27e-05, gnorm=2.76, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8199
2021-06-18 20:55:36 | INFO | train_inner | epoch 001: 739 / 3002 loss=3.321, ppl=9.99, wps=5814.2, ups=0.09, wpb=64775, bsz=128, num_updates=728, lr=7.28e-05, gnorm=2.786, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8210
2021-06-18 20:55:47 | INFO | train_inner | epoch 001: 740 / 3002 loss=2.972, ppl=7.85, wps=5760.6, ups=0.09, wpb=64842, bsz=128, num_updates=729, lr=7.29e-05, gnorm=2.745, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8221
2021-06-18 20:55:58 | INFO | train_inner | epoch 001: 741 / 3002 loss=3.136, ppl=8.79, wps=5826.8, ups=0.09, wpb=64836, bsz=128, num_updates=730, lr=7.3e-05, gnorm=2.769, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8232
2021-06-18 20:56:09 | INFO | train_inner | epoch 001: 742 / 3002 loss=3.051, ppl=8.29, wps=5823.7, ups=0.09, wpb=64840, bsz=128, num_updates=731, lr=7.31e-05, gnorm=2.815, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8244
2021-06-18 20:56:20 | INFO | train_inner | epoch 001: 743 / 3002 loss=3.12, ppl=8.7, wps=5802.6, ups=0.09, wpb=64828, bsz=128, num_updates=732, lr=7.32e-05, gnorm=2.796, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8255
2021-06-18 20:56:31 | INFO | train_inner | epoch 001: 744 / 3002 loss=3.151, ppl=8.88, wps=5910.6, ups=0.09, wpb=64781, bsz=128, num_updates=733, lr=7.33e-05, gnorm=2.775, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8266
2021-06-18 20:56:42 | INFO | train_inner | epoch 001: 745 / 3002 loss=3.223, ppl=9.34, wps=5838.7, ups=0.09, wpb=64845, bsz=128, num_updates=734, lr=7.34e-05, gnorm=2.766, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8277
2021-06-18 20:56:54 | INFO | train_inner | epoch 001: 746 / 3002 loss=3.142, ppl=8.83, wps=5845.6, ups=0.09, wpb=64825, bsz=128, num_updates=735, lr=7.35e-05, gnorm=2.721, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8288
2021-06-18 20:57:05 | INFO | train_inner | epoch 001: 747 / 3002 loss=3.129, ppl=8.75, wps=5828.6, ups=0.09, wpb=64813, bsz=128, num_updates=736, lr=7.36e-05, gnorm=2.703, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8299
2021-06-18 20:57:16 | INFO | train_inner | epoch 001: 748 / 3002 loss=3.334, ppl=10.09, wps=5868.7, ups=0.09, wpb=64908, bsz=128, num_updates=737, lr=7.37e-05, gnorm=2.794, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8310
2021-06-18 20:57:27 | INFO | train_inner | epoch 001: 749 / 3002 loss=3.028, ppl=8.16, wps=5891.6, ups=0.09, wpb=64816, bsz=128, num_updates=738, lr=7.38e-05, gnorm=2.708, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8321
2021-06-18 20:57:38 | INFO | train_inner | epoch 001: 750 / 3002 loss=3.124, ppl=8.72, wps=5984.2, ups=0.09, wpb=64840, bsz=128, num_updates=739, lr=7.39e-05, gnorm=2.776, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8332
2021-06-18 20:57:49 | INFO | train_inner | epoch 001: 751 / 3002 loss=3.338, ppl=10.11, wps=5875, ups=0.09, wpb=64819, bsz=128, num_updates=740, lr=7.4e-05, gnorm=3.54, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8343
2021-06-18 20:58:00 | INFO | train_inner | epoch 001: 752 / 3002 loss=3.052, ppl=8.29, wps=5783.5, ups=0.09, wpb=64843, bsz=128, num_updates=741, lr=7.41e-05, gnorm=3.001, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8354
2021-06-18 20:58:11 | INFO | train_inner | epoch 001: 753 / 3002 loss=3.327, ppl=10.04, wps=5986.6, ups=0.09, wpb=64838, bsz=128, num_updates=742, lr=7.42e-05, gnorm=2.96, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8365
2021-06-18 20:58:22 | INFO | train_inner | epoch 001: 754 / 3002 loss=3.353, ppl=10.22, wps=5877.6, ups=0.09, wpb=64722, bsz=128, num_updates=743, lr=7.43e-05, gnorm=3.033, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8376
2021-06-18 20:58:33 | INFO | train_inner | epoch 001: 755 / 3002 loss=3.005, ppl=8.03, wps=5766.7, ups=0.09, wpb=64881, bsz=128, num_updates=744, lr=7.44e-05, gnorm=2.889, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8387
2021-06-18 20:58:44 | INFO | train_inner | epoch 001: 756 / 3002 loss=3.18, ppl=9.06, wps=5849.9, ups=0.09, wpb=64818, bsz=128, num_updates=745, lr=7.45e-05, gnorm=2.777, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8398
2021-06-18 20:58:55 | INFO | train_inner | epoch 001: 757 / 3002 loss=3.217, ppl=9.3, wps=5800, ups=0.09, wpb=64769, bsz=128, num_updates=746, lr=7.46e-05, gnorm=2.754, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8409
2021-06-18 20:59:06 | INFO | train_inner | epoch 001: 758 / 3002 loss=3.207, ppl=9.24, wps=5946.1, ups=0.09, wpb=64789, bsz=128, num_updates=747, lr=7.47e-05, gnorm=2.864, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=8420
2021-06-18 20:59:17 | INFO | train_inner | epoch 001: 759 / 3002 loss=3.21, ppl=9.25, wps=5811.2, ups=0.09, wpb=64804, bsz=128, num_updates=748, lr=7.48e-05, gnorm=3.135, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8431
2021-06-18 20:59:28 | INFO | train_inner | epoch 001: 760 / 3002 loss=3.182, ppl=9.08, wps=5801.2, ups=0.09, wpb=64840, bsz=128, num_updates=749, lr=7.49e-05, gnorm=3.007, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=8443
2021-06-18 20:59:39 | INFO | train_inner | epoch 001: 761 / 3002 loss=3.333, ppl=10.08, wps=5856.1, ups=0.09, wpb=64883, bsz=128, num_updates=750, lr=7.5e-05, gnorm=3.762, loss_scale=1, train_wall=11, gb_free=2.8, wall=8454
2021-06-18 20:59:51 | INFO | train_inner | epoch 001: 762 / 3002 loss=3.055, ppl=8.31, wps=5776.9, ups=0.09, wpb=64854, bsz=128, num_updates=751, lr=7.51e-05, gnorm=2.723, loss_scale=1, train_wall=11, gb_free=2.8, wall=8465
2021-06-18 21:00:02 | INFO | train_inner | epoch 001: 763 / 3002 loss=3.143, ppl=8.83, wps=5829.3, ups=0.09, wpb=64827, bsz=128, num_updates=752, lr=7.52e-05, gnorm=2.832, loss_scale=1, train_wall=11, gb_free=2.8, wall=8476
2021-06-18 21:00:13 | INFO | train_inner | epoch 001: 764 / 3002 loss=3.126, ppl=8.73, wps=5836.1, ups=0.09, wpb=64878, bsz=128, num_updates=753, lr=7.53e-05, gnorm=2.885, loss_scale=1, train_wall=11, gb_free=2.8, wall=8487
2021-06-18 21:00:24 | INFO | train_inner | epoch 001: 765 / 3002 loss=3.321, ppl=9.99, wps=5800.2, ups=0.09, wpb=64772, bsz=128, num_updates=754, lr=7.54e-05, gnorm=2.864, loss_scale=1, train_wall=11, gb_free=2.8, wall=8498
2021-06-18 21:00:35 | INFO | train_inner | epoch 001: 766 / 3002 loss=3.183, ppl=9.08, wps=5854.5, ups=0.09, wpb=64851, bsz=128, num_updates=755, lr=7.55e-05, gnorm=2.748, loss_scale=1, train_wall=11, gb_free=2.8, wall=8509
2021-06-18 21:00:46 | INFO | train_inner | epoch 001: 767 / 3002 loss=3.251, ppl=9.52, wps=5716, ups=0.09, wpb=64821, bsz=128, num_updates=756, lr=7.56e-05, gnorm=2.77, loss_scale=1, train_wall=11, gb_free=2.8, wall=8521
2021-06-18 21:00:57 | INFO | train_inner | epoch 001: 768 / 3002 loss=3.122, ppl=8.7, wps=5900.4, ups=0.09, wpb=64854, bsz=128, num_updates=757, lr=7.57e-05, gnorm=2.847, loss_scale=1, train_wall=11, gb_free=2.8, wall=8532
2021-06-18 21:01:09 | INFO | train_inner | epoch 001: 769 / 3002 loss=3.298, ppl=9.83, wps=5795.6, ups=0.09, wpb=64821, bsz=128, num_updates=758, lr=7.58e-05, gnorm=2.807, loss_scale=1, train_wall=11, gb_free=2.8, wall=8543
2021-06-18 21:01:20 | INFO | train_inner | epoch 001: 770 / 3002 loss=3.06, ppl=8.34, wps=5932.6, ups=0.09, wpb=64895, bsz=128, num_updates=759, lr=7.59e-05, gnorm=3.049, loss_scale=1, train_wall=10, gb_free=2.8, wall=8554
2021-06-18 21:01:31 | INFO | train_inner | epoch 001: 771 / 3002 loss=3.135, ppl=8.79, wps=5875, ups=0.09, wpb=64832, bsz=128, num_updates=760, lr=7.6e-05, gnorm=2.706, loss_scale=1, train_wall=11, gb_free=2.8, wall=8565
2021-06-18 21:01:42 | INFO | train_inner | epoch 001: 772 / 3002 loss=3.269, ppl=9.64, wps=5922.7, ups=0.09, wpb=64784, bsz=128, num_updates=761, lr=7.61e-05, gnorm=2.821, loss_scale=1, train_wall=10, gb_free=2.8, wall=8576
2021-06-18 21:01:53 | INFO | train_inner | epoch 001: 773 / 3002 loss=3.293, ppl=9.8, wps=5893.6, ups=0.09, wpb=64777, bsz=128, num_updates=762, lr=7.62e-05, gnorm=4.36, loss_scale=1, train_wall=11, gb_free=2.8, wall=8587
2021-06-18 21:02:04 | INFO | train_inner | epoch 001: 774 / 3002 loss=3.201, ppl=9.2, wps=5878.2, ups=0.09, wpb=64832, bsz=128, num_updates=763, lr=7.63e-05, gnorm=2.721, loss_scale=1, train_wall=11, gb_free=2.8, wall=8598
2021-06-18 21:02:14 | INFO | train_inner | epoch 001: 775 / 3002 loss=3.196, ppl=9.17, wps=5959.1, ups=0.09, wpb=64759, bsz=128, num_updates=764, lr=7.64e-05, gnorm=2.749, loss_scale=1, train_wall=10, gb_free=2.8, wall=8609
2021-06-18 21:02:26 | INFO | train_inner | epoch 001: 776 / 3002 loss=3.22, ppl=9.32, wps=5767.1, ups=0.09, wpb=64747, bsz=128, num_updates=765, lr=7.65e-05, gnorm=2.804, loss_scale=1, train_wall=11, gb_free=2.8, wall=8620
2021-06-18 21:02:37 | INFO | train_inner | epoch 001: 777 / 3002 loss=3.152, ppl=8.89, wps=5887, ups=0.09, wpb=64786, bsz=128, num_updates=766, lr=7.66e-05, gnorm=2.632, loss_scale=1, train_wall=11, gb_free=2.8, wall=8631
2021-06-18 21:02:48 | INFO | train_inner | epoch 001: 778 / 3002 loss=3.16, ppl=8.94, wps=5862.3, ups=0.09, wpb=64744, bsz=128, num_updates=767, lr=7.67e-05, gnorm=2.695, loss_scale=1, train_wall=11, gb_free=2.8, wall=8642
2021-06-18 21:02:58 | INFO | train_inner | epoch 001: 779 / 3002 loss=3.331, ppl=10.06, wps=6078.5, ups=0.09, wpb=64822, bsz=128, num_updates=768, lr=7.68e-05, gnorm=3.652, loss_scale=1, train_wall=10, gb_free=2.8, wall=8653
2021-06-18 21:03:10 | INFO | train_inner | epoch 001: 780 / 3002 loss=3.079, ppl=8.45, wps=5805.3, ups=0.09, wpb=64830, bsz=128, num_updates=769, lr=7.69e-05, gnorm=2.885, loss_scale=1, train_wall=11, gb_free=2.8, wall=8664
2021-06-18 21:03:21 | INFO | train_inner | epoch 001: 781 / 3002 loss=3.312, ppl=9.93, wps=5765, ups=0.09, wpb=64769, bsz=128, num_updates=770, lr=7.7e-05, gnorm=2.863, loss_scale=1, train_wall=11, gb_free=2.8, wall=8675
2021-06-18 21:03:32 | INFO | train_inner | epoch 001: 782 / 3002 loss=3.203, ppl=9.21, wps=5877.3, ups=0.09, wpb=64836, bsz=128, num_updates=771, lr=7.71e-05, gnorm=2.724, loss_scale=1, train_wall=11, gb_free=2.8, wall=8686
2021-06-18 21:03:43 | INFO | train_inner | epoch 001: 783 / 3002 loss=3.034, ppl=8.19, wps=5862.7, ups=0.09, wpb=64886, bsz=128, num_updates=772, lr=7.72e-05, gnorm=2.724, loss_scale=1, train_wall=11, gb_free=2.8, wall=8697
2021-06-18 21:03:54 | INFO | train_inner | epoch 001: 784 / 3002 loss=3.165, ppl=8.97, wps=5776.4, ups=0.09, wpb=64769, bsz=128, num_updates=773, lr=7.73e-05, gnorm=2.661, loss_scale=1, train_wall=11, gb_free=2.8, wall=8708
2021-06-18 21:04:05 | INFO | train_inner | epoch 001: 785 / 3002 loss=3.127, ppl=8.74, wps=5952.2, ups=0.09, wpb=64848, bsz=128, num_updates=774, lr=7.74e-05, gnorm=4.675, loss_scale=1, train_wall=10, gb_free=2.8, wall=8719
2021-06-18 21:04:16 | INFO | train_inner | epoch 001: 786 / 3002 loss=3.279, ppl=9.71, wps=5835.4, ups=0.09, wpb=64851, bsz=128, num_updates=775, lr=7.75e-05, gnorm=2.86, loss_scale=1, train_wall=11, gb_free=2.8, wall=8730
2021-06-18 21:04:27 | INFO | train_inner | epoch 001: 787 / 3002 loss=3.324, ppl=10.01, wps=5918.9, ups=0.09, wpb=64806, bsz=128, num_updates=776, lr=7.76e-05, gnorm=2.804, loss_scale=1, train_wall=10, gb_free=2.8, wall=8741
2021-06-18 21:04:38 | INFO | train_inner | epoch 001: 788 / 3002 loss=3.191, ppl=9.13, wps=5960.9, ups=0.09, wpb=64828, bsz=128, num_updates=777, lr=7.77e-05, gnorm=2.895, loss_scale=1, train_wall=10, gb_free=2.8, wall=8752
2021-06-18 21:04:49 | INFO | train_inner | epoch 001: 789 / 3002 loss=3.261, ppl=9.59, wps=5831.9, ups=0.09, wpb=64762, bsz=128, num_updates=778, lr=7.78e-05, gnorm=2.844, loss_scale=1, train_wall=11, gb_free=2.8, wall=8763
2021-06-18 21:05:00 | INFO | train_inner | epoch 001: 790 / 3002 loss=3.169, ppl=8.99, wps=5768.9, ups=0.09, wpb=64794, bsz=128, num_updates=779, lr=7.79e-05, gnorm=2.751, loss_scale=1, train_wall=11, gb_free=2.8, wall=8775
2021-06-18 21:05:11 | INFO | train_inner | epoch 001: 791 / 3002 loss=3.342, ppl=10.14, wps=5882.5, ups=0.09, wpb=64814, bsz=128, num_updates=780, lr=7.8e-05, gnorm=2.762, loss_scale=1, train_wall=11, gb_free=2.8, wall=8786
2021-06-18 21:05:22 | INFO | train_inner | epoch 001: 792 / 3002 loss=3.255, ppl=9.55, wps=5844.1, ups=0.09, wpb=64775, bsz=128, num_updates=781, lr=7.81e-05, gnorm=11.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=8797
2021-06-18 21:05:33 | INFO | train_inner | epoch 001: 793 / 3002 loss=3.044, ppl=8.25, wps=5936.2, ups=0.09, wpb=64849, bsz=128, num_updates=782, lr=7.82e-05, gnorm=2.782, loss_scale=1, train_wall=10, gb_free=2.8, wall=8808
2021-06-18 21:05:44 | INFO | train_inner | epoch 001: 794 / 3002 loss=3.144, ppl=8.84, wps=5885.4, ups=0.09, wpb=64829, bsz=128, num_updates=783, lr=7.83e-05, gnorm=2.671, loss_scale=1, train_wall=11, gb_free=2.8, wall=8819
2021-06-18 21:05:55 | INFO | train_inner | epoch 001: 795 / 3002 loss=3.189, ppl=9.12, wps=5816.5, ups=0.09, wpb=64800, bsz=128, num_updates=784, lr=7.84e-05, gnorm=2.812, loss_scale=1, train_wall=11, gb_free=2.8, wall=8830
2021-06-18 21:06:06 | INFO | train_inner | epoch 001: 796 / 3002 loss=3.034, ppl=8.19, wps=5898.3, ups=0.09, wpb=64843, bsz=128, num_updates=785, lr=7.85e-05, gnorm=2.771, loss_scale=1, train_wall=11, gb_free=2.8, wall=8841
2021-06-18 21:06:17 | INFO | train_inner | epoch 001: 797 / 3002 loss=3.215, ppl=9.29, wps=5889.9, ups=0.09, wpb=64856, bsz=128, num_updates=786, lr=7.86e-05, gnorm=3.548, loss_scale=1, train_wall=11, gb_free=2.8, wall=8852
2021-06-18 21:06:28 | INFO | train_inner | epoch 001: 798 / 3002 loss=3.161, ppl=8.94, wps=5941.9, ups=0.09, wpb=64890, bsz=128, num_updates=787, lr=7.87e-05, gnorm=2.729, loss_scale=1, train_wall=10, gb_free=2.8, wall=8863
2021-06-18 21:06:39 | INFO | train_inner | epoch 001: 799 / 3002 loss=3.195, ppl=9.16, wps=5849.4, ups=0.09, wpb=64769, bsz=128, num_updates=788, lr=7.88e-05, gnorm=3.249, loss_scale=1, train_wall=11, gb_free=2.8, wall=8874
2021-06-18 21:06:51 | INFO | train_inner | epoch 001: 800 / 3002 loss=3.26, ppl=9.58, wps=5864.4, ups=0.09, wpb=64828, bsz=128, num_updates=789, lr=7.89e-05, gnorm=2.79, loss_scale=1, train_wall=11, gb_free=2.8, wall=8885
2021-06-18 21:07:01 | INFO | train_inner | epoch 001: 801 / 3002 loss=3.18, ppl=9.06, wps=5924.5, ups=0.09, wpb=64769, bsz=128, num_updates=790, lr=7.9e-05, gnorm=2.784, loss_scale=1, train_wall=10, gb_free=2.8, wall=8896
2021-06-18 21:07:12 | INFO | train_inner | epoch 001: 802 / 3002 loss=3.198, ppl=9.18, wps=5886.9, ups=0.09, wpb=64857, bsz=128, num_updates=791, lr=7.91e-05, gnorm=2.733, loss_scale=1, train_wall=11, gb_free=2.8, wall=8907
2021-06-18 21:07:23 | INFO | train_inner | epoch 001: 803 / 3002 loss=3.109, ppl=8.63, wps=5925.7, ups=0.09, wpb=64895, bsz=128, num_updates=792, lr=7.92e-05, gnorm=2.715, loss_scale=1, train_wall=10, gb_free=2.8, wall=8918
2021-06-18 21:07:34 | INFO | train_inner | epoch 001: 804 / 3002 loss=3.279, ppl=9.71, wps=5887.8, ups=0.09, wpb=64782, bsz=128, num_updates=793, lr=7.93e-05, gnorm=2.885, loss_scale=1, train_wall=11, gb_free=2.8, wall=8929
2021-06-18 21:07:45 | INFO | train_inner | epoch 001: 805 / 3002 loss=2.98, ppl=7.89, wps=5863.8, ups=0.09, wpb=64906, bsz=128, num_updates=794, lr=7.94e-05, gnorm=2.762, loss_scale=1, train_wall=11, gb_free=2.8, wall=8940
2021-06-18 21:07:57 | INFO | train_inner | epoch 001: 806 / 3002 loss=2.944, ppl=7.7, wps=5876.8, ups=0.09, wpb=64839, bsz=128, num_updates=795, lr=7.95e-05, gnorm=2.859, loss_scale=1, train_wall=11, gb_free=2.8, wall=8951
2021-06-18 21:08:08 | INFO | train_inner | epoch 001: 807 / 3002 loss=3.179, ppl=9.06, wps=5865.5, ups=0.09, wpb=64848, bsz=128, num_updates=796, lr=7.96e-05, gnorm=2.744, loss_scale=1, train_wall=11, gb_free=2.8, wall=8962
2021-06-18 21:08:19 | INFO | train_inner | epoch 001: 808 / 3002 loss=3.167, ppl=8.98, wps=5857, ups=0.09, wpb=64833, bsz=128, num_updates=797, lr=7.97e-05, gnorm=2.687, loss_scale=1, train_wall=11, gb_free=2.8, wall=8973
2021-06-18 21:08:30 | INFO | train_inner | epoch 001: 809 / 3002 loss=3.111, ppl=8.64, wps=5810.6, ups=0.09, wpb=64835, bsz=128, num_updates=798, lr=7.98e-05, gnorm=2.721, loss_scale=1, train_wall=11, gb_free=2.8, wall=8984
2021-06-18 21:08:41 | INFO | train_inner | epoch 001: 810 / 3002 loss=3.018, ppl=8.1, wps=5876.8, ups=0.09, wpb=64808, bsz=128, num_updates=799, lr=7.99e-05, gnorm=2.747, loss_scale=1, train_wall=11, gb_free=2.8, wall=8995
2021-06-18 21:08:52 | INFO | train_inner | epoch 001: 811 / 3002 loss=3.114, ppl=8.66, wps=6001.2, ups=0.09, wpb=64896, bsz=128, num_updates=800, lr=8e-05, gnorm=3.18, loss_scale=1, train_wall=10, gb_free=2.8, wall=9006
2021-06-18 21:09:03 | INFO | train_inner | epoch 001: 812 / 3002 loss=3.055, ppl=8.31, wps=5930.6, ups=0.09, wpb=64817, bsz=128, num_updates=801, lr=8.01e-05, gnorm=2.888, loss_scale=1, train_wall=10, gb_free=2.8, wall=9017
2021-06-18 21:09:14 | INFO | train_inner | epoch 001: 813 / 3002 loss=3.108, ppl=8.62, wps=5803.4, ups=0.09, wpb=64762, bsz=128, num_updates=802, lr=8.02e-05, gnorm=2.641, loss_scale=1, train_wall=11, gb_free=2.8, wall=9028
2021-06-18 21:09:25 | INFO | train_inner | epoch 001: 814 / 3002 loss=3.007, ppl=8.04, wps=5789, ups=0.09, wpb=64876, bsz=128, num_updates=803, lr=8.03e-05, gnorm=2.78, loss_scale=1, train_wall=11, gb_free=2.8, wall=9039
2021-06-18 21:09:36 | INFO | train_inner | epoch 001: 815 / 3002 loss=3.182, ppl=9.08, wps=5906.7, ups=0.09, wpb=64780, bsz=128, num_updates=804, lr=8.04e-05, gnorm=2.987, loss_scale=1, train_wall=11, gb_free=2.8, wall=9050
2021-06-18 21:09:47 | INFO | train_inner | epoch 001: 816 / 3002 loss=3.158, ppl=8.93, wps=5851.8, ups=0.09, wpb=64754, bsz=128, num_updates=805, lr=8.05e-05, gnorm=2.91, loss_scale=1, train_wall=11, gb_free=2.8, wall=9061
2021-06-18 21:09:58 | INFO | train_inner | epoch 001: 817 / 3002 loss=3.139, ppl=8.81, wps=5822.2, ups=0.09, wpb=64869, bsz=128, num_updates=806, lr=8.06e-05, gnorm=2.696, loss_scale=1, train_wall=11, gb_free=2.8, wall=9072
2021-06-18 21:10:09 | INFO | train_inner | epoch 001: 818 / 3002 loss=3.08, ppl=8.46, wps=5908.3, ups=0.09, wpb=64766, bsz=128, num_updates=807, lr=8.07e-05, gnorm=2.742, loss_scale=1, train_wall=11, gb_free=2.8, wall=9083
2021-06-18 21:10:20 | INFO | train_inner | epoch 001: 819 / 3002 loss=3.24, ppl=9.45, wps=5859.6, ups=0.09, wpb=64724, bsz=128, num_updates=808, lr=8.08e-05, gnorm=2.821, loss_scale=1, train_wall=11, gb_free=2.8, wall=9094
2021-06-18 21:10:31 | INFO | train_inner | epoch 001: 820 / 3002 loss=3.092, ppl=8.53, wps=5913.5, ups=0.09, wpb=64881, bsz=128, num_updates=809, lr=8.09e-05, gnorm=2.677, loss_scale=1, train_wall=11, gb_free=2.8, wall=9105
2021-06-18 21:10:42 | INFO | train_inner | epoch 001: 821 / 3002 loss=3.242, ppl=9.46, wps=5932.7, ups=0.09, wpb=64770, bsz=128, num_updates=810, lr=8.1e-05, gnorm=2.733, loss_scale=1, train_wall=10, gb_free=2.8, wall=9116
2021-06-18 21:10:53 | INFO | train_inner | epoch 001: 822 / 3002 loss=3.008, ppl=8.04, wps=5836.9, ups=0.09, wpb=64877, bsz=128, num_updates=811, lr=8.11e-05, gnorm=2.882, loss_scale=1, train_wall=11, gb_free=2.8, wall=9127
2021-06-18 21:11:04 | INFO | train_inner | epoch 001: 823 / 3002 loss=3.284, ppl=9.74, wps=5997.1, ups=0.09, wpb=64818, bsz=128, num_updates=812, lr=8.12e-05, gnorm=3.021, loss_scale=1, train_wall=10, gb_free=2.8, wall=9138
2021-06-18 21:11:15 | INFO | train_inner | epoch 001: 824 / 3002 loss=3.135, ppl=8.78, wps=5808.7, ups=0.09, wpb=64898, bsz=128, num_updates=813, lr=8.13e-05, gnorm=2.811, loss_scale=1, train_wall=11, gb_free=2.8, wall=9149
2021-06-18 21:11:27 | INFO | train_inner | epoch 001: 825 / 3002 loss=3.247, ppl=9.49, wps=5640.3, ups=0.09, wpb=64760, bsz=128, num_updates=814, lr=8.14e-05, gnorm=5.567, loss_scale=1, train_wall=11, gb_free=2.8, wall=9161
2021-06-18 21:11:38 | INFO | train_inner | epoch 001: 826 / 3002 loss=2.953, ppl=7.74, wps=5931.3, ups=0.09, wpb=64822, bsz=128, num_updates=815, lr=8.15e-05, gnorm=2.872, loss_scale=1, train_wall=10, gb_free=2.8, wall=9172
2021-06-18 21:11:49 | INFO | train_inner | epoch 001: 827 / 3002 loss=3.279, ppl=9.71, wps=5815.4, ups=0.09, wpb=64827, bsz=128, num_updates=816, lr=8.16e-05, gnorm=2.802, loss_scale=1, train_wall=11, gb_free=2.8, wall=9183
2021-06-18 21:12:00 | INFO | train_inner | epoch 001: 828 / 3002 loss=3.152, ppl=8.89, wps=5813.3, ups=0.09, wpb=64884, bsz=128, num_updates=817, lr=8.17e-05, gnorm=2.747, loss_scale=1, train_wall=11, gb_free=2.8, wall=9194
2021-06-18 21:12:11 | INFO | train_inner | epoch 001: 829 / 3002 loss=3.121, ppl=8.7, wps=5840.9, ups=0.09, wpb=64755, bsz=128, num_updates=818, lr=8.18e-05, gnorm=3.632, loss_scale=1, train_wall=11, gb_free=2.8, wall=9205
2021-06-18 21:12:22 | INFO | train_inner | epoch 001: 830 / 3002 loss=3.162, ppl=8.95, wps=5813, ups=0.09, wpb=64863, bsz=128, num_updates=819, lr=8.19e-05, gnorm=2.943, loss_scale=1, train_wall=11, gb_free=2.8, wall=9216
2021-06-18 21:12:33 | INFO | train_inner | epoch 001: 831 / 3002 loss=3.182, ppl=9.07, wps=5896.2, ups=0.09, wpb=64778, bsz=128, num_updates=820, lr=8.2e-05, gnorm=2.788, loss_scale=1, train_wall=11, gb_free=2.8, wall=9227
2021-06-18 21:12:44 | INFO | train_inner | epoch 001: 832 / 3002 loss=3.216, ppl=9.29, wps=6019.2, ups=0.09, wpb=64903, bsz=128, num_updates=821, lr=8.21e-05, gnorm=2.718, loss_scale=1, train_wall=10, gb_free=2.8, wall=9238
2021-06-18 21:12:55 | INFO | train_inner | epoch 001: 833 / 3002 loss=3.087, ppl=8.5, wps=6021.6, ups=0.09, wpb=64892, bsz=128, num_updates=822, lr=8.22e-05, gnorm=2.864, loss_scale=1, train_wall=10, gb_free=2.8, wall=9249
2021-06-18 21:13:06 | INFO | train_inner | epoch 001: 834 / 3002 loss=3.271, ppl=9.66, wps=5802.9, ups=0.09, wpb=64875, bsz=128, num_updates=823, lr=8.23e-05, gnorm=2.944, loss_scale=1, train_wall=11, gb_free=2.8, wall=9260
2021-06-18 21:13:17 | INFO | train_inner | epoch 001: 835 / 3002 loss=3.193, ppl=9.15, wps=5765.8, ups=0.09, wpb=64784, bsz=128, num_updates=824, lr=8.24e-05, gnorm=2.73, loss_scale=1, train_wall=11, gb_free=2.8, wall=9271
2021-06-18 21:13:28 | INFO | train_inner | epoch 001: 836 / 3002 loss=3.159, ppl=8.93, wps=5823.7, ups=0.09, wpb=64837, bsz=128, num_updates=825, lr=8.25e-05, gnorm=2.599, loss_scale=1, train_wall=11, gb_free=2.8, wall=9283
2021-06-18 21:13:39 | INFO | train_inner | epoch 001: 837 / 3002 loss=2.895, ppl=7.44, wps=5857, ups=0.09, wpb=64809, bsz=128, num_updates=826, lr=8.26e-05, gnorm=2.75, loss_scale=1, train_wall=11, gb_free=2.8, wall=9294
2021-06-18 21:13:50 | INFO | train_inner | epoch 001: 838 / 3002 loss=3.139, ppl=8.81, wps=5856.8, ups=0.09, wpb=64781, bsz=128, num_updates=827, lr=8.27e-05, gnorm=2.766, loss_scale=1, train_wall=11, gb_free=2.8, wall=9305
2021-06-18 21:14:01 | INFO | train_inner | epoch 001: 839 / 3002 loss=3.184, ppl=9.09, wps=5803.3, ups=0.09, wpb=64902, bsz=128, num_updates=828, lr=8.28e-05, gnorm=2.835, loss_scale=1, train_wall=11, gb_free=2.8, wall=9316
2021-06-18 21:14:12 | INFO | train_inner | epoch 001: 840 / 3002 loss=3.097, ppl=8.55, wps=5918, ups=0.09, wpb=64890, bsz=128, num_updates=829, lr=8.29e-05, gnorm=2.672, loss_scale=1, train_wall=11, gb_free=2.8, wall=9327
2021-06-18 21:14:23 | INFO | train_inner | epoch 001: 841 / 3002 loss=3.16, ppl=8.94, wps=5889.3, ups=0.09, wpb=64863, bsz=128, num_updates=830, lr=8.3e-05, gnorm=2.727, loss_scale=1, train_wall=11, gb_free=2.8, wall=9338
2021-06-18 21:14:34 | INFO | train_inner | epoch 001: 842 / 3002 loss=3.16, ppl=8.94, wps=5929.2, ups=0.09, wpb=64751, bsz=128, num_updates=831, lr=8.31e-05, gnorm=2.671, loss_scale=1, train_wall=10, gb_free=2.8, wall=9349
2021-06-18 21:14:45 | INFO | train_inner | epoch 001: 843 / 3002 loss=3.235, ppl=9.42, wps=5939.8, ups=0.09, wpb=64794, bsz=128, num_updates=832, lr=8.32e-05, gnorm=2.799, loss_scale=1, train_wall=10, gb_free=2.8, wall=9360
2021-06-18 21:14:56 | INFO | train_inner | epoch 001: 844 / 3002 loss=3.09, ppl=8.52, wps=5867.6, ups=0.09, wpb=64857, bsz=128, num_updates=833, lr=8.33e-05, gnorm=2.786, loss_scale=1, train_wall=11, gb_free=2.8, wall=9371
2021-06-18 21:15:07 | INFO | train_inner | epoch 001: 845 / 3002 loss=3.305, ppl=9.89, wps=5937.9, ups=0.09, wpb=64765, bsz=128, num_updates=834, lr=8.34e-05, gnorm=2.991, loss_scale=1, train_wall=10, gb_free=2.8, wall=9382
2021-06-18 21:15:19 | INFO | train_inner | epoch 001: 846 / 3002 loss=2.988, ppl=7.93, wps=5730.9, ups=0.09, wpb=64790, bsz=128, num_updates=835, lr=8.35e-05, gnorm=3.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=9393
2021-06-18 21:15:30 | INFO | train_inner | epoch 001: 847 / 3002 loss=3.122, ppl=8.7, wps=5910, ups=0.09, wpb=64857, bsz=128, num_updates=836, lr=8.36e-05, gnorm=2.791, loss_scale=1, train_wall=10, gb_free=2.8, wall=9404
2021-06-18 21:15:41 | INFO | train_inner | epoch 001: 848 / 3002 loss=3.051, ppl=8.29, wps=5864.3, ups=0.09, wpb=64849, bsz=128, num_updates=837, lr=8.37e-05, gnorm=2.681, loss_scale=1, train_wall=11, gb_free=2.8, wall=9415
2021-06-18 21:15:51 | INFO | train_inner | epoch 001: 849 / 3002 loss=3.2, ppl=9.19, wps=5975.5, ups=0.09, wpb=64853, bsz=128, num_updates=838, lr=8.38e-05, gnorm=2.65, loss_scale=1, train_wall=10, gb_free=2.8, wall=9426
2021-06-18 21:16:02 | INFO | train_inner | epoch 001: 850 / 3002 loss=3.057, ppl=8.32, wps=5908, ups=0.09, wpb=64806, bsz=128, num_updates=839, lr=8.39e-05, gnorm=2.596, loss_scale=1, train_wall=11, gb_free=2.8, wall=9437
2021-06-18 21:16:13 | INFO | train_inner | epoch 001: 851 / 3002 loss=2.903, ppl=7.48, wps=5911.1, ups=0.09, wpb=64757, bsz=128, num_updates=840, lr=8.4e-05, gnorm=2.783, loss_scale=1, train_wall=11, gb_free=2.8, wall=9448
2021-06-18 21:16:24 | INFO | train_inner | epoch 001: 852 / 3002 loss=3.042, ppl=8.24, wps=5927.9, ups=0.09, wpb=64825, bsz=128, num_updates=841, lr=8.41e-05, gnorm=2.627, loss_scale=1, train_wall=11, gb_free=2.8, wall=9459
2021-06-18 21:16:35 | INFO | train_inner | epoch 001: 853 / 3002 loss=3.194, ppl=9.15, wps=5946.4, ups=0.09, wpb=64754, bsz=128, num_updates=842, lr=8.42e-05, gnorm=2.871, loss_scale=1, train_wall=10, gb_free=2.8, wall=9470
2021-06-18 21:16:46 | INFO | train_inner | epoch 001: 854 / 3002 loss=3.053, ppl=8.3, wps=5853.5, ups=0.09, wpb=64812, bsz=128, num_updates=843, lr=8.43e-05, gnorm=2.636, loss_scale=1, train_wall=11, gb_free=2.8, wall=9481
2021-06-18 21:16:57 | INFO | train_inner | epoch 001: 855 / 3002 loss=3.035, ppl=8.2, wps=5787, ups=0.09, wpb=64800, bsz=128, num_updates=844, lr=8.44e-05, gnorm=2.609, loss_scale=1, train_wall=11, gb_free=2.8, wall=9492
2021-06-18 21:17:08 | INFO | train_inner | epoch 001: 856 / 3002 loss=3.151, ppl=8.88, wps=5948.5, ups=0.09, wpb=64806, bsz=128, num_updates=845, lr=8.45e-05, gnorm=2.629, loss_scale=1, train_wall=10, gb_free=2.8, wall=9503
2021-06-18 21:17:19 | INFO | train_inner | epoch 001: 857 / 3002 loss=3.058, ppl=8.33, wps=5885.3, ups=0.09, wpb=64861, bsz=128, num_updates=846, lr=8.46e-05, gnorm=2.768, loss_scale=1, train_wall=11, gb_free=2.8, wall=9514
2021-06-18 21:17:30 | INFO | train_inner | epoch 001: 858 / 3002 loss=3.149, ppl=8.87, wps=5843.8, ups=0.09, wpb=64805, bsz=128, num_updates=847, lr=8.47e-05, gnorm=2.815, loss_scale=1, train_wall=11, gb_free=2.8, wall=9525
2021-06-18 21:17:42 | INFO | train_inner | epoch 001: 859 / 3002 loss=3.306, ppl=9.89, wps=5765, ups=0.09, wpb=64777, bsz=128, num_updates=848, lr=8.48e-05, gnorm=3.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=9536
2021-06-18 21:17:53 | INFO | train_inner | epoch 001: 860 / 3002 loss=3.176, ppl=9.04, wps=5857.2, ups=0.09, wpb=64774, bsz=128, num_updates=849, lr=8.49e-05, gnorm=2.618, loss_scale=1, train_wall=11, gb_free=2.8, wall=9547
2021-06-18 21:18:04 | INFO | train_inner | epoch 001: 861 / 3002 loss=3.012, ppl=8.07, wps=5852.8, ups=0.09, wpb=64872, bsz=128, num_updates=850, lr=8.5e-05, gnorm=2.719, loss_scale=1, train_wall=11, gb_free=2.8, wall=9558
2021-06-18 21:18:15 | INFO | train_inner | epoch 001: 862 / 3002 loss=3.261, ppl=9.59, wps=5814.8, ups=0.09, wpb=64807, bsz=128, num_updates=851, lr=8.51e-05, gnorm=2.658, loss_scale=1, train_wall=11, gb_free=2.8, wall=9569
2021-06-18 21:18:26 | INFO | train_inner | epoch 001: 863 / 3002 loss=3.234, ppl=9.41, wps=5846.5, ups=0.09, wpb=64857, bsz=128, num_updates=852, lr=8.52e-05, gnorm=4.327, loss_scale=1, train_wall=11, gb_free=2.8, wall=9580
2021-06-18 21:18:37 | INFO | train_inner | epoch 001: 864 / 3002 loss=3.353, ppl=10.22, wps=5971.1, ups=0.09, wpb=64752, bsz=128, num_updates=853, lr=8.53e-05, gnorm=2.888, loss_scale=1, train_wall=10, gb_free=2.8, wall=9591
2021-06-18 21:18:48 | INFO | train_inner | epoch 001: 865 / 3002 loss=3.093, ppl=8.53, wps=5708.5, ups=0.09, wpb=64831, bsz=128, num_updates=854, lr=8.54e-05, gnorm=2.739, loss_scale=1, train_wall=11, gb_free=2.8, wall=9603
2021-06-18 21:19:00 | INFO | train_inner | epoch 001: 866 / 3002 loss=3.23, ppl=9.38, wps=5776.6, ups=0.09, wpb=64751, bsz=128, num_updates=855, lr=8.55e-05, gnorm=2.882, loss_scale=1, train_wall=11, gb_free=2.8, wall=9614
2021-06-18 21:19:10 | INFO | train_inner | epoch 001: 867 / 3002 loss=3.249, ppl=9.51, wps=5970.4, ups=0.09, wpb=64796, bsz=128, num_updates=856, lr=8.56e-05, gnorm=2.827, loss_scale=1, train_wall=10, gb_free=2.8, wall=9625
2021-06-18 21:19:21 | INFO | train_inner | epoch 001: 868 / 3002 loss=3.22, ppl=9.32, wps=5822.1, ups=0.09, wpb=64882, bsz=128, num_updates=857, lr=8.57e-05, gnorm=2.581, loss_scale=1, train_wall=11, gb_free=2.8, wall=9636
2021-06-18 21:19:33 | INFO | train_inner | epoch 001: 869 / 3002 loss=3.184, ppl=9.09, wps=5819.2, ups=0.09, wpb=64859, bsz=128, num_updates=858, lr=8.58e-05, gnorm=2.751, loss_scale=1, train_wall=11, gb_free=2.8, wall=9647
2021-06-18 21:19:44 | INFO | train_inner | epoch 001: 870 / 3002 loss=3.13, ppl=8.75, wps=5854.4, ups=0.09, wpb=64851, bsz=128, num_updates=859, lr=8.59e-05, gnorm=2.663, loss_scale=1, train_wall=11, gb_free=2.8, wall=9658
2021-06-18 21:19:55 | INFO | train_inner | epoch 001: 871 / 3002 loss=3.115, ppl=8.66, wps=5877.7, ups=0.09, wpb=64858, bsz=128, num_updates=860, lr=8.6e-05, gnorm=2.856, loss_scale=1, train_wall=11, gb_free=2.8, wall=9669
2021-06-18 21:20:06 | INFO | train_inner | epoch 001: 872 / 3002 loss=2.993, ppl=7.96, wps=5922.8, ups=0.09, wpb=64755, bsz=128, num_updates=861, lr=8.61e-05, gnorm=2.836, loss_scale=1, train_wall=10, gb_free=2.8, wall=9680
2021-06-18 21:20:17 | INFO | train_inner | epoch 001: 873 / 3002 loss=3.074, ppl=8.42, wps=5827.5, ups=0.09, wpb=64824, bsz=128, num_updates=862, lr=8.62e-05, gnorm=2.752, loss_scale=1, train_wall=11, gb_free=2.8, wall=9691
2021-06-18 21:20:28 | INFO | train_inner | epoch 001: 874 / 3002 loss=3.055, ppl=8.31, wps=5820.7, ups=0.09, wpb=64937, bsz=128, num_updates=863, lr=8.63e-05, gnorm=2.642, loss_scale=1, train_wall=11, gb_free=2.8, wall=9702
2021-06-18 21:20:39 | INFO | train_inner | epoch 001: 875 / 3002 loss=3.029, ppl=8.16, wps=5806.8, ups=0.09, wpb=64863, bsz=128, num_updates=864, lr=8.64e-05, gnorm=2.728, loss_scale=1, train_wall=11, gb_free=2.8, wall=9713
2021-06-18 21:20:50 | INFO | train_inner | epoch 001: 876 / 3002 loss=3.175, ppl=9.03, wps=5906.9, ups=0.09, wpb=64874, bsz=128, num_updates=865, lr=8.65e-05, gnorm=2.7, loss_scale=1, train_wall=11, gb_free=2.8, wall=9724
2021-06-18 21:21:01 | INFO | train_inner | epoch 001: 877 / 3002 loss=3.225, ppl=9.35, wps=5728.4, ups=0.09, wpb=64868, bsz=128, num_updates=866, lr=8.66e-05, gnorm=3.004, loss_scale=1, train_wall=11, gb_free=2.8, wall=9736
2021-06-18 21:21:13 | INFO | train_inner | epoch 001: 878 / 3002 loss=3.156, ppl=8.91, wps=5755, ups=0.09, wpb=64773, bsz=128, num_updates=867, lr=8.67e-05, gnorm=2.645, loss_scale=1, train_wall=11, gb_free=2.8, wall=9747
2021-06-18 21:21:24 | INFO | train_inner | epoch 001: 879 / 3002 loss=3.027, ppl=8.15, wps=5861.4, ups=0.09, wpb=64883, bsz=128, num_updates=868, lr=8.68e-05, gnorm=2.687, loss_scale=1, train_wall=11, gb_free=2.8, wall=9758
2021-06-18 21:21:35 | INFO | train_inner | epoch 001: 880 / 3002 loss=3.156, ppl=8.91, wps=5901.1, ups=0.09, wpb=64815, bsz=128, num_updates=869, lr=8.69e-05, gnorm=2.81, loss_scale=1, train_wall=11, gb_free=2.8, wall=9769
2021-06-18 21:21:46 | INFO | train_inner | epoch 001: 881 / 3002 loss=2.964, ppl=7.8, wps=5829.3, ups=0.09, wpb=64805, bsz=128, num_updates=870, lr=8.7e-05, gnorm=2.689, loss_scale=1, train_wall=11, gb_free=2.8, wall=9780
2021-06-18 21:21:57 | INFO | train_inner | epoch 001: 882 / 3002 loss=3.105, ppl=8.6, wps=5869.8, ups=0.09, wpb=64816, bsz=128, num_updates=871, lr=8.71e-05, gnorm=2.775, loss_scale=1, train_wall=11, gb_free=2.8, wall=9791
2021-06-18 21:22:08 | INFO | train_inner | epoch 001: 883 / 3002 loss=3.24, ppl=9.45, wps=5787.8, ups=0.09, wpb=64825, bsz=128, num_updates=872, lr=8.72e-05, gnorm=2.9, loss_scale=1, train_wall=11, gb_free=2.8, wall=9802
2021-06-18 21:22:19 | INFO | train_inner | epoch 001: 884 / 3002 loss=3.111, ppl=8.64, wps=5866.8, ups=0.09, wpb=64797, bsz=128, num_updates=873, lr=8.73e-05, gnorm=2.857, loss_scale=1, train_wall=11, gb_free=2.8, wall=9813
2021-06-18 21:22:30 | INFO | train_inner | epoch 001: 885 / 3002 loss=3.275, ppl=9.68, wps=5742.7, ups=0.09, wpb=64835, bsz=128, num_updates=874, lr=8.74e-05, gnorm=2.788, loss_scale=1, train_wall=11, gb_free=2.8, wall=9825
2021-06-18 21:22:41 | INFO | train_inner | epoch 001: 886 / 3002 loss=3.193, ppl=9.14, wps=5885.1, ups=0.09, wpb=64781, bsz=128, num_updates=875, lr=8.75e-05, gnorm=2.716, loss_scale=1, train_wall=11, gb_free=2.8, wall=9836
2021-06-18 21:22:52 | INFO | train_inner | epoch 001: 887 / 3002 loss=3.172, ppl=9.01, wps=5949.7, ups=0.09, wpb=64896, bsz=128, num_updates=876, lr=8.76e-05, gnorm=2.701, loss_scale=1, train_wall=10, gb_free=2.8, wall=9847
2021-06-18 21:23:03 | INFO | train_inner | epoch 001: 888 / 3002 loss=3.056, ppl=8.32, wps=5880.5, ups=0.09, wpb=64889, bsz=128, num_updates=877, lr=8.77e-05, gnorm=2.837, loss_scale=1, train_wall=11, gb_free=2.8, wall=9858
2021-06-18 21:23:14 | INFO | train_inner | epoch 001: 889 / 3002 loss=2.954, ppl=7.75, wps=5841.1, ups=0.09, wpb=64735, bsz=128, num_updates=878, lr=8.78e-05, gnorm=2.735, loss_scale=2, train_wall=11, gb_free=2.8, wall=9869
2021-06-18 21:23:26 | INFO | train_inner | epoch 001: 890 / 3002 loss=3.315, ppl=9.95, wps=5862, ups=0.09, wpb=64844, bsz=128, num_updates=879, lr=8.79e-05, gnorm=2.818, loss_scale=2, train_wall=11, gb_free=2.8, wall=9880
2021-06-18 21:23:37 | INFO | train_inner | epoch 001: 891 / 3002 loss=3.14, ppl=8.82, wps=5868, ups=0.09, wpb=64780, bsz=128, num_updates=880, lr=8.8e-05, gnorm=2.689, loss_scale=2, train_wall=11, gb_free=2.8, wall=9891
2021-06-18 21:23:48 | INFO | train_inner | epoch 001: 892 / 3002 loss=3.036, ppl=8.2, wps=5889.1, ups=0.09, wpb=64859, bsz=128, num_updates=881, lr=8.81e-05, gnorm=2.709, loss_scale=2, train_wall=11, gb_free=2.8, wall=9902
2021-06-18 21:23:59 | INFO | train_inner | epoch 001: 893 / 3002 loss=3.004, ppl=8.02, wps=5844, ups=0.09, wpb=64850, bsz=128, num_updates=882, lr=8.82e-05, gnorm=2.767, loss_scale=2, train_wall=11, gb_free=2.8, wall=9913
2021-06-18 21:24:10 | INFO | train_inner | epoch 001: 894 / 3002 loss=3.028, ppl=8.15, wps=5930.8, ups=0.09, wpb=64832, bsz=128, num_updates=883, lr=8.83e-05, gnorm=8.107, loss_scale=2, train_wall=10, gb_free=2.8, wall=9924
2021-06-18 21:24:20 | INFO | train_inner | epoch 001: 895 / 3002 loss=3.231, ppl=9.39, wps=6002.3, ups=0.09, wpb=64853, bsz=128, num_updates=884, lr=8.84e-05, gnorm=2.654, loss_scale=2, train_wall=10, gb_free=2.8, wall=9935
2021-06-18 21:24:32 | INFO | train_inner | epoch 001: 896 / 3002 loss=3.145, ppl=8.84, wps=5840.2, ups=0.09, wpb=64791, bsz=128, num_updates=885, lr=8.85e-05, gnorm=2.7, loss_scale=2, train_wall=11, gb_free=2.8, wall=9946
2021-06-18 21:24:43 | INFO | train_inner | epoch 001: 897 / 3002 loss=3.135, ppl=8.78, wps=5902, ups=0.09, wpb=64769, bsz=128, num_updates=886, lr=8.86e-05, gnorm=2.57, loss_scale=2, train_wall=11, gb_free=2.8, wall=9957
2021-06-18 21:24:54 | INFO | train_inner | epoch 001: 898 / 3002 loss=3.073, ppl=8.42, wps=5846.8, ups=0.09, wpb=64853, bsz=128, num_updates=887, lr=8.87e-05, gnorm=2.656, loss_scale=2, train_wall=11, gb_free=2.8, wall=9968
2021-06-18 21:25:05 | INFO | train_inner | epoch 001: 899 / 3002 loss=3.105, ppl=8.6, wps=5848.7, ups=0.09, wpb=64827, bsz=128, num_updates=888, lr=8.88e-05, gnorm=2.768, loss_scale=2, train_wall=11, gb_free=2.8, wall=9979
2021-06-18 21:25:16 | INFO | train_inner | epoch 001: 900 / 3002 loss=3.252, ppl=9.53, wps=5819.1, ups=0.09, wpb=64850, bsz=128, num_updates=889, lr=8.89e-05, gnorm=2.86, loss_scale=2, train_wall=11, gb_free=2.8, wall=9990
2021-06-18 21:25:27 | INFO | train_inner | epoch 001: 901 / 3002 loss=3.131, ppl=8.76, wps=5964.5, ups=0.09, wpb=64834, bsz=128, num_updates=890, lr=8.9e-05, gnorm=2.683, loss_scale=2, train_wall=10, gb_free=2.8, wall=10001
2021-06-18 21:25:38 | INFO | train_inner | epoch 001: 902 / 3002 loss=3.191, ppl=9.13, wps=5835.4, ups=0.09, wpb=64811, bsz=128, num_updates=891, lr=8.91e-05, gnorm=2.726, loss_scale=2, train_wall=11, gb_free=2.8, wall=10012
2021-06-18 21:25:49 | INFO | train_inner | epoch 001: 903 / 3002 loss=3.319, ppl=9.98, wps=5887.7, ups=0.09, wpb=64806, bsz=128, num_updates=892, lr=8.92e-05, gnorm=2.747, loss_scale=2, train_wall=11, gb_free=2.8, wall=10023
2021-06-18 21:26:00 | INFO | train_inner | epoch 001: 904 / 3002 loss=3.134, ppl=8.78, wps=5846.9, ups=0.09, wpb=64821, bsz=128, num_updates=893, lr=8.93e-05, gnorm=2.684, loss_scale=2, train_wall=11, gb_free=2.8, wall=10034
2021-06-18 21:26:11 | INFO | train_inner | epoch 001: 905 / 3002 loss=2.981, ppl=7.9, wps=5771.1, ups=0.09, wpb=64886, bsz=128, num_updates=894, lr=8.94e-05, gnorm=2.743, loss_scale=2, train_wall=11, gb_free=2.8, wall=10045
2021-06-18 21:26:22 | INFO | train_inner | epoch 001: 906 / 3002 loss=3.085, ppl=8.49, wps=5892.9, ups=0.09, wpb=64828, bsz=128, num_updates=895, lr=8.95e-05, gnorm=3.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=10056
2021-06-18 21:26:33 | INFO | train_inner | epoch 001: 907 / 3002 loss=3.013, ppl=8.07, wps=5865, ups=0.09, wpb=64891, bsz=128, num_updates=896, lr=8.96e-05, gnorm=2.629, loss_scale=2, train_wall=11, gb_free=2.8, wall=10068
2021-06-18 21:26:44 | INFO | train_inner | epoch 001: 908 / 3002 loss=3.128, ppl=8.75, wps=5936.5, ups=0.09, wpb=64861, bsz=128, num_updates=897, lr=8.97e-05, gnorm=2.765, loss_scale=2, train_wall=10, gb_free=2.8, wall=10078
2021-06-18 21:26:55 | INFO | train_inner | epoch 001: 909 / 3002 loss=3.033, ppl=8.19, wps=5872.1, ups=0.09, wpb=64857, bsz=128, num_updates=898, lr=8.98e-05, gnorm=2.55, loss_scale=2, train_wall=11, gb_free=2.8, wall=10090
2021-06-18 21:27:06 | INFO | train_inner | epoch 001: 910 / 3002 loss=3.098, ppl=8.56, wps=5812.2, ups=0.09, wpb=64815, bsz=128, num_updates=899, lr=8.99e-05, gnorm=2.829, loss_scale=2, train_wall=11, gb_free=2.8, wall=10101
2021-06-18 21:27:17 | INFO | train_inner | epoch 001: 911 / 3002 loss=2.946, ppl=7.71, wps=5875.9, ups=0.09, wpb=64809, bsz=128, num_updates=900, lr=9e-05, gnorm=2.722, loss_scale=2, train_wall=11, gb_free=2.8, wall=10112
2021-06-18 21:27:29 | INFO | train_inner | epoch 001: 912 / 3002 loss=3.186, ppl=9.1, wps=5806.5, ups=0.09, wpb=64825, bsz=128, num_updates=901, lr=9.01e-05, gnorm=2.771, loss_scale=2, train_wall=11, gb_free=2.8, wall=10123
2021-06-18 21:27:40 | INFO | train_inner | epoch 001: 913 / 3002 loss=3.1, ppl=8.58, wps=5869.1, ups=0.09, wpb=64860, bsz=128, num_updates=902, lr=9.02e-05, gnorm=2.722, loss_scale=2, train_wall=11, gb_free=2.8, wall=10134
2021-06-18 21:27:50 | INFO | train_inner | epoch 001: 914 / 3002 loss=3.218, ppl=9.31, wps=5966.3, ups=0.09, wpb=64781, bsz=128, num_updates=903, lr=9.03e-05, gnorm=2.775, loss_scale=2, train_wall=10, gb_free=2.8, wall=10145
2021-06-18 21:28:02 | INFO | train_inner | epoch 001: 915 / 3002 loss=3.352, ppl=10.21, wps=5814.7, ups=0.09, wpb=64834, bsz=128, num_updates=904, lr=9.04e-05, gnorm=2.766, loss_scale=2, train_wall=11, gb_free=2.8, wall=10156
2021-06-18 21:28:13 | INFO | train_inner | epoch 001: 916 / 3002 loss=2.982, ppl=7.9, wps=5852.9, ups=0.09, wpb=64858, bsz=128, num_updates=905, lr=9.05e-05, gnorm=2.598, loss_scale=2, train_wall=11, gb_free=2.8, wall=10167
2021-06-18 21:28:23 | INFO | train_inner | epoch 001: 917 / 3002 loss=2.982, ppl=7.9, wps=6001, ups=0.09, wpb=64851, bsz=128, num_updates=906, lr=9.06e-05, gnorm=2.757, loss_scale=2, train_wall=10, gb_free=2.8, wall=10178
2021-06-18 21:28:35 | INFO | train_inner | epoch 001: 918 / 3002 loss=3.188, ppl=9.11, wps=5872.1, ups=0.09, wpb=64847, bsz=128, num_updates=907, lr=9.07e-05, gnorm=2.931, loss_scale=2, train_wall=11, gb_free=2.8, wall=10189
2021-06-18 21:28:46 | INFO | train_inner | epoch 001: 919 / 3002 loss=3.139, ppl=8.81, wps=5839.6, ups=0.09, wpb=64869, bsz=128, num_updates=908, lr=9.08e-05, gnorm=2.946, loss_scale=2, train_wall=11, gb_free=2.8, wall=10200
2021-06-18 21:28:57 | INFO | train_inner | epoch 001: 920 / 3002 loss=3.196, ppl=9.16, wps=5841.8, ups=0.09, wpb=64787, bsz=128, num_updates=909, lr=9.09e-05, gnorm=2.932, loss_scale=2, train_wall=11, gb_free=2.8, wall=10211
2021-06-18 21:29:08 | INFO | train_inner | epoch 001: 921 / 3002 loss=3.228, ppl=9.37, wps=5885.1, ups=0.09, wpb=64835, bsz=128, num_updates=910, lr=9.1e-05, gnorm=2.815, loss_scale=2, train_wall=11, gb_free=2.8, wall=10222
2021-06-18 21:29:19 | INFO | train_inner | epoch 001: 922 / 3002 loss=3.166, ppl=8.97, wps=5862.7, ups=0.09, wpb=64826, bsz=128, num_updates=911, lr=9.11e-05, gnorm=2.745, loss_scale=2, train_wall=11, gb_free=2.8, wall=10233
2021-06-18 21:29:30 | INFO | train_inner | epoch 001: 923 / 3002 loss=3.085, ppl=8.49, wps=5906.8, ups=0.09, wpb=64916, bsz=128, num_updates=912, lr=9.12e-05, gnorm=2.779, loss_scale=2, train_wall=11, gb_free=2.8, wall=10244
2021-06-18 21:29:41 | INFO | train_inner | epoch 001: 924 / 3002 loss=3.181, ppl=9.07, wps=5782.5, ups=0.09, wpb=64748, bsz=128, num_updates=913, lr=9.13e-05, gnorm=2.704, loss_scale=2, train_wall=11, gb_free=2.8, wall=10255
2021-06-18 21:29:52 | INFO | train_inner | epoch 001: 925 / 3002 loss=3.105, ppl=8.6, wps=5847.1, ups=0.09, wpb=64835, bsz=128, num_updates=914, lr=9.14e-05, gnorm=2.598, loss_scale=2, train_wall=11, gb_free=2.8, wall=10266
2021-06-18 21:30:03 | INFO | train_inner | epoch 001: 926 / 3002 loss=2.991, ppl=7.95, wps=5883, ups=0.09, wpb=64791, bsz=128, num_updates=915, lr=9.15e-05, gnorm=2.782, loss_scale=2, train_wall=11, gb_free=2.8, wall=10277
2021-06-18 21:30:14 | INFO | train_inner | epoch 001: 927 / 3002 loss=3, ppl=8, wps=5819.4, ups=0.09, wpb=64756, bsz=128, num_updates=916, lr=9.16e-05, gnorm=2.531, loss_scale=2, train_wall=11, gb_free=2.8, wall=10289
2021-06-18 21:30:25 | INFO | train_inner | epoch 001: 928 / 3002 loss=3.085, ppl=8.48, wps=5978.1, ups=0.09, wpb=64934, bsz=128, num_updates=917, lr=9.17e-05, gnorm=2.824, loss_scale=2, train_wall=10, gb_free=2.8, wall=10299
2021-06-18 21:30:36 | INFO | train_inner | epoch 001: 929 / 3002 loss=3.074, ppl=8.42, wps=5682, ups=0.09, wpb=64327, bsz=128, num_updates=918, lr=9.18e-05, gnorm=2.894, loss_scale=2, train_wall=11, gb_free=2.8, wall=10311
2021-06-18 21:30:47 | INFO | train_inner | epoch 001: 930 / 3002 loss=3.306, ppl=9.89, wps=5835.6, ups=0.09, wpb=64772, bsz=128, num_updates=919, lr=9.19e-05, gnorm=2.888, loss_scale=2, train_wall=11, gb_free=2.8, wall=10322
2021-06-18 21:30:59 | INFO | train_inner | epoch 001: 931 / 3002 loss=2.906, ppl=7.5, wps=5838.2, ups=0.09, wpb=64812, bsz=128, num_updates=920, lr=9.2e-05, gnorm=2.705, loss_scale=2, train_wall=11, gb_free=2.8, wall=10333
2021-06-18 21:31:10 | INFO | train_inner | epoch 001: 932 / 3002 loss=2.987, ppl=7.93, wps=5868.3, ups=0.09, wpb=64884, bsz=128, num_updates=921, lr=9.21e-05, gnorm=2.775, loss_scale=2, train_wall=11, gb_free=2.8, wall=10344
2021-06-18 21:31:21 | INFO | train_inner | epoch 001: 933 / 3002 loss=3.204, ppl=9.21, wps=5908.6, ups=0.09, wpb=64848, bsz=128, num_updates=922, lr=9.22e-05, gnorm=2.786, loss_scale=2, train_wall=11, gb_free=2.8, wall=10355
2021-06-18 21:31:32 | INFO | train_inner | epoch 001: 934 / 3002 loss=3.128, ppl=8.74, wps=5840.3, ups=0.09, wpb=64750, bsz=128, num_updates=923, lr=9.23e-05, gnorm=2.614, loss_scale=2, train_wall=11, gb_free=2.8, wall=10366
2021-06-18 21:31:43 | INFO | train_inner | epoch 001: 935 / 3002 loss=3.138, ppl=8.8, wps=5851.3, ups=0.09, wpb=64826, bsz=128, num_updates=924, lr=9.24e-05, gnorm=3.273, loss_scale=2, train_wall=11, gb_free=2.8, wall=10377
2021-06-18 21:31:54 | INFO | train_inner | epoch 001: 936 / 3002 loss=3.162, ppl=8.95, wps=5803.3, ups=0.09, wpb=64859, bsz=128, num_updates=925, lr=9.25e-05, gnorm=2.72, loss_scale=2, train_wall=11, gb_free=2.8, wall=10388
2021-06-18 21:32:05 | INFO | train_inner | epoch 001: 937 / 3002 loss=3.074, ppl=8.42, wps=5890.3, ups=0.09, wpb=64863, bsz=128, num_updates=926, lr=9.26e-05, gnorm=2.884, loss_scale=2, train_wall=11, gb_free=2.8, wall=10399
2021-06-18 21:32:16 | INFO | train_inner | epoch 001: 938 / 3002 loss=3.236, ppl=9.42, wps=5783.1, ups=0.09, wpb=64851, bsz=128, num_updates=927, lr=9.27e-05, gnorm=2.833, loss_scale=2, train_wall=11, gb_free=2.8, wall=10411
2021-06-18 21:32:27 | INFO | train_inner | epoch 001: 939 / 3002 loss=3.353, ppl=10.21, wps=5900.6, ups=0.09, wpb=64803, bsz=128, num_updates=928, lr=9.28e-05, gnorm=2.777, loss_scale=2, train_wall=11, gb_free=2.8, wall=10422
2021-06-18 21:32:38 | INFO | train_inner | epoch 001: 940 / 3002 loss=3.127, ppl=8.74, wps=5842, ups=0.09, wpb=64781, bsz=128, num_updates=929, lr=9.29e-05, gnorm=2.819, loss_scale=2, train_wall=11, gb_free=2.8, wall=10433
2021-06-18 21:32:50 | INFO | train_inner | epoch 001: 941 / 3002 loss=3.112, ppl=8.65, wps=5768.9, ups=0.09, wpb=64839, bsz=128, num_updates=930, lr=9.3e-05, gnorm=2.906, loss_scale=2, train_wall=11, gb_free=2.8, wall=10444
2021-06-18 21:33:01 | INFO | train_inner | epoch 001: 942 / 3002 loss=3.111, ppl=8.64, wps=5876.8, ups=0.09, wpb=64910, bsz=128, num_updates=931, lr=9.31e-05, gnorm=2.841, loss_scale=2, train_wall=11, gb_free=2.8, wall=10455
2021-06-18 21:33:12 | INFO | train_inner | epoch 001: 943 / 3002 loss=2.986, ppl=7.92, wps=5916.9, ups=0.09, wpb=64912, bsz=128, num_updates=932, lr=9.32e-05, gnorm=2.714, loss_scale=2, train_wall=10, gb_free=2.8, wall=10466
2021-06-18 21:33:23 | INFO | train_inner | epoch 001: 944 / 3002 loss=3.212, ppl=9.27, wps=5836.7, ups=0.09, wpb=64796, bsz=128, num_updates=933, lr=9.33e-05, gnorm=2.922, loss_scale=2, train_wall=11, gb_free=2.8, wall=10477
2021-06-18 21:33:34 | INFO | train_inner | epoch 001: 945 / 3002 loss=3.211, ppl=9.26, wps=5959.3, ups=0.09, wpb=64838, bsz=128, num_updates=934, lr=9.34e-05, gnorm=2.82, loss_scale=2, train_wall=10, gb_free=2.8, wall=10488
2021-06-18 21:33:44 | INFO | train_inner | epoch 001: 946 / 3002 loss=3.078, ppl=8.44, wps=5943, ups=0.09, wpb=64882, bsz=128, num_updates=935, lr=9.35e-05, gnorm=2.855, loss_scale=2, train_wall=10, gb_free=2.8, wall=10499
2021-06-18 21:33:55 | INFO | train_inner | epoch 001: 947 / 3002 loss=3.127, ppl=8.74, wps=5850.6, ups=0.09, wpb=64754, bsz=128, num_updates=936, lr=9.36e-05, gnorm=2.698, loss_scale=2, train_wall=11, gb_free=2.8, wall=10510
2021-06-18 21:34:06 | INFO | train_inner | epoch 001: 948 / 3002 loss=3.072, ppl=8.41, wps=5989.2, ups=0.09, wpb=64817, bsz=128, num_updates=937, lr=9.37e-05, gnorm=2.974, loss_scale=2, train_wall=10, gb_free=2.8, wall=10521
2021-06-18 21:34:17 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-18 21:34:28 | INFO | train_inner | epoch 001: 950 / 3002 loss=2.964, ppl=7.8, wps=2956.2, ups=0.05, wpb=64863, bsz=128, num_updates=938, lr=9.38e-05, gnorm=2.655, loss_scale=1, train_wall=21, gb_free=2.8, wall=10543
2021-06-18 21:34:39 | INFO | train_inner | epoch 001: 951 / 3002 loss=2.974, ppl=7.86, wps=5898.7, ups=0.09, wpb=64851, bsz=128, num_updates=939, lr=9.39e-05, gnorm=2.704, loss_scale=1, train_wall=11, gb_free=2.8, wall=10554
2021-06-18 21:34:50 | INFO | train_inner | epoch 001: 952 / 3002 loss=3.013, ppl=8.07, wps=5896.2, ups=0.09, wpb=64851, bsz=128, num_updates=940, lr=9.4e-05, gnorm=2.607, loss_scale=1, train_wall=11, gb_free=2.8, wall=10565
2021-06-18 21:35:01 | INFO | train_inner | epoch 001: 953 / 3002 loss=2.988, ppl=7.93, wps=5890.2, ups=0.09, wpb=64892, bsz=128, num_updates=941, lr=9.41e-05, gnorm=2.753, loss_scale=1, train_wall=11, gb_free=2.8, wall=10576
2021-06-18 21:35:12 | INFO | train_inner | epoch 001: 954 / 3002 loss=3.11, ppl=8.63, wps=5890.3, ups=0.09, wpb=64900, bsz=128, num_updates=942, lr=9.42e-05, gnorm=2.674, loss_scale=1, train_wall=11, gb_free=2.8, wall=10587
2021-06-18 21:35:23 | INFO | train_inner | epoch 001: 955 / 3002 loss=3.167, ppl=8.98, wps=5806.1, ups=0.09, wpb=64761, bsz=128, num_updates=943, lr=9.43e-05, gnorm=3.364, loss_scale=1, train_wall=11, gb_free=2.8, wall=10598
2021-06-18 21:35:35 | INFO | train_inner | epoch 001: 956 / 3002 loss=3.055, ppl=8.31, wps=5795.9, ups=0.09, wpb=64848, bsz=128, num_updates=944, lr=9.44e-05, gnorm=2.777, loss_scale=1, train_wall=11, gb_free=2.8, wall=10609
2021-06-18 21:35:46 | INFO | train_inner | epoch 001: 957 / 3002 loss=3.282, ppl=9.73, wps=5898.6, ups=0.09, wpb=64876, bsz=128, num_updates=945, lr=9.45e-05, gnorm=2.812, loss_scale=1, train_wall=11, gb_free=2.8, wall=10620
2021-06-18 21:35:57 | INFO | train_inner | epoch 001: 958 / 3002 loss=3.099, ppl=8.57, wps=5777.5, ups=0.09, wpb=64835, bsz=128, num_updates=946, lr=9.46e-05, gnorm=2.774, loss_scale=1, train_wall=11, gb_free=2.8, wall=10631
2021-06-18 21:36:08 | INFO | train_inner | epoch 001: 959 / 3002 loss=3.179, ppl=9.06, wps=5834.1, ups=0.09, wpb=64867, bsz=128, num_updates=947, lr=9.47e-05, gnorm=2.819, loss_scale=1, train_wall=11, gb_free=2.8, wall=10642
2021-06-18 21:36:19 | INFO | train_inner | epoch 001: 960 / 3002 loss=3.264, ppl=9.61, wps=6018.3, ups=0.09, wpb=64825, bsz=128, num_updates=948, lr=9.48e-05, gnorm=2.756, loss_scale=1, train_wall=10, gb_free=2.8, wall=10653
2021-06-18 21:36:30 | INFO | train_inner | epoch 001: 961 / 3002 loss=3.176, ppl=9.04, wps=5777.2, ups=0.09, wpb=64823, bsz=128, num_updates=949, lr=9.49e-05, gnorm=2.686, loss_scale=1, train_wall=11, gb_free=2.8, wall=10664
2021-06-18 21:36:41 | INFO | train_inner | epoch 001: 962 / 3002 loss=3.232, ppl=9.39, wps=5827.2, ups=0.09, wpb=64728, bsz=128, num_updates=950, lr=9.5e-05, gnorm=2.909, loss_scale=1, train_wall=11, gb_free=2.8, wall=10675
2021-06-18 21:36:52 | INFO | train_inner | epoch 001: 963 / 3002 loss=3.164, ppl=8.96, wps=5797, ups=0.09, wpb=64762, bsz=128, num_updates=951, lr=9.51e-05, gnorm=2.75, loss_scale=1, train_wall=11, gb_free=2.8, wall=10687
2021-06-18 21:37:03 | INFO | train_inner | epoch 001: 964 / 3002 loss=3.084, ppl=8.48, wps=5947.3, ups=0.09, wpb=64933, bsz=128, num_updates=952, lr=9.52e-05, gnorm=4.081, loss_scale=1, train_wall=10, gb_free=2.8, wall=10697
2021-06-18 21:37:14 | INFO | train_inner | epoch 001: 965 / 3002 loss=3.049, ppl=8.27, wps=5879.2, ups=0.09, wpb=64832, bsz=128, num_updates=953, lr=9.53e-05, gnorm=2.679, loss_scale=1, train_wall=11, gb_free=2.8, wall=10709
2021-06-18 21:37:25 | INFO | train_inner | epoch 001: 966 / 3002 loss=3.224, ppl=9.34, wps=5947.5, ups=0.09, wpb=64805, bsz=128, num_updates=954, lr=9.54e-05, gnorm=3.124, loss_scale=1, train_wall=10, gb_free=2.8, wall=10719
2021-06-18 21:37:36 | INFO | train_inner | epoch 001: 967 / 3002 loss=3.117, ppl=8.67, wps=5834.8, ups=0.09, wpb=64908, bsz=128, num_updates=955, lr=9.55e-05, gnorm=2.712, loss_scale=1, train_wall=11, gb_free=2.8, wall=10731
2021-06-18 21:37:47 | INFO | train_inner | epoch 001: 968 / 3002 loss=2.96, ppl=7.78, wps=5857.5, ups=0.09, wpb=64858, bsz=128, num_updates=956, lr=9.56e-05, gnorm=2.675, loss_scale=1, train_wall=11, gb_free=2.8, wall=10742
2021-06-18 21:37:58 | INFO | train_inner | epoch 001: 969 / 3002 loss=3.221, ppl=9.32, wps=5969.5, ups=0.09, wpb=64871, bsz=128, num_updates=957, lr=9.57e-05, gnorm=2.895, loss_scale=1, train_wall=10, gb_free=2.8, wall=10752
2021-06-18 21:38:09 | INFO | train_inner | epoch 001: 970 / 3002 loss=2.94, ppl=7.67, wps=5761.2, ups=0.09, wpb=64814, bsz=128, num_updates=958, lr=9.58e-05, gnorm=2.672, loss_scale=1, train_wall=11, gb_free=2.8, wall=10764
2021-06-18 21:38:20 | INFO | train_inner | epoch 001: 971 / 3002 loss=3.031, ppl=8.17, wps=5867.9, ups=0.09, wpb=64827, bsz=128, num_updates=959, lr=9.59e-05, gnorm=14.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=10775
2021-06-18 21:38:31 | INFO | train_inner | epoch 001: 972 / 3002 loss=3.141, ppl=8.82, wps=5928.9, ups=0.09, wpb=64897, bsz=128, num_updates=960, lr=9.6e-05, gnorm=2.769, loss_scale=1, train_wall=10, gb_free=2.8, wall=10786
2021-06-18 21:38:42 | INFO | train_inner | epoch 001: 973 / 3002 loss=2.979, ppl=7.89, wps=5941.6, ups=0.09, wpb=64846, bsz=128, num_updates=961, lr=9.61e-05, gnorm=2.619, loss_scale=1, train_wall=10, gb_free=2.8, wall=10797
2021-06-18 21:38:53 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-18 21:39:04 | INFO | train_inner | epoch 001: 975 / 3002 loss=3.129, ppl=8.75, wps=2940.6, ups=0.05, wpb=64758, bsz=128, num_updates=962, lr=9.62e-05, gnorm=2.753, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=10819
2021-06-18 21:39:16 | INFO | train_inner | epoch 001: 976 / 3002 loss=3.236, ppl=9.43, wps=5796.6, ups=0.09, wpb=64793, bsz=128, num_updates=963, lr=9.63e-05, gnorm=2.633, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10830
2021-06-18 21:39:27 | INFO | train_inner | epoch 001: 977 / 3002 loss=3.124, ppl=8.72, wps=5861.2, ups=0.09, wpb=64852, bsz=128, num_updates=964, lr=9.64e-05, gnorm=2.656, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10841
2021-06-18 21:39:38 | INFO | train_inner | epoch 001: 978 / 3002 loss=3.203, ppl=9.21, wps=5854.4, ups=0.09, wpb=64792, bsz=128, num_updates=965, lr=9.65e-05, gnorm=2.611, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10852
2021-06-18 21:39:49 | INFO | train_inner | epoch 001: 979 / 3002 loss=3.024, ppl=8.13, wps=5866.2, ups=0.09, wpb=64773, bsz=128, num_updates=966, lr=9.66e-05, gnorm=2.595, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10863
2021-06-18 21:39:59 | INFO | train_inner | epoch 001: 980 / 3002 loss=3.288, ppl=9.76, wps=6087.4, ups=0.09, wpb=64882, bsz=128, num_updates=967, lr=9.67e-05, gnorm=2.708, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=10874
2021-06-18 21:40:10 | INFO | train_inner | epoch 001: 981 / 3002 loss=3.036, ppl=8.2, wps=5953.9, ups=0.09, wpb=64829, bsz=128, num_updates=968, lr=9.68e-05, gnorm=2.488, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=10885
2021-06-18 21:40:21 | INFO | train_inner | epoch 001: 982 / 3002 loss=3.16, ppl=8.94, wps=5829.9, ups=0.09, wpb=64861, bsz=128, num_updates=969, lr=9.69e-05, gnorm=2.708, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10896
2021-06-18 21:40:33 | INFO | train_inner | epoch 001: 983 / 3002 loss=3.293, ppl=9.8, wps=5743, ups=0.09, wpb=64817, bsz=128, num_updates=970, lr=9.7e-05, gnorm=2.782, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10907
2021-06-18 21:40:44 | INFO | train_inner | epoch 001: 984 / 3002 loss=3.211, ppl=9.26, wps=5933.7, ups=0.09, wpb=64826, bsz=128, num_updates=971, lr=9.71e-05, gnorm=2.593, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=10918
2021-06-18 21:40:55 | INFO | train_inner | epoch 001: 985 / 3002 loss=3.101, ppl=8.58, wps=5899, ups=0.09, wpb=64890, bsz=128, num_updates=972, lr=9.72e-05, gnorm=2.593, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10929
2021-06-18 21:41:06 | INFO | train_inner | epoch 001: 986 / 3002 loss=3.229, ppl=9.38, wps=5884.3, ups=0.09, wpb=64860, bsz=128, num_updates=973, lr=9.73e-05, gnorm=5.495, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10940
2021-06-18 21:41:16 | INFO | train_inner | epoch 001: 987 / 3002 loss=3.056, ppl=8.31, wps=5943.9, ups=0.09, wpb=64828, bsz=128, num_updates=974, lr=9.74e-05, gnorm=2.59, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=10951
2021-06-18 21:41:28 | INFO | train_inner | epoch 001: 988 / 3002 loss=2.948, ppl=7.72, wps=5837.3, ups=0.09, wpb=64737, bsz=128, num_updates=975, lr=9.75e-05, gnorm=2.608, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10962
2021-06-18 21:41:39 | INFO | train_inner | epoch 001: 989 / 3002 loss=3.152, ppl=8.89, wps=5857.8, ups=0.09, wpb=64831, bsz=128, num_updates=976, lr=9.76e-05, gnorm=2.957, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10973
2021-06-18 21:41:50 | INFO | train_inner | epoch 001: 990 / 3002 loss=2.964, ppl=7.81, wps=5790.9, ups=0.09, wpb=64860, bsz=128, num_updates=977, lr=9.77e-05, gnorm=2.743, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=10984
2021-06-18 21:42:01 | INFO | train_inner | epoch 001: 991 / 3002 loss=3.225, ppl=9.35, wps=5963.9, ups=0.09, wpb=64828, bsz=128, num_updates=978, lr=9.78e-05, gnorm=2.606, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=10995
2021-06-18 21:42:12 | INFO | train_inner | epoch 001: 992 / 3002 loss=3.192, ppl=9.14, wps=5916.4, ups=0.09, wpb=64818, bsz=128, num_updates=979, lr=9.79e-05, gnorm=2.728, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11006
2021-06-18 21:42:23 | INFO | train_inner | epoch 001: 993 / 3002 loss=3.085, ppl=8.48, wps=5870.3, ups=0.09, wpb=64805, bsz=128, num_updates=980, lr=9.8e-05, gnorm=2.746, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11017
2021-06-18 21:42:34 | INFO | train_inner | epoch 001: 994 / 3002 loss=3.064, ppl=8.36, wps=5835.2, ups=0.09, wpb=64778, bsz=128, num_updates=981, lr=9.81e-05, gnorm=2.586, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11028
2021-06-18 21:42:45 | INFO | train_inner | epoch 001: 995 / 3002 loss=3.118, ppl=8.68, wps=5890.1, ups=0.09, wpb=64729, bsz=128, num_updates=982, lr=9.82e-05, gnorm=2.73, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11039
2021-06-18 21:42:56 | INFO | train_inner | epoch 001: 996 / 3002 loss=3.345, ppl=10.16, wps=5925.4, ups=0.09, wpb=64855, bsz=128, num_updates=983, lr=9.83e-05, gnorm=2.888, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11050
2021-06-18 21:43:07 | INFO | train_inner | epoch 001: 997 / 3002 loss=3.071, ppl=8.41, wps=5797.6, ups=0.09, wpb=64822, bsz=128, num_updates=984, lr=9.84e-05, gnorm=2.692, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11061
2021-06-18 21:43:18 | INFO | train_inner | epoch 001: 998 / 3002 loss=3.229, ppl=9.38, wps=5871.5, ups=0.09, wpb=64832, bsz=128, num_updates=985, lr=9.85e-05, gnorm=2.808, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11072
2021-06-18 21:43:29 | INFO | train_inner | epoch 001: 999 / 3002 loss=3.149, ppl=8.87, wps=5925.8, ups=0.09, wpb=64862, bsz=128, num_updates=986, lr=9.86e-05, gnorm=2.739, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11083
2021-06-18 21:43:40 | INFO | train_inner | epoch 001: 1000 / 3002 loss=3.111, ppl=8.64, wps=5815.6, ups=0.09, wpb=64827, bsz=128, num_updates=987, lr=9.87e-05, gnorm=2.712, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11094
2021-06-18 21:43:51 | INFO | train_inner | epoch 001: 1001 / 3002 loss=3.292, ppl=9.79, wps=5915.3, ups=0.09, wpb=64752, bsz=128, num_updates=988, lr=9.88e-05, gnorm=2.789, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11105
2021-06-18 21:44:02 | INFO | train_inner | epoch 001: 1002 / 3002 loss=3.267, ppl=9.62, wps=5834.2, ups=0.09, wpb=64806, bsz=128, num_updates=989, lr=9.89e-05, gnorm=2.77, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11116
2021-06-18 21:44:13 | INFO | train_inner | epoch 001: 1003 / 3002 loss=3.151, ppl=8.88, wps=5817.7, ups=0.09, wpb=64802, bsz=128, num_updates=990, lr=9.9e-05, gnorm=2.861, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11128
2021-06-18 21:44:24 | INFO | train_inner | epoch 001: 1004 / 3002 loss=3.131, ppl=8.76, wps=5954.7, ups=0.09, wpb=64943, bsz=128, num_updates=991, lr=9.91e-05, gnorm=2.751, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11139
2021-06-18 21:44:35 | INFO | train_inner | epoch 001: 1005 / 3002 loss=2.959, ppl=7.78, wps=5893.1, ups=0.09, wpb=64900, bsz=128, num_updates=992, lr=9.92e-05, gnorm=2.653, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11150
2021-06-18 21:44:46 | INFO | train_inner | epoch 001: 1006 / 3002 loss=3.136, ppl=8.79, wps=5881.3, ups=0.09, wpb=64889, bsz=128, num_updates=993, lr=9.93e-05, gnorm=2.789, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11161
2021-06-18 21:44:57 | INFO | train_inner | epoch 001: 1007 / 3002 loss=3.059, ppl=8.34, wps=5930.2, ups=0.09, wpb=64771, bsz=128, num_updates=994, lr=9.94e-05, gnorm=2.71, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11171
2021-06-18 21:45:08 | INFO | train_inner | epoch 001: 1008 / 3002 loss=3.043, ppl=8.24, wps=5905.7, ups=0.09, wpb=64853, bsz=128, num_updates=995, lr=9.95e-05, gnorm=2.651, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11182
2021-06-18 21:45:19 | INFO | train_inner | epoch 001: 1009 / 3002 loss=3.201, ppl=9.2, wps=5777.7, ups=0.09, wpb=64849, bsz=128, num_updates=996, lr=9.96e-05, gnorm=2.797, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11194
2021-06-18 21:45:31 | INFO | train_inner | epoch 001: 1010 / 3002 loss=3.063, ppl=8.36, wps=5798.2, ups=0.09, wpb=64820, bsz=128, num_updates=997, lr=9.97e-05, gnorm=2.679, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11205
2021-06-18 21:45:42 | INFO | train_inner | epoch 001: 1011 / 3002 loss=3.131, ppl=8.76, wps=5878.4, ups=0.09, wpb=64837, bsz=128, num_updates=998, lr=9.98e-05, gnorm=2.632, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11216
2021-06-18 21:45:53 | INFO | train_inner | epoch 001: 1012 / 3002 loss=3.085, ppl=8.49, wps=5825, ups=0.09, wpb=64760, bsz=128, num_updates=999, lr=9.99e-05, gnorm=2.694, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11227
2021-06-18 21:46:04 | INFO | train_inner | epoch 001: 1013 / 3002 loss=3.131, ppl=8.76, wps=5960.3, ups=0.09, wpb=64806, bsz=128, num_updates=1000, lr=0.0001, gnorm=2.837, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11238
2021-06-18 21:46:15 | INFO | train_inner | epoch 001: 1014 / 3002 loss=3.196, ppl=9.16, wps=5874.9, ups=0.09, wpb=64806, bsz=128, num_updates=1001, lr=0.0001, gnorm=2.637, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11249
2021-06-18 21:46:26 | INFO | train_inner | epoch 001: 1015 / 3002 loss=3.072, ppl=8.41, wps=5783.2, ups=0.09, wpb=64865, bsz=128, num_updates=1002, lr=0.0001, gnorm=2.665, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11260
2021-06-18 21:46:37 | INFO | train_inner | epoch 001: 1016 / 3002 loss=3.073, ppl=8.42, wps=5853.4, ups=0.09, wpb=64773, bsz=128, num_updates=1003, lr=0.0001, gnorm=2.6, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11271
2021-06-18 21:46:48 | INFO | train_inner | epoch 001: 1017 / 3002 loss=3.027, ppl=8.15, wps=5972, ups=0.09, wpb=64779, bsz=128, num_updates=1004, lr=0.0001, gnorm=2.708, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11282
2021-06-18 21:46:59 | INFO | train_inner | epoch 001: 1018 / 3002 loss=3.19, ppl=9.13, wps=5890, ups=0.09, wpb=64874, bsz=128, num_updates=1005, lr=0.0001, gnorm=3.055, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11293
2021-06-18 21:47:10 | INFO | train_inner | epoch 001: 1019 / 3002 loss=3.042, ppl=8.24, wps=5866.9, ups=0.09, wpb=64824, bsz=128, num_updates=1006, lr=0.0001, gnorm=3.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11304
2021-06-18 21:47:21 | INFO | train_inner | epoch 001: 1020 / 3002 loss=2.997, ppl=7.99, wps=5849.5, ups=0.09, wpb=64807, bsz=128, num_updates=1007, lr=9.99999e-05, gnorm=2.579, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11315
2021-06-18 21:47:32 | INFO | train_inner | epoch 001: 1021 / 3002 loss=3.199, ppl=9.18, wps=5962.3, ups=0.09, wpb=64875, bsz=128, num_updates=1008, lr=9.99999e-05, gnorm=4.003, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11326
2021-06-18 21:47:43 | INFO | train_inner | epoch 001: 1022 / 3002 loss=3.102, ppl=8.59, wps=5781.7, ups=0.09, wpb=64829, bsz=128, num_updates=1009, lr=9.99999e-05, gnorm=2.699, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11337
2021-06-18 21:47:54 | INFO | train_inner | epoch 001: 1023 / 3002 loss=2.9, ppl=7.46, wps=5982, ups=0.09, wpb=64859, bsz=128, num_updates=1010, lr=9.99999e-05, gnorm=2.563, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11348
2021-06-18 21:48:05 | INFO | train_inner | epoch 001: 1024 / 3002 loss=3.081, ppl=8.46, wps=5994.1, ups=0.09, wpb=64969, bsz=128, num_updates=1011, lr=9.99999e-05, gnorm=2.506, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11359
2021-06-18 21:48:16 | INFO | train_inner | epoch 001: 1025 / 3002 loss=2.985, ppl=7.92, wps=5788.1, ups=0.09, wpb=64880, bsz=128, num_updates=1012, lr=9.99999e-05, gnorm=2.581, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11370
2021-06-18 21:48:27 | INFO | train_inner | epoch 001: 1026 / 3002 loss=3.127, ppl=8.74, wps=5831.2, ups=0.09, wpb=64814, bsz=128, num_updates=1013, lr=9.99999e-05, gnorm=3.348, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11381
2021-06-18 21:48:38 | INFO | train_inner | epoch 001: 1027 / 3002 loss=3.103, ppl=8.59, wps=5881.8, ups=0.09, wpb=64817, bsz=128, num_updates=1014, lr=9.99999e-05, gnorm=2.737, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11392
2021-06-18 21:48:49 | INFO | train_inner | epoch 001: 1028 / 3002 loss=3.079, ppl=8.45, wps=5795, ups=0.09, wpb=64792, bsz=128, num_updates=1015, lr=9.99999e-05, gnorm=2.64, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11403
2021-06-18 21:49:00 | INFO | train_inner | epoch 001: 1029 / 3002 loss=3.281, ppl=9.72, wps=5772.5, ups=0.09, wpb=64818, bsz=128, num_updates=1016, lr=9.99999e-05, gnorm=2.743, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11415
2021-06-18 21:49:11 | INFO | train_inner | epoch 001: 1030 / 3002 loss=3.112, ppl=8.65, wps=5872.8, ups=0.09, wpb=64766, bsz=128, num_updates=1017, lr=9.99999e-05, gnorm=3.064, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11426
2021-06-18 21:49:22 | INFO | train_inner | epoch 001: 1031 / 3002 loss=2.871, ppl=7.32, wps=6081.8, ups=0.09, wpb=64908, bsz=128, num_updates=1018, lr=9.99999e-05, gnorm=2.604, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11436
2021-06-18 21:49:33 | INFO | train_inner | epoch 001: 1032 / 3002 loss=3.078, ppl=8.44, wps=5995.4, ups=0.09, wpb=64835, bsz=128, num_updates=1019, lr=9.99998e-05, gnorm=3.192, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11447
2021-06-18 21:49:44 | INFO | train_inner | epoch 001: 1033 / 3002 loss=3.012, ppl=8.06, wps=5776.6, ups=0.09, wpb=64889, bsz=128, num_updates=1020, lr=9.99998e-05, gnorm=2.676, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11458
2021-06-18 21:49:55 | INFO | train_inner | epoch 001: 1034 / 3002 loss=3.012, ppl=8.07, wps=5778.8, ups=0.09, wpb=64780, bsz=128, num_updates=1021, lr=9.99998e-05, gnorm=2.519, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11470
2021-06-18 21:50:06 | INFO | train_inner | epoch 001: 1035 / 3002 loss=3.211, ppl=9.26, wps=5831.7, ups=0.09, wpb=64710, bsz=128, num_updates=1022, lr=9.99998e-05, gnorm=2.805, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11481
2021-06-18 21:50:17 | INFO | train_inner | epoch 001: 1036 / 3002 loss=3.14, ppl=8.82, wps=5870.2, ups=0.09, wpb=64821, bsz=128, num_updates=1023, lr=9.99998e-05, gnorm=2.801, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11492
2021-06-18 21:50:28 | INFO | train_inner | epoch 001: 1037 / 3002 loss=3.148, ppl=8.87, wps=5914.2, ups=0.09, wpb=64854, bsz=128, num_updates=1024, lr=9.99998e-05, gnorm=2.62, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11503
2021-06-18 21:50:39 | INFO | train_inner | epoch 001: 1038 / 3002 loss=3.248, ppl=9.5, wps=5917.5, ups=0.09, wpb=64867, bsz=128, num_updates=1025, lr=9.99998e-05, gnorm=2.712, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11514
2021-06-18 21:50:50 | INFO | train_inner | epoch 001: 1039 / 3002 loss=3.121, ppl=8.7, wps=5952.3, ups=0.09, wpb=64904, bsz=128, num_updates=1026, lr=9.99998e-05, gnorm=2.747, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11525
2021-06-18 21:51:01 | INFO | train_inner | epoch 001: 1040 / 3002 loss=3.177, ppl=9.04, wps=5875, ups=0.09, wpb=64836, bsz=128, num_updates=1027, lr=9.99998e-05, gnorm=2.665, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11536
2021-06-18 21:51:12 | INFO | train_inner | epoch 001: 1041 / 3002 loss=3.237, ppl=9.43, wps=5841.9, ups=0.09, wpb=64869, bsz=128, num_updates=1028, lr=9.99998e-05, gnorm=2.681, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11547
2021-06-18 21:51:23 | INFO | train_inner | epoch 001: 1042 / 3002 loss=3.158, ppl=8.93, wps=6033.9, ups=0.09, wpb=64853, bsz=128, num_updates=1029, lr=9.99998e-05, gnorm=2.676, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11558
2021-06-18 21:51:34 | INFO | train_inner | epoch 001: 1043 / 3002 loss=3.1, ppl=8.57, wps=5855.8, ups=0.09, wpb=64854, bsz=128, num_updates=1030, lr=9.99998e-05, gnorm=2.627, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11569
2021-06-18 21:51:45 | INFO | train_inner | epoch 001: 1044 / 3002 loss=3.036, ppl=8.2, wps=5849.7, ups=0.09, wpb=64864, bsz=128, num_updates=1031, lr=9.99998e-05, gnorm=2.549, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11580
2021-06-18 21:51:56 | INFO | train_inner | epoch 001: 1045 / 3002 loss=3.1, ppl=8.58, wps=5877.3, ups=0.09, wpb=64748, bsz=128, num_updates=1032, lr=9.99997e-05, gnorm=2.646, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11591
2021-06-18 21:52:08 | INFO | train_inner | epoch 001: 1046 / 3002 loss=3.114, ppl=8.66, wps=5800.3, ups=0.09, wpb=64804, bsz=128, num_updates=1033, lr=9.99997e-05, gnorm=2.673, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11602
2021-06-18 21:52:19 | INFO | train_inner | epoch 001: 1047 / 3002 loss=2.978, ppl=7.88, wps=5883, ups=0.09, wpb=64811, bsz=128, num_updates=1034, lr=9.99997e-05, gnorm=2.617, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11613
2021-06-18 21:52:30 | INFO | train_inner | epoch 001: 1048 / 3002 loss=3.182, ppl=9.08, wps=5910.8, ups=0.09, wpb=64810, bsz=128, num_updates=1035, lr=9.99997e-05, gnorm=2.525, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11624
2021-06-18 21:52:41 | INFO | train_inner | epoch 001: 1049 / 3002 loss=3.277, ppl=9.69, wps=5829.2, ups=0.09, wpb=64847, bsz=128, num_updates=1036, lr=9.99997e-05, gnorm=2.702, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11635
2021-06-18 21:52:52 | INFO | train_inner | epoch 001: 1050 / 3002 loss=3.245, ppl=9.48, wps=5828.7, ups=0.09, wpb=64743, bsz=128, num_updates=1037, lr=9.99997e-05, gnorm=2.779, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11646
2021-06-18 21:53:03 | INFO | train_inner | epoch 001: 1051 / 3002 loss=3.124, ppl=8.72, wps=5818.5, ups=0.09, wpb=64878, bsz=128, num_updates=1038, lr=9.99997e-05, gnorm=3.002, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11657
2021-06-18 21:53:14 | INFO | train_inner | epoch 001: 1052 / 3002 loss=3.085, ppl=8.49, wps=6041.4, ups=0.09, wpb=64806, bsz=128, num_updates=1039, lr=9.99997e-05, gnorm=2.627, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11668
2021-06-18 21:53:25 | INFO | train_inner | epoch 001: 1053 / 3002 loss=3.267, ppl=9.63, wps=5834.9, ups=0.09, wpb=64803, bsz=128, num_updates=1040, lr=9.99997e-05, gnorm=3.23, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11679
2021-06-18 21:53:36 | INFO | train_inner | epoch 001: 1054 / 3002 loss=3.155, ppl=8.91, wps=5805.9, ups=0.09, wpb=64798, bsz=128, num_updates=1041, lr=9.99997e-05, gnorm=2.606, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11690
2021-06-18 21:53:47 | INFO | train_inner | epoch 001: 1055 / 3002 loss=3.1, ppl=8.58, wps=5767.9, ups=0.09, wpb=64787, bsz=128, num_updates=1042, lr=9.99997e-05, gnorm=2.652, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11701
2021-06-18 21:53:58 | INFO | train_inner | epoch 001: 1056 / 3002 loss=3.163, ppl=8.96, wps=6037.5, ups=0.09, wpb=64810, bsz=128, num_updates=1043, lr=9.99997e-05, gnorm=2.877, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11712
2021-06-18 21:54:09 | INFO | train_inner | epoch 001: 1057 / 3002 loss=3.294, ppl=9.81, wps=5913.4, ups=0.09, wpb=64752, bsz=128, num_updates=1044, lr=9.99996e-05, gnorm=2.65, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11723
2021-06-18 21:54:20 | INFO | train_inner | epoch 001: 1058 / 3002 loss=3.038, ppl=8.21, wps=5739.2, ups=0.09, wpb=64824, bsz=128, num_updates=1045, lr=9.99996e-05, gnorm=2.575, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11734
2021-06-18 21:54:31 | INFO | train_inner | epoch 001: 1059 / 3002 loss=3.134, ppl=8.78, wps=5833.3, ups=0.09, wpb=64869, bsz=128, num_updates=1046, lr=9.99996e-05, gnorm=2.597, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11746
2021-06-18 21:54:42 | INFO | train_inner | epoch 001: 1060 / 3002 loss=3.002, ppl=8.01, wps=5869.5, ups=0.09, wpb=64827, bsz=128, num_updates=1047, lr=9.99996e-05, gnorm=2.775, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11757
2021-06-18 21:54:54 | INFO | train_inner | epoch 001: 1061 / 3002 loss=2.931, ppl=7.63, wps=5791.4, ups=0.09, wpb=64913, bsz=128, num_updates=1048, lr=9.99996e-05, gnorm=2.491, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11768
2021-06-18 21:55:04 | INFO | train_inner | epoch 001: 1062 / 3002 loss=3.14, ppl=8.82, wps=5901.2, ups=0.09, wpb=64784, bsz=128, num_updates=1049, lr=9.99996e-05, gnorm=2.812, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11779
2021-06-18 21:55:16 | INFO | train_inner | epoch 001: 1063 / 3002 loss=2.964, ppl=7.81, wps=5858.2, ups=0.09, wpb=64796, bsz=128, num_updates=1050, lr=9.99996e-05, gnorm=2.53, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11790
2021-06-18 21:55:27 | INFO | train_inner | epoch 001: 1064 / 3002 loss=3.116, ppl=8.67, wps=5905.6, ups=0.09, wpb=64855, bsz=128, num_updates=1051, lr=9.99996e-05, gnorm=2.553, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11801
2021-06-18 21:55:38 | INFO | train_inner | epoch 001: 1065 / 3002 loss=3.171, ppl=9.01, wps=5911.9, ups=0.09, wpb=64843, bsz=128, num_updates=1052, lr=9.99996e-05, gnorm=2.594, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11812
2021-06-18 21:55:49 | INFO | train_inner | epoch 001: 1066 / 3002 loss=3.103, ppl=8.59, wps=5867.7, ups=0.09, wpb=64797, bsz=128, num_updates=1053, lr=9.99996e-05, gnorm=2.631, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11823
2021-06-18 21:55:59 | INFO | train_inner | epoch 001: 1067 / 3002 loss=3.037, ppl=8.21, wps=5972.2, ups=0.09, wpb=64865, bsz=128, num_updates=1054, lr=9.99996e-05, gnorm=2.614, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11834
2021-06-18 21:56:11 | INFO | train_inner | epoch 001: 1068 / 3002 loss=2.985, ppl=7.92, wps=5827.2, ups=0.09, wpb=64854, bsz=128, num_updates=1055, lr=9.99996e-05, gnorm=2.519, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11845
2021-06-18 21:56:22 | INFO | train_inner | epoch 001: 1069 / 3002 loss=3.183, ppl=9.08, wps=5892.9, ups=0.09, wpb=64842, bsz=128, num_updates=1056, lr=9.99996e-05, gnorm=2.633, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11856
2021-06-18 21:56:32 | INFO | train_inner | epoch 001: 1070 / 3002 loss=3.051, ppl=8.29, wps=5956.7, ups=0.09, wpb=64833, bsz=128, num_updates=1057, lr=9.99995e-05, gnorm=2.513, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11867
2021-06-18 21:56:44 | INFO | train_inner | epoch 001: 1071 / 3002 loss=3.092, ppl=8.53, wps=5816.2, ups=0.09, wpb=64840, bsz=128, num_updates=1058, lr=9.99995e-05, gnorm=2.598, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11878
2021-06-18 21:56:54 | INFO | train_inner | epoch 001: 1072 / 3002 loss=3.126, ppl=8.73, wps=6029.8, ups=0.09, wpb=64766, bsz=128, num_updates=1059, lr=9.99995e-05, gnorm=2.668, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11889
2021-06-18 21:57:06 | INFO | train_inner | epoch 001: 1073 / 3002 loss=3.007, ppl=8.04, wps=5766.1, ups=0.09, wpb=64808, bsz=128, num_updates=1060, lr=9.99995e-05, gnorm=2.796, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11900
2021-06-18 21:57:17 | INFO | train_inner | epoch 001: 1074 / 3002 loss=3.022, ppl=8.12, wps=5781, ups=0.09, wpb=64812, bsz=128, num_updates=1061, lr=9.99995e-05, gnorm=2.61, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11911
2021-06-18 21:57:28 | INFO | train_inner | epoch 001: 1075 / 3002 loss=3.178, ppl=9.05, wps=5857.1, ups=0.09, wpb=64804, bsz=128, num_updates=1062, lr=9.99995e-05, gnorm=2.629, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11922
2021-06-18 21:57:39 | INFO | train_inner | epoch 001: 1076 / 3002 loss=3.182, ppl=9.07, wps=5900.5, ups=0.09, wpb=64828, bsz=128, num_updates=1063, lr=9.99995e-05, gnorm=2.652, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11933
2021-06-18 21:57:50 | INFO | train_inner | epoch 001: 1077 / 3002 loss=3.094, ppl=8.54, wps=5893.7, ups=0.09, wpb=64864, bsz=128, num_updates=1064, lr=9.99995e-05, gnorm=2.61, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11944
2021-06-18 21:58:01 | INFO | train_inner | epoch 001: 1078 / 3002 loss=3.199, ppl=9.18, wps=5894.9, ups=0.09, wpb=64826, bsz=128, num_updates=1065, lr=9.99995e-05, gnorm=2.778, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11955
2021-06-18 21:58:12 | INFO | train_inner | epoch 001: 1079 / 3002 loss=3.047, ppl=8.26, wps=5915.2, ups=0.09, wpb=64871, bsz=128, num_updates=1066, lr=9.99995e-05, gnorm=2.644, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11966
2021-06-18 21:58:23 | INFO | train_inner | epoch 001: 1080 / 3002 loss=3.078, ppl=8.44, wps=5932.2, ups=0.09, wpb=64772, bsz=128, num_updates=1067, lr=9.99995e-05, gnorm=2.598, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=11977
2021-06-18 21:58:34 | INFO | train_inner | epoch 001: 1081 / 3002 loss=3.126, ppl=8.73, wps=5854.8, ups=0.09, wpb=64820, bsz=128, num_updates=1068, lr=9.99995e-05, gnorm=2.627, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11988
2021-06-18 21:58:45 | INFO | train_inner | epoch 001: 1082 / 3002 loss=3.011, ppl=8.06, wps=5830.4, ups=0.09, wpb=64877, bsz=128, num_updates=1069, lr=9.99994e-05, gnorm=2.577, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=11999
2021-06-18 21:58:56 | INFO | train_inner | epoch 001: 1083 / 3002 loss=3.11, ppl=8.64, wps=5805.8, ups=0.09, wpb=64826, bsz=128, num_updates=1070, lr=9.99994e-05, gnorm=2.546, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12010
2021-06-18 21:59:07 | INFO | train_inner | epoch 001: 1084 / 3002 loss=3.078, ppl=8.44, wps=5966.6, ups=0.09, wpb=64832, bsz=128, num_updates=1071, lr=9.99994e-05, gnorm=2.618, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12021
2021-06-18 21:59:18 | INFO | train_inner | epoch 001: 1085 / 3002 loss=3.257, ppl=9.56, wps=5835.4, ups=0.09, wpb=64790, bsz=128, num_updates=1072, lr=9.99994e-05, gnorm=2.576, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12032
2021-06-18 21:59:29 | INFO | train_inner | epoch 001: 1086 / 3002 loss=3.088, ppl=8.5, wps=5780.7, ups=0.09, wpb=64823, bsz=128, num_updates=1073, lr=9.99994e-05, gnorm=2.657, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12044
2021-06-18 21:59:40 | INFO | train_inner | epoch 001: 1087 / 3002 loss=3.078, ppl=8.45, wps=5899.7, ups=0.09, wpb=64868, bsz=128, num_updates=1074, lr=9.99994e-05, gnorm=2.585, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12055
2021-06-18 21:59:51 | INFO | train_inner | epoch 001: 1088 / 3002 loss=3.02, ppl=8.11, wps=5896.6, ups=0.09, wpb=64828, bsz=128, num_updates=1075, lr=9.99994e-05, gnorm=2.591, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12066
2021-06-18 22:00:02 | INFO | train_inner | epoch 001: 1089 / 3002 loss=3.168, ppl=8.99, wps=5942.9, ups=0.09, wpb=64785, bsz=128, num_updates=1076, lr=9.99994e-05, gnorm=2.883, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12076
2021-06-18 22:00:13 | INFO | train_inner | epoch 001: 1090 / 3002 loss=3.03, ppl=8.17, wps=5894.6, ups=0.09, wpb=64774, bsz=128, num_updates=1077, lr=9.99994e-05, gnorm=3.151, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12087
2021-06-18 22:00:24 | INFO | train_inner | epoch 001: 1091 / 3002 loss=2.999, ppl=8, wps=5814.2, ups=0.09, wpb=64796, bsz=128, num_updates=1078, lr=9.99994e-05, gnorm=2.464, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12099
2021-06-18 22:00:35 | INFO | train_inner | epoch 001: 1092 / 3002 loss=3.137, ppl=8.8, wps=5903, ups=0.09, wpb=64808, bsz=128, num_updates=1079, lr=9.99994e-05, gnorm=2.671, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12110
2021-06-18 22:00:46 | INFO | train_inner | epoch 001: 1093 / 3002 loss=3.228, ppl=9.37, wps=5955.1, ups=0.09, wpb=64836, bsz=128, num_updates=1080, lr=9.99994e-05, gnorm=2.662, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12120
2021-06-18 22:00:57 | INFO | train_inner | epoch 001: 1094 / 3002 loss=3.192, ppl=9.14, wps=5881.3, ups=0.09, wpb=64784, bsz=128, num_updates=1081, lr=9.99994e-05, gnorm=2.492, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12131
2021-06-18 22:01:08 | INFO | train_inner | epoch 001: 1095 / 3002 loss=3.132, ppl=8.76, wps=5916.7, ups=0.09, wpb=64914, bsz=128, num_updates=1082, lr=9.99993e-05, gnorm=2.771, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12142
2021-06-18 22:01:19 | INFO | train_inner | epoch 001: 1096 / 3002 loss=2.923, ppl=7.58, wps=5870.8, ups=0.09, wpb=64894, bsz=128, num_updates=1083, lr=9.99993e-05, gnorm=2.694, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12154
2021-06-18 22:01:30 | INFO | train_inner | epoch 001: 1097 / 3002 loss=3.192, ppl=9.14, wps=5789.4, ups=0.09, wpb=64837, bsz=128, num_updates=1084, lr=9.99993e-05, gnorm=3.095, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12165
2021-06-18 22:01:41 | INFO | train_inner | epoch 001: 1098 / 3002 loss=3.049, ppl=8.28, wps=5859.2, ups=0.09, wpb=64788, bsz=128, num_updates=1085, lr=9.99993e-05, gnorm=2.647, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12176
2021-06-18 22:01:53 | INFO | train_inner | epoch 001: 1099 / 3002 loss=2.98, ppl=7.89, wps=5796.5, ups=0.09, wpb=64773, bsz=128, num_updates=1086, lr=9.99993e-05, gnorm=2.597, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12187
2021-06-18 22:02:04 | INFO | train_inner | epoch 001: 1100 / 3002 loss=3.203, ppl=9.21, wps=5751.9, ups=0.09, wpb=64785, bsz=128, num_updates=1087, lr=9.99993e-05, gnorm=2.609, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12198
2021-06-18 22:02:15 | INFO | train_inner | epoch 001: 1101 / 3002 loss=3.128, ppl=8.74, wps=5930.6, ups=0.09, wpb=64811, bsz=128, num_updates=1088, lr=9.99993e-05, gnorm=2.634, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12209
2021-06-18 22:02:26 | INFO | train_inner | epoch 001: 1102 / 3002 loss=3.192, ppl=9.14, wps=5799.3, ups=0.09, wpb=64785, bsz=128, num_updates=1089, lr=9.99993e-05, gnorm=7.973, loss_scale=1, train_wall=11, gb_free=2.8, wall=12220
2021-06-18 22:02:37 | INFO | train_inner | epoch 001: 1103 / 3002 loss=3.084, ppl=8.48, wps=5965.5, ups=0.09, wpb=64890, bsz=128, num_updates=1090, lr=9.99993e-05, gnorm=2.732, loss_scale=1, train_wall=10, gb_free=2.8, wall=12231
2021-06-18 22:02:48 | INFO | train_inner | epoch 001: 1104 / 3002 loss=3.153, ppl=8.9, wps=5797.5, ups=0.09, wpb=64807, bsz=128, num_updates=1091, lr=9.99993e-05, gnorm=2.73, loss_scale=1, train_wall=11, gb_free=2.8, wall=12242
2021-06-18 22:02:59 | INFO | train_inner | epoch 001: 1105 / 3002 loss=3.157, ppl=8.92, wps=5848.7, ups=0.09, wpb=64831, bsz=128, num_updates=1092, lr=9.99993e-05, gnorm=2.692, loss_scale=1, train_wall=11, gb_free=2.8, wall=12253
2021-06-18 22:03:10 | INFO | train_inner | epoch 001: 1106 / 3002 loss=3.233, ppl=9.4, wps=5816.5, ups=0.09, wpb=64864, bsz=128, num_updates=1093, lr=9.99993e-05, gnorm=2.701, loss_scale=1, train_wall=11, gb_free=2.8, wall=12265
2021-06-18 22:03:21 | INFO | train_inner | epoch 001: 1107 / 3002 loss=3.076, ppl=8.43, wps=5860.1, ups=0.09, wpb=64858, bsz=128, num_updates=1094, lr=9.99992e-05, gnorm=2.713, loss_scale=1, train_wall=11, gb_free=2.8, wall=12276
2021-06-18 22:03:32 | INFO | train_inner | epoch 001: 1108 / 3002 loss=3.148, ppl=8.87, wps=5949.5, ups=0.09, wpb=64837, bsz=128, num_updates=1095, lr=9.99992e-05, gnorm=2.967, loss_scale=1, train_wall=10, gb_free=2.8, wall=12287
2021-06-18 22:03:43 | INFO | train_inner | epoch 001: 1109 / 3002 loss=3.152, ppl=8.89, wps=5800.5, ups=0.09, wpb=64764, bsz=128, num_updates=1096, lr=9.99992e-05, gnorm=2.723, loss_scale=1, train_wall=11, gb_free=2.8, wall=12298
2021-06-18 22:03:54 | INFO | train_inner | epoch 001: 1110 / 3002 loss=3.057, ppl=8.32, wps=5866.3, ups=0.09, wpb=64877, bsz=128, num_updates=1097, lr=9.99992e-05, gnorm=2.518, loss_scale=1, train_wall=11, gb_free=2.8, wall=12309
2021-06-18 22:04:06 | INFO | train_inner | epoch 001: 1111 / 3002 loss=3.135, ppl=8.78, wps=5810.5, ups=0.09, wpb=64776, bsz=128, num_updates=1098, lr=9.99992e-05, gnorm=2.601, loss_scale=1, train_wall=11, gb_free=2.8, wall=12320
2021-06-18 22:04:17 | INFO | train_inner | epoch 001: 1112 / 3002 loss=3.105, ppl=8.61, wps=5833.3, ups=0.09, wpb=64864, bsz=128, num_updates=1099, lr=9.99992e-05, gnorm=2.763, loss_scale=1, train_wall=11, gb_free=2.8, wall=12331
2021-06-18 22:04:28 | INFO | train_inner | epoch 001: 1113 / 3002 loss=3, ppl=8, wps=5982.8, ups=0.09, wpb=64847, bsz=128, num_updates=1100, lr=9.99992e-05, gnorm=2.603, loss_scale=1, train_wall=10, gb_free=2.8, wall=12342
2021-06-18 22:04:39 | INFO | train_inner | epoch 001: 1114 / 3002 loss=3.056, ppl=8.31, wps=5738.3, ups=0.09, wpb=64815, bsz=128, num_updates=1101, lr=9.99992e-05, gnorm=2.554, loss_scale=1, train_wall=11, gb_free=2.8, wall=12353
2021-06-18 22:04:50 | INFO | train_inner | epoch 001: 1115 / 3002 loss=2.913, ppl=7.53, wps=5858, ups=0.09, wpb=64831, bsz=128, num_updates=1102, lr=9.99992e-05, gnorm=2.593, loss_scale=1, train_wall=11, gb_free=2.8, wall=12364
2021-06-18 22:05:01 | INFO | train_inner | epoch 001: 1116 / 3002 loss=2.984, ppl=7.91, wps=5853.6, ups=0.09, wpb=64800, bsz=128, num_updates=1103, lr=9.99992e-05, gnorm=2.65, loss_scale=1, train_wall=11, gb_free=2.8, wall=12375
2021-06-18 22:05:12 | INFO | train_inner | epoch 001: 1117 / 3002 loss=3.107, ppl=8.61, wps=5990.4, ups=0.09, wpb=64891, bsz=128, num_updates=1104, lr=9.99992e-05, gnorm=2.699, loss_scale=1, train_wall=10, gb_free=2.8, wall=12386
2021-06-18 22:05:23 | INFO | train_inner | epoch 001: 1118 / 3002 loss=3.092, ppl=8.53, wps=5814.1, ups=0.09, wpb=64806, bsz=128, num_updates=1105, lr=9.99992e-05, gnorm=2.579, loss_scale=1, train_wall=11, gb_free=2.8, wall=12397
2021-06-18 22:05:34 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-18 22:05:45 | INFO | train_inner | epoch 001: 1120 / 3002 loss=3.11, ppl=8.64, wps=2971.1, ups=0.05, wpb=64802, bsz=128, num_updates=1106, lr=9.99992e-05, gnorm=2.89, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=12419
2021-06-18 22:05:56 | INFO | train_inner | epoch 001: 1121 / 3002 loss=3.161, ppl=8.94, wps=5859.7, ups=0.09, wpb=64838, bsz=128, num_updates=1107, lr=9.99991e-05, gnorm=2.783, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12430
2021-06-18 22:06:07 | INFO | train_inner | epoch 001: 1122 / 3002 loss=2.974, ppl=7.86, wps=5865.1, ups=0.09, wpb=64900, bsz=128, num_updates=1108, lr=9.99991e-05, gnorm=2.546, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12441
2021-06-18 22:06:18 | INFO | train_inner | epoch 001: 1123 / 3002 loss=3.052, ppl=8.3, wps=5928.7, ups=0.09, wpb=64884, bsz=128, num_updates=1109, lr=9.99991e-05, gnorm=2.659, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12452
2021-06-18 22:06:29 | INFO | train_inner | epoch 001: 1124 / 3002 loss=3.011, ppl=8.06, wps=5773.1, ups=0.09, wpb=64827, bsz=128, num_updates=1110, lr=9.99991e-05, gnorm=2.566, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12463
2021-06-18 22:06:40 | INFO | train_inner | epoch 001: 1125 / 3002 loss=3.123, ppl=8.71, wps=5927.1, ups=0.09, wpb=64817, bsz=128, num_updates=1111, lr=9.99991e-05, gnorm=2.615, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12474
2021-06-18 22:06:51 | INFO | train_inner | epoch 001: 1126 / 3002 loss=3.099, ppl=8.57, wps=5901.9, ups=0.09, wpb=64885, bsz=128, num_updates=1112, lr=9.99991e-05, gnorm=2.596, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12485
2021-06-18 22:07:02 | INFO | train_inner | epoch 001: 1127 / 3002 loss=3.085, ppl=8.48, wps=5902.6, ups=0.09, wpb=64866, bsz=128, num_updates=1113, lr=9.99991e-05, gnorm=2.509, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12496
2021-06-18 22:07:13 | INFO | train_inner | epoch 001: 1128 / 3002 loss=3.253, ppl=9.54, wps=5991.7, ups=0.09, wpb=64824, bsz=128, num_updates=1114, lr=9.99991e-05, gnorm=2.69, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12507
2021-06-18 22:07:24 | INFO | train_inner | epoch 001: 1129 / 3002 loss=3.113, ppl=8.65, wps=5999.4, ups=0.09, wpb=64845, bsz=128, num_updates=1115, lr=9.99991e-05, gnorm=2.535, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12518
2021-06-18 22:07:35 | INFO | train_inner | epoch 001: 1130 / 3002 loss=2.949, ppl=7.72, wps=5899.9, ups=0.09, wpb=64944, bsz=128, num_updates=1116, lr=9.99991e-05, gnorm=2.591, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12529
2021-06-18 22:07:46 | INFO | train_inner | epoch 001: 1131 / 3002 loss=3.134, ppl=8.78, wps=5789.5, ups=0.09, wpb=64797, bsz=128, num_updates=1117, lr=9.99991e-05, gnorm=2.64, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12540
2021-06-18 22:07:57 | INFO | train_inner | epoch 001: 1132 / 3002 loss=3.186, ppl=9.1, wps=5924.2, ups=0.09, wpb=64920, bsz=128, num_updates=1118, lr=9.99991e-05, gnorm=2.609, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12551
2021-06-18 22:08:08 | INFO | train_inner | epoch 001: 1133 / 3002 loss=2.999, ppl=8, wps=5813.4, ups=0.09, wpb=64841, bsz=128, num_updates=1119, lr=9.9999e-05, gnorm=3.685, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12562
2021-06-18 22:08:19 | INFO | train_inner | epoch 001: 1134 / 3002 loss=3.025, ppl=8.14, wps=5859.4, ups=0.09, wpb=64805, bsz=128, num_updates=1120, lr=9.9999e-05, gnorm=2.426, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12573
2021-06-18 22:08:30 | INFO | train_inner | epoch 001: 1135 / 3002 loss=3.213, ppl=9.27, wps=5872.5, ups=0.09, wpb=64884, bsz=128, num_updates=1121, lr=9.9999e-05, gnorm=2.59, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12584
2021-06-18 22:08:41 | INFO | train_inner | epoch 001: 1136 / 3002 loss=3.129, ppl=8.75, wps=5920.5, ups=0.09, wpb=64830, bsz=128, num_updates=1122, lr=9.9999e-05, gnorm=2.605, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12595
2021-06-18 22:08:52 | INFO | train_inner | epoch 001: 1137 / 3002 loss=3.17, ppl=9, wps=5689.7, ups=0.09, wpb=64760, bsz=128, num_updates=1123, lr=9.9999e-05, gnorm=2.645, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12607
2021-06-18 22:09:04 | INFO | train_inner | epoch 001: 1138 / 3002 loss=3.21, ppl=9.25, wps=5754.2, ups=0.09, wpb=64738, bsz=128, num_updates=1124, lr=9.9999e-05, gnorm=3.132, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12618
2021-06-18 22:09:15 | INFO | train_inner | epoch 001: 1139 / 3002 loss=3.394, ppl=10.51, wps=5837.4, ups=0.09, wpb=64800, bsz=128, num_updates=1125, lr=9.9999e-05, gnorm=2.569, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12629
2021-06-18 22:09:26 | INFO | train_inner | epoch 001: 1140 / 3002 loss=2.882, ppl=7.37, wps=5858.5, ups=0.09, wpb=64858, bsz=128, num_updates=1126, lr=9.9999e-05, gnorm=3.107, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12640
2021-06-18 22:09:37 | INFO | train_inner | epoch 001: 1141 / 3002 loss=3.251, ppl=9.52, wps=6019.3, ups=0.09, wpb=64902, bsz=128, num_updates=1127, lr=9.9999e-05, gnorm=2.641, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12651
2021-06-18 22:09:48 | INFO | train_inner | epoch 001: 1142 / 3002 loss=3.131, ppl=8.76, wps=5873.2, ups=0.09, wpb=64770, bsz=128, num_updates=1128, lr=9.9999e-05, gnorm=2.591, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12662
2021-06-18 22:09:59 | INFO | train_inner | epoch 001: 1143 / 3002 loss=3.088, ppl=8.5, wps=5915.6, ups=0.09, wpb=64785, bsz=128, num_updates=1129, lr=9.9999e-05, gnorm=2.617, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12673
2021-06-18 22:10:10 | INFO | train_inner | epoch 001: 1144 / 3002 loss=3.223, ppl=9.33, wps=5860.3, ups=0.09, wpb=64795, bsz=128, num_updates=1130, lr=9.9999e-05, gnorm=2.644, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12684
2021-06-18 22:10:21 | INFO | train_inner | epoch 001: 1145 / 3002 loss=3.204, ppl=9.21, wps=5951.1, ups=0.09, wpb=64830, bsz=128, num_updates=1131, lr=9.9999e-05, gnorm=2.613, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12695
2021-06-18 22:10:32 | INFO | train_inner | epoch 001: 1146 / 3002 loss=3.25, ppl=9.51, wps=5876, ups=0.09, wpb=64958, bsz=128, num_updates=1132, lr=9.99989e-05, gnorm=2.617, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12706
2021-06-18 22:10:43 | INFO | train_inner | epoch 001: 1147 / 3002 loss=3.099, ppl=8.57, wps=5841.6, ups=0.09, wpb=64808, bsz=128, num_updates=1133, lr=9.99989e-05, gnorm=2.603, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12717
2021-06-18 22:10:54 | INFO | train_inner | epoch 001: 1148 / 3002 loss=3.11, ppl=8.63, wps=5831.2, ups=0.09, wpb=64871, bsz=128, num_updates=1134, lr=9.99989e-05, gnorm=2.777, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12728
2021-06-18 22:11:05 | INFO | train_inner | epoch 001: 1149 / 3002 loss=3.098, ppl=8.56, wps=5839.9, ups=0.09, wpb=64753, bsz=128, num_updates=1135, lr=9.99989e-05, gnorm=2.604, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12739
2021-06-18 22:11:16 | INFO | train_inner | epoch 001: 1150 / 3002 loss=3.04, ppl=8.23, wps=5889, ups=0.09, wpb=64760, bsz=128, num_updates=1136, lr=9.99989e-05, gnorm=2.698, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12750
2021-06-18 22:11:27 | INFO | train_inner | epoch 001: 1151 / 3002 loss=3.03, ppl=8.17, wps=5912.9, ups=0.09, wpb=64848, bsz=128, num_updates=1137, lr=9.99989e-05, gnorm=2.748, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12761
2021-06-18 22:11:38 | INFO | train_inner | epoch 001: 1152 / 3002 loss=3.11, ppl=8.63, wps=5866.5, ups=0.09, wpb=64830, bsz=128, num_updates=1138, lr=9.99989e-05, gnorm=2.976, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12772
2021-06-18 22:11:49 | INFO | train_inner | epoch 001: 1153 / 3002 loss=3.215, ppl=9.28, wps=5985.8, ups=0.09, wpb=64834, bsz=128, num_updates=1139, lr=9.99989e-05, gnorm=2.591, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12783
2021-06-18 22:12:00 | INFO | train_inner | epoch 001: 1154 / 3002 loss=2.929, ppl=7.62, wps=5861.8, ups=0.09, wpb=64862, bsz=128, num_updates=1140, lr=9.99989e-05, gnorm=6.929, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12794
2021-06-18 22:12:11 | INFO | train_inner | epoch 001: 1155 / 3002 loss=3.12, ppl=8.69, wps=5913.8, ups=0.09, wpb=64842, bsz=128, num_updates=1141, lr=9.99989e-05, gnorm=2.84, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12805
2021-06-18 22:12:22 | INFO | train_inner | epoch 001: 1156 / 3002 loss=3.121, ppl=8.7, wps=5873.5, ups=0.09, wpb=64802, bsz=128, num_updates=1142, lr=9.99989e-05, gnorm=2.568, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12816
2021-06-18 22:12:33 | INFO | train_inner | epoch 001: 1157 / 3002 loss=3.313, ppl=9.94, wps=5840.6, ups=0.09, wpb=64754, bsz=128, num_updates=1143, lr=9.99989e-05, gnorm=2.844, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12827
2021-06-18 22:12:44 | INFO | train_inner | epoch 001: 1158 / 3002 loss=3.143, ppl=8.84, wps=5881.4, ups=0.09, wpb=64838, bsz=128, num_updates=1144, lr=9.99988e-05, gnorm=2.559, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12838
2021-06-18 22:12:55 | INFO | train_inner | epoch 001: 1159 / 3002 loss=2.942, ppl=7.69, wps=5785.3, ups=0.09, wpb=64830, bsz=128, num_updates=1145, lr=9.99988e-05, gnorm=2.556, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12849
2021-06-18 22:13:06 | INFO | train_inner | epoch 001: 1160 / 3002 loss=3.104, ppl=8.6, wps=5789.8, ups=0.09, wpb=64862, bsz=128, num_updates=1146, lr=9.99988e-05, gnorm=2.659, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12861
2021-06-18 22:13:17 | INFO | train_inner | epoch 001: 1161 / 3002 loss=3.329, ppl=10.05, wps=5913, ups=0.09, wpb=64725, bsz=128, num_updates=1147, lr=9.99988e-05, gnorm=2.818, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12872
2021-06-18 22:13:28 | INFO | train_inner | epoch 001: 1162 / 3002 loss=3.225, ppl=9.35, wps=5962.2, ups=0.09, wpb=64751, bsz=128, num_updates=1148, lr=9.99988e-05, gnorm=2.641, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12882
2021-06-18 22:13:39 | INFO | train_inner | epoch 001: 1163 / 3002 loss=3.043, ppl=8.24, wps=5837.4, ups=0.09, wpb=64846, bsz=128, num_updates=1149, lr=9.99988e-05, gnorm=3.128, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12894
2021-06-18 22:13:50 | INFO | train_inner | epoch 001: 1164 / 3002 loss=3.171, ppl=9, wps=5906.4, ups=0.09, wpb=64843, bsz=128, num_updates=1150, lr=9.99988e-05, gnorm=2.785, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12905
2021-06-18 22:14:01 | INFO | train_inner | epoch 001: 1165 / 3002 loss=3.12, ppl=8.69, wps=5884, ups=0.09, wpb=64703, bsz=128, num_updates=1151, lr=9.99988e-05, gnorm=2.687, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12916
2021-06-18 22:14:12 | INFO | train_inner | epoch 001: 1166 / 3002 loss=3.082, ppl=8.47, wps=5987.2, ups=0.09, wpb=64836, bsz=128, num_updates=1152, lr=9.99988e-05, gnorm=2.889, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12926
2021-06-18 22:14:23 | INFO | train_inner | epoch 001: 1167 / 3002 loss=3.111, ppl=8.64, wps=5966.8, ups=0.09, wpb=64852, bsz=128, num_updates=1153, lr=9.99988e-05, gnorm=2.859, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12937
2021-06-18 22:14:34 | INFO | train_inner | epoch 001: 1168 / 3002 loss=3.043, ppl=8.24, wps=5835.4, ups=0.09, wpb=64755, bsz=128, num_updates=1154, lr=9.99988e-05, gnorm=2.593, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12948
2021-06-18 22:14:45 | INFO | train_inner | epoch 001: 1169 / 3002 loss=3.26, ppl=9.58, wps=5781.5, ups=0.09, wpb=64826, bsz=128, num_updates=1155, lr=9.99988e-05, gnorm=2.629, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12960
2021-06-18 22:14:56 | INFO | train_inner | epoch 001: 1170 / 3002 loss=3.154, ppl=8.9, wps=5838.4, ups=0.09, wpb=64764, bsz=128, num_updates=1156, lr=9.99988e-05, gnorm=6.202, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12971
2021-06-18 22:15:07 | INFO | train_inner | epoch 001: 1171 / 3002 loss=3.098, ppl=8.56, wps=5914.8, ups=0.09, wpb=64857, bsz=128, num_updates=1157, lr=9.99987e-05, gnorm=2.665, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=12982
2021-06-18 22:15:18 | INFO | train_inner | epoch 001: 1172 / 3002 loss=2.997, ppl=7.98, wps=5912.4, ups=0.09, wpb=64875, bsz=128, num_updates=1158, lr=9.99987e-05, gnorm=2.66, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=12993
2021-06-18 22:15:29 | INFO | train_inner | epoch 001: 1173 / 3002 loss=2.981, ppl=7.9, wps=5939.8, ups=0.09, wpb=64877, bsz=128, num_updates=1159, lr=9.99987e-05, gnorm=2.563, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13004
2021-06-18 22:15:40 | INFO | train_inner | epoch 001: 1174 / 3002 loss=2.985, ppl=7.92, wps=5838.4, ups=0.09, wpb=64819, bsz=128, num_updates=1160, lr=9.99987e-05, gnorm=2.483, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13015
2021-06-18 22:15:51 | INFO | train_inner | epoch 001: 1175 / 3002 loss=3.05, ppl=8.28, wps=6003, ups=0.09, wpb=64771, bsz=128, num_updates=1161, lr=9.99987e-05, gnorm=2.674, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13025
2021-06-18 22:16:02 | INFO | train_inner | epoch 001: 1176 / 3002 loss=3.048, ppl=8.27, wps=5921.4, ups=0.09, wpb=64885, bsz=128, num_updates=1162, lr=9.99987e-05, gnorm=2.658, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13036
2021-06-18 22:16:13 | INFO | train_inner | epoch 001: 1177 / 3002 loss=3.236, ppl=9.42, wps=5720.7, ups=0.09, wpb=64863, bsz=128, num_updates=1163, lr=9.99987e-05, gnorm=2.71, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13048
2021-06-18 22:16:24 | INFO | train_inner | epoch 001: 1178 / 3002 loss=3.165, ppl=8.97, wps=5851.7, ups=0.09, wpb=64861, bsz=128, num_updates=1164, lr=9.99987e-05, gnorm=2.669, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13059
2021-06-18 22:16:36 | INFO | train_inner | epoch 001: 1179 / 3002 loss=3.1, ppl=8.58, wps=5824.4, ups=0.09, wpb=64833, bsz=128, num_updates=1165, lr=9.99987e-05, gnorm=2.525, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13070
2021-06-18 22:16:46 | INFO | train_inner | epoch 001: 1180 / 3002 loss=3.067, ppl=8.38, wps=6028, ups=0.09, wpb=64938, bsz=128, num_updates=1166, lr=9.99987e-05, gnorm=2.591, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13081
2021-06-18 22:16:57 | INFO | train_inner | epoch 001: 1181 / 3002 loss=3.251, ppl=9.52, wps=5876.4, ups=0.09, wpb=64875, bsz=128, num_updates=1167, lr=9.99987e-05, gnorm=2.585, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13092
2021-06-18 22:17:08 | INFO | train_inner | epoch 001: 1182 / 3002 loss=2.977, ppl=7.87, wps=6009.7, ups=0.09, wpb=64900, bsz=128, num_updates=1168, lr=9.99987e-05, gnorm=2.514, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13103
2021-06-18 22:17:19 | INFO | train_inner | epoch 001: 1183 / 3002 loss=3.1, ppl=8.57, wps=5836.8, ups=0.09, wpb=64846, bsz=128, num_updates=1169, lr=9.99986e-05, gnorm=2.642, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13114
2021-06-18 22:17:30 | INFO | train_inner | epoch 001: 1184 / 3002 loss=3.067, ppl=8.38, wps=5871.9, ups=0.09, wpb=64790, bsz=128, num_updates=1170, lr=9.99986e-05, gnorm=2.585, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13125
2021-06-18 22:17:41 | INFO | train_inner | epoch 001: 1185 / 3002 loss=3.048, ppl=8.27, wps=5971.4, ups=0.09, wpb=64810, bsz=128, num_updates=1171, lr=9.99986e-05, gnorm=2.64, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13136
2021-06-18 22:17:52 | INFO | train_inner | epoch 001: 1186 / 3002 loss=3.059, ppl=8.34, wps=5925.1, ups=0.09, wpb=64785, bsz=128, num_updates=1172, lr=9.99986e-05, gnorm=2.586, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13146
2021-06-18 22:18:03 | INFO | train_inner | epoch 001: 1187 / 3002 loss=3.01, ppl=8.06, wps=5763.1, ups=0.09, wpb=64828, bsz=128, num_updates=1173, lr=9.99986e-05, gnorm=2.567, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13158
2021-06-18 22:18:15 | INFO | train_inner | epoch 001: 1188 / 3002 loss=3.072, ppl=8.41, wps=5763.8, ups=0.09, wpb=64814, bsz=128, num_updates=1174, lr=9.99986e-05, gnorm=2.499, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13169
2021-06-18 22:18:26 | INFO | train_inner | epoch 001: 1189 / 3002 loss=3.175, ppl=9.03, wps=5795.2, ups=0.09, wpb=64811, bsz=128, num_updates=1175, lr=9.99986e-05, gnorm=2.538, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13180
2021-06-18 22:18:37 | INFO | train_inner | epoch 001: 1190 / 3002 loss=2.988, ppl=7.93, wps=5980.4, ups=0.09, wpb=64852, bsz=128, num_updates=1176, lr=9.99986e-05, gnorm=2.563, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13191
2021-06-18 22:18:48 | INFO | train_inner | epoch 001: 1191 / 3002 loss=3.109, ppl=8.63, wps=5797.7, ups=0.09, wpb=64846, bsz=128, num_updates=1177, lr=9.99986e-05, gnorm=2.499, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13202
2021-06-18 22:18:59 | INFO | train_inner | epoch 001: 1192 / 3002 loss=2.939, ppl=7.67, wps=5759.5, ups=0.09, wpb=64756, bsz=128, num_updates=1178, lr=9.99986e-05, gnorm=2.53, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13213
2021-06-18 22:19:10 | INFO | train_inner | epoch 001: 1193 / 3002 loss=3.049, ppl=8.28, wps=5971.6, ups=0.09, wpb=64876, bsz=128, num_updates=1179, lr=9.99986e-05, gnorm=2.626, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13224
2021-06-18 22:19:21 | INFO | train_inner | epoch 001: 1194 / 3002 loss=2.946, ppl=7.71, wps=5841.5, ups=0.09, wpb=64837, bsz=128, num_updates=1180, lr=9.99986e-05, gnorm=2.566, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13235
2021-06-18 22:19:32 | INFO | train_inner | epoch 001: 1195 / 3002 loss=3.108, ppl=8.62, wps=5950.3, ups=0.09, wpb=64777, bsz=128, num_updates=1181, lr=9.99986e-05, gnorm=4.18, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13246
2021-06-18 22:19:43 | INFO | train_inner | epoch 001: 1196 / 3002 loss=3.18, ppl=9.06, wps=5891, ups=0.09, wpb=64807, bsz=128, num_updates=1182, lr=9.99985e-05, gnorm=2.596, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13257
2021-06-18 22:19:54 | INFO | train_inner | epoch 001: 1197 / 3002 loss=3.074, ppl=8.42, wps=5827.3, ups=0.09, wpb=64748, bsz=128, num_updates=1183, lr=9.99985e-05, gnorm=6.758, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13268
2021-06-18 22:20:05 | INFO | train_inner | epoch 001: 1198 / 3002 loss=3.239, ppl=9.44, wps=5854.2, ups=0.09, wpb=64812, bsz=128, num_updates=1184, lr=9.99985e-05, gnorm=2.896, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13279
2021-06-18 22:20:16 | INFO | train_inner | epoch 001: 1199 / 3002 loss=3, ppl=8, wps=5826.1, ups=0.09, wpb=64871, bsz=128, num_updates=1185, lr=9.99985e-05, gnorm=2.635, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13291
2021-06-18 22:20:27 | INFO | train_inner | epoch 001: 1200 / 3002 loss=2.875, ppl=7.34, wps=5891.1, ups=0.09, wpb=64906, bsz=128, num_updates=1186, lr=9.99985e-05, gnorm=2.524, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13302
2021-06-18 22:20:38 | INFO | train_inner | epoch 001: 1201 / 3002 loss=3.153, ppl=8.9, wps=5806.4, ups=0.09, wpb=64898, bsz=128, num_updates=1187, lr=9.99985e-05, gnorm=2.873, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13313
2021-06-18 22:20:50 | INFO | train_inner | epoch 001: 1202 / 3002 loss=3.087, ppl=8.49, wps=5862.1, ups=0.09, wpb=64859, bsz=128, num_updates=1188, lr=9.99985e-05, gnorm=2.905, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13324
2021-06-18 22:21:00 | INFO | train_inner | epoch 001: 1203 / 3002 loss=3.005, ppl=8.03, wps=5973.9, ups=0.09, wpb=64871, bsz=128, num_updates=1189, lr=9.99985e-05, gnorm=2.901, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13335
2021-06-18 22:21:11 | INFO | train_inner | epoch 001: 1204 / 3002 loss=3.124, ppl=8.72, wps=5832.4, ups=0.09, wpb=64822, bsz=128, num_updates=1190, lr=9.99985e-05, gnorm=2.85, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13346
2021-06-18 22:21:23 | INFO | train_inner | epoch 001: 1205 / 3002 loss=3.107, ppl=8.62, wps=5798, ups=0.09, wpb=64904, bsz=128, num_updates=1191, lr=9.99985e-05, gnorm=2.683, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13357
2021-06-18 22:21:34 | INFO | train_inner | epoch 001: 1206 / 3002 loss=3.059, ppl=8.33, wps=5889.3, ups=0.09, wpb=64742, bsz=128, num_updates=1192, lr=9.99985e-05, gnorm=2.609, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13368
2021-06-18 22:21:45 | INFO | train_inner | epoch 001: 1207 / 3002 loss=3.066, ppl=8.37, wps=5848.8, ups=0.09, wpb=64872, bsz=128, num_updates=1193, lr=9.99985e-05, gnorm=2.845, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13379
2021-06-18 22:21:56 | INFO | train_inner | epoch 001: 1208 / 3002 loss=3.162, ppl=8.95, wps=5903, ups=0.09, wpb=64800, bsz=128, num_updates=1194, lr=9.99984e-05, gnorm=2.533, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13390
2021-06-18 22:22:07 | INFO | train_inner | epoch 001: 1209 / 3002 loss=3.047, ppl=8.26, wps=5883.9, ups=0.09, wpb=64805, bsz=128, num_updates=1195, lr=9.99984e-05, gnorm=2.675, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13401
2021-06-18 22:22:18 | INFO | train_inner | epoch 001: 1210 / 3002 loss=2.942, ppl=7.68, wps=5852.2, ups=0.09, wpb=64860, bsz=128, num_updates=1196, lr=9.99984e-05, gnorm=2.602, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13412
2021-06-18 22:22:29 | INFO | train_inner | epoch 001: 1211 / 3002 loss=3.023, ppl=8.13, wps=5819.8, ups=0.09, wpb=64794, bsz=128, num_updates=1197, lr=9.99984e-05, gnorm=2.642, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13423
2021-06-18 22:22:40 | INFO | train_inner | epoch 001: 1212 / 3002 loss=3.116, ppl=8.67, wps=5829.5, ups=0.09, wpb=64866, bsz=128, num_updates=1198, lr=9.99984e-05, gnorm=2.685, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13434
2021-06-18 22:22:51 | INFO | train_inner | epoch 001: 1213 / 3002 loss=3.029, ppl=8.16, wps=6028.2, ups=0.09, wpb=64872, bsz=128, num_updates=1199, lr=9.99984e-05, gnorm=2.597, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13445
2021-06-18 22:23:02 | INFO | train_inner | epoch 001: 1214 / 3002 loss=3.048, ppl=8.27, wps=5888.4, ups=0.09, wpb=64809, bsz=128, num_updates=1200, lr=9.99984e-05, gnorm=2.816, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13456
2021-06-18 22:23:13 | INFO | train_inner | epoch 001: 1215 / 3002 loss=3.338, ppl=10.11, wps=5853.1, ups=0.09, wpb=64905, bsz=128, num_updates=1201, lr=9.99984e-05, gnorm=2.648, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13467
2021-06-18 22:23:24 | INFO | train_inner | epoch 001: 1216 / 3002 loss=3.118, ppl=8.68, wps=5859.9, ups=0.09, wpb=64758, bsz=128, num_updates=1202, lr=9.99984e-05, gnorm=5.829, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13478
2021-06-18 22:23:35 | INFO | train_inner | epoch 001: 1217 / 3002 loss=3.316, ppl=9.96, wps=5816.5, ups=0.09, wpb=64747, bsz=128, num_updates=1203, lr=9.99984e-05, gnorm=2.617, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13489
2021-06-18 22:23:46 | INFO | train_inner | epoch 001: 1218 / 3002 loss=3, ppl=8, wps=5917.2, ups=0.09, wpb=64807, bsz=128, num_updates=1204, lr=9.99984e-05, gnorm=2.591, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13500
2021-06-18 22:23:57 | INFO | train_inner | epoch 001: 1219 / 3002 loss=3.063, ppl=8.36, wps=5882.7, ups=0.09, wpb=64812, bsz=128, num_updates=1205, lr=9.99984e-05, gnorm=2.705, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13511
2021-06-18 22:24:08 | INFO | train_inner | epoch 001: 1220 / 3002 loss=3.11, ppl=8.63, wps=5940, ups=0.09, wpb=64895, bsz=128, num_updates=1206, lr=9.99984e-05, gnorm=2.466, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13522
2021-06-18 22:24:19 | INFO | train_inner | epoch 001: 1221 / 3002 loss=3.148, ppl=8.86, wps=5856.1, ups=0.09, wpb=64818, bsz=128, num_updates=1207, lr=9.99983e-05, gnorm=2.641, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13533
2021-06-18 22:24:30 | INFO | train_inner | epoch 001: 1222 / 3002 loss=3.037, ppl=8.21, wps=5966.3, ups=0.09, wpb=64912, bsz=128, num_updates=1208, lr=9.99983e-05, gnorm=2.778, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13544
2021-06-18 22:24:41 | INFO | train_inner | epoch 001: 1223 / 3002 loss=3.075, ppl=8.43, wps=5874.3, ups=0.09, wpb=64807, bsz=128, num_updates=1209, lr=9.99983e-05, gnorm=2.568, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13555
2021-06-18 22:24:52 | INFO | train_inner | epoch 001: 1224 / 3002 loss=3.015, ppl=8.08, wps=5890.4, ups=0.09, wpb=64904, bsz=128, num_updates=1210, lr=9.99983e-05, gnorm=4.254, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13566
2021-06-18 22:25:03 | INFO | train_inner | epoch 001: 1225 / 3002 loss=3.134, ppl=8.78, wps=5805.6, ups=0.09, wpb=64851, bsz=128, num_updates=1211, lr=9.99983e-05, gnorm=2.853, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13578
2021-06-18 22:25:14 | INFO | train_inner | epoch 001: 1226 / 3002 loss=3.186, ppl=9.1, wps=5975.3, ups=0.09, wpb=64805, bsz=128, num_updates=1212, lr=9.99983e-05, gnorm=3.364, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13588
2021-06-18 22:25:25 | INFO | train_inner | epoch 001: 1227 / 3002 loss=3.008, ppl=8.05, wps=5842.8, ups=0.09, wpb=64857, bsz=128, num_updates=1213, lr=9.99983e-05, gnorm=2.797, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13600
2021-06-18 22:25:36 | INFO | train_inner | epoch 001: 1228 / 3002 loss=3.039, ppl=8.22, wps=5748.8, ups=0.09, wpb=64797, bsz=128, num_updates=1214, lr=9.99983e-05, gnorm=2.647, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13611
2021-06-18 22:25:47 | INFO | train_inner | epoch 001: 1229 / 3002 loss=3.034, ppl=8.19, wps=5879.2, ups=0.09, wpb=64881, bsz=128, num_updates=1215, lr=9.99983e-05, gnorm=5.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13622
2021-06-18 22:25:59 | INFO | train_inner | epoch 001: 1230 / 3002 loss=2.985, ppl=7.92, wps=5805.9, ups=0.09, wpb=64844, bsz=128, num_updates=1216, lr=9.99983e-05, gnorm=2.706, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13633
2021-06-18 22:26:10 | INFO | train_inner | epoch 001: 1231 / 3002 loss=3.165, ppl=8.97, wps=5815.8, ups=0.09, wpb=64763, bsz=128, num_updates=1217, lr=9.99983e-05, gnorm=2.653, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13644
2021-06-18 22:26:21 | INFO | train_inner | epoch 001: 1232 / 3002 loss=3.086, ppl=8.49, wps=5802.1, ups=0.09, wpb=64858, bsz=128, num_updates=1218, lr=9.99983e-05, gnorm=3.13, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13655
2021-06-18 22:26:32 | INFO | train_inner | epoch 001: 1233 / 3002 loss=3.023, ppl=8.13, wps=5866.3, ups=0.09, wpb=64859, bsz=128, num_updates=1219, lr=9.99982e-05, gnorm=3.276, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13666
2021-06-18 22:26:43 | INFO | train_inner | epoch 001: 1234 / 3002 loss=3.092, ppl=8.53, wps=5965, ups=0.09, wpb=64831, bsz=128, num_updates=1220, lr=9.99982e-05, gnorm=2.63, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13677
2021-06-18 22:26:54 | INFO | train_inner | epoch 001: 1235 / 3002 loss=3.25, ppl=9.51, wps=5943.9, ups=0.09, wpb=64845, bsz=128, num_updates=1221, lr=9.99982e-05, gnorm=2.95, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13688
2021-06-18 22:27:05 | INFO | train_inner | epoch 001: 1236 / 3002 loss=3.137, ppl=8.8, wps=5866.6, ups=0.09, wpb=64750, bsz=128, num_updates=1222, lr=9.99982e-05, gnorm=2.773, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13699
2021-06-18 22:27:16 | INFO | train_inner | epoch 001: 1237 / 3002 loss=3.056, ppl=8.32, wps=5785.6, ups=0.09, wpb=64875, bsz=128, num_updates=1223, lr=9.99982e-05, gnorm=2.783, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13710
2021-06-18 22:27:27 | INFO | train_inner | epoch 001: 1238 / 3002 loss=3.156, ppl=8.91, wps=5942.1, ups=0.09, wpb=64833, bsz=128, num_updates=1224, lr=9.99982e-05, gnorm=2.667, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13721
2021-06-18 22:27:38 | INFO | train_inner | epoch 001: 1239 / 3002 loss=2.976, ppl=7.87, wps=6069.4, ups=0.09, wpb=64858, bsz=128, num_updates=1225, lr=9.99982e-05, gnorm=2.527, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13732
2021-06-18 22:27:48 | INFO | train_inner | epoch 001: 1240 / 3002 loss=3.04, ppl=8.23, wps=5982.8, ups=0.09, wpb=64816, bsz=128, num_updates=1226, lr=9.99982e-05, gnorm=2.693, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=13743
2021-06-18 22:28:00 | INFO | train_inner | epoch 001: 1241 / 3002 loss=3.128, ppl=8.74, wps=5817.8, ups=0.09, wpb=64886, bsz=128, num_updates=1227, lr=9.99982e-05, gnorm=2.665, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13754
2021-06-18 22:28:11 | INFO | train_inner | epoch 001: 1242 / 3002 loss=3.043, ppl=8.24, wps=5887.8, ups=0.09, wpb=64875, bsz=128, num_updates=1228, lr=9.99982e-05, gnorm=2.585, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13765
2021-06-18 22:28:22 | INFO | train_inner | epoch 001: 1243 / 3002 loss=2.805, ppl=6.99, wps=5844, ups=0.09, wpb=64873, bsz=128, num_updates=1229, lr=9.99982e-05, gnorm=4.171, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13776
2021-06-18 22:28:33 | INFO | train_inner | epoch 001: 1244 / 3002 loss=2.923, ppl=7.59, wps=5807.1, ups=0.09, wpb=64809, bsz=128, num_updates=1230, lr=9.99982e-05, gnorm=5.66, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13787
2021-06-18 22:28:44 | INFO | train_inner | epoch 001: 1245 / 3002 loss=3.057, ppl=8.32, wps=5838.8, ups=0.09, wpb=64794, bsz=128, num_updates=1231, lr=9.99982e-05, gnorm=2.882, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13798
2021-06-18 22:28:55 | INFO | train_inner | epoch 001: 1246 / 3002 loss=3.066, ppl=8.37, wps=5862.1, ups=0.09, wpb=64822, bsz=128, num_updates=1232, lr=9.99981e-05, gnorm=2.865, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=13809
2021-06-18 22:29:06 | INFO | train_inner | epoch 001: 1247 / 3002 loss=3.213, ppl=9.27, wps=5905.1, ups=0.09, wpb=64802, bsz=128, num_updates=1233, lr=9.99981e-05, gnorm=2.631, loss_scale=1, train_wall=10, gb_free=2.8, wall=13820
2021-06-18 22:29:17 | INFO | train_inner | epoch 001: 1248 / 3002 loss=3.084, ppl=8.48, wps=5757.9, ups=0.09, wpb=64854, bsz=128, num_updates=1234, lr=9.99981e-05, gnorm=2.679, loss_scale=1, train_wall=11, gb_free=2.8, wall=13832
2021-06-18 22:29:28 | INFO | train_inner | epoch 001: 1249 / 3002 loss=3.062, ppl=8.35, wps=5875.7, ups=0.09, wpb=64782, bsz=128, num_updates=1235, lr=9.99981e-05, gnorm=2.742, loss_scale=1, train_wall=11, gb_free=2.8, wall=13843
2021-06-18 22:29:39 | INFO | train_inner | epoch 001: 1250 / 3002 loss=3.225, ppl=9.35, wps=5860.1, ups=0.09, wpb=64876, bsz=128, num_updates=1236, lr=9.99981e-05, gnorm=3.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=13854
2021-06-18 22:29:50 | INFO | train_inner | epoch 001: 1251 / 3002 loss=3.072, ppl=8.41, wps=5917.3, ups=0.09, wpb=64780, bsz=128, num_updates=1237, lr=9.99981e-05, gnorm=2.85, loss_scale=1, train_wall=11, gb_free=2.8, wall=13865
2021-06-18 22:30:01 | INFO | train_inner | epoch 001: 1252 / 3002 loss=3.209, ppl=9.25, wps=5944.4, ups=0.09, wpb=64832, bsz=128, num_updates=1238, lr=9.99981e-05, gnorm=2.713, loss_scale=1, train_wall=10, gb_free=2.8, wall=13876
2021-06-18 22:30:12 | INFO | train_inner | epoch 001: 1253 / 3002 loss=3.053, ppl=8.3, wps=5857.7, ups=0.09, wpb=64940, bsz=128, num_updates=1239, lr=9.99981e-05, gnorm=2.658, loss_scale=1, train_wall=11, gb_free=2.8, wall=13887
2021-06-18 22:30:23 | INFO | train_inner | epoch 001: 1254 / 3002 loss=3.051, ppl=8.29, wps=5840.5, ups=0.09, wpb=64811, bsz=128, num_updates=1240, lr=9.99981e-05, gnorm=2.574, loss_scale=1, train_wall=11, gb_free=2.8, wall=13898
2021-06-18 22:30:34 | INFO | train_inner | epoch 001: 1255 / 3002 loss=3.125, ppl=8.72, wps=5886.3, ups=0.09, wpb=64835, bsz=128, num_updates=1241, lr=9.99981e-05, gnorm=2.705, loss_scale=1, train_wall=11, gb_free=2.8, wall=13909
2021-06-18 22:30:45 | INFO | train_inner | epoch 001: 1256 / 3002 loss=2.967, ppl=7.82, wps=5924.8, ups=0.09, wpb=64832, bsz=128, num_updates=1242, lr=9.99981e-05, gnorm=2.558, loss_scale=1, train_wall=11, gb_free=2.8, wall=13920
2021-06-18 22:30:56 | INFO | train_inner | epoch 001: 1257 / 3002 loss=2.963, ppl=7.8, wps=5848.6, ups=0.09, wpb=64838, bsz=128, num_updates=1243, lr=9.99981e-05, gnorm=2.553, loss_scale=1, train_wall=11, gb_free=2.8, wall=13931
2021-06-18 22:31:07 | INFO | train_inner | epoch 001: 1258 / 3002 loss=3.045, ppl=8.25, wps=5974.7, ups=0.09, wpb=64853, bsz=128, num_updates=1244, lr=9.9998e-05, gnorm=2.538, loss_scale=1, train_wall=10, gb_free=2.8, wall=13942
2021-06-18 22:31:18 | INFO | train_inner | epoch 001: 1259 / 3002 loss=3.162, ppl=8.95, wps=5896.2, ups=0.09, wpb=64878, bsz=128, num_updates=1245, lr=9.9998e-05, gnorm=4.079, loss_scale=1, train_wall=11, gb_free=2.8, wall=13953
2021-06-18 22:31:30 | INFO | train_inner | epoch 001: 1260 / 3002 loss=3.177, ppl=9.04, wps=5760.2, ups=0.09, wpb=64797, bsz=128, num_updates=1246, lr=9.9998e-05, gnorm=2.672, loss_scale=1, train_wall=11, gb_free=2.8, wall=13964
2021-06-18 22:31:41 | INFO | train_inner | epoch 001: 1261 / 3002 loss=3.101, ppl=8.58, wps=5838.6, ups=0.09, wpb=64832, bsz=128, num_updates=1247, lr=9.9998e-05, gnorm=2.715, loss_scale=1, train_wall=11, gb_free=2.8, wall=13975
2021-06-18 22:31:52 | INFO | train_inner | epoch 001: 1262 / 3002 loss=2.835, ppl=7.14, wps=5747.3, ups=0.09, wpb=64854, bsz=128, num_updates=1248, lr=9.9998e-05, gnorm=2.447, loss_scale=1, train_wall=11, gb_free=2.8, wall=13986
2021-06-18 22:32:03 | INFO | train_inner | epoch 001: 1263 / 3002 loss=3.138, ppl=8.81, wps=5801.9, ups=0.09, wpb=64809, bsz=128, num_updates=1249, lr=9.9998e-05, gnorm=2.544, loss_scale=1, train_wall=11, gb_free=2.8, wall=13997
2021-06-18 22:32:14 | INFO | train_inner | epoch 001: 1264 / 3002 loss=3.011, ppl=8.06, wps=5828.6, ups=0.09, wpb=64906, bsz=128, num_updates=1250, lr=9.9998e-05, gnorm=2.583, loss_scale=1, train_wall=11, gb_free=2.8, wall=14009
2021-06-18 22:32:25 | INFO | train_inner | epoch 001: 1265 / 3002 loss=2.905, ppl=7.49, wps=5906.3, ups=0.09, wpb=64832, bsz=128, num_updates=1251, lr=9.9998e-05, gnorm=2.502, loss_scale=1, train_wall=11, gb_free=2.8, wall=14020
2021-06-18 22:32:36 | INFO | train_inner | epoch 001: 1266 / 3002 loss=3.084, ppl=8.48, wps=5827.4, ups=0.09, wpb=64797, bsz=128, num_updates=1252, lr=9.9998e-05, gnorm=3.686, loss_scale=1, train_wall=11, gb_free=2.8, wall=14031
2021-06-18 22:32:48 | INFO | train_inner | epoch 001: 1267 / 3002 loss=3.169, ppl=9, wps=5743.3, ups=0.09, wpb=64748, bsz=128, num_updates=1253, lr=9.9998e-05, gnorm=2.53, loss_scale=1, train_wall=11, gb_free=2.8, wall=14042
2021-06-18 22:32:59 | INFO | train_inner | epoch 001: 1268 / 3002 loss=3.028, ppl=8.16, wps=5885.5, ups=0.09, wpb=64831, bsz=128, num_updates=1254, lr=9.9998e-05, gnorm=2.618, loss_scale=1, train_wall=11, gb_free=2.8, wall=14053
2021-06-18 22:33:10 | INFO | train_inner | epoch 001: 1269 / 3002 loss=3.125, ppl=8.72, wps=5829.7, ups=0.09, wpb=64832, bsz=128, num_updates=1255, lr=9.9998e-05, gnorm=2.527, loss_scale=1, train_wall=11, gb_free=2.8, wall=14064
2021-06-18 22:33:21 | INFO | train_inner | epoch 001: 1270 / 3002 loss=2.99, ppl=7.95, wps=5930.3, ups=0.09, wpb=64897, bsz=128, num_updates=1256, lr=9.9998e-05, gnorm=2.597, loss_scale=1, train_wall=11, gb_free=2.8, wall=14075
2021-06-18 22:33:32 | INFO | train_inner | epoch 001: 1271 / 3002 loss=2.995, ppl=7.97, wps=5844.8, ups=0.09, wpb=64876, bsz=128, num_updates=1257, lr=9.99979e-05, gnorm=2.54, loss_scale=1, train_wall=11, gb_free=2.8, wall=14086
2021-06-18 22:33:43 | INFO | train_inner | epoch 001: 1272 / 3002 loss=3.064, ppl=8.36, wps=5779.7, ups=0.09, wpb=64871, bsz=128, num_updates=1258, lr=9.99979e-05, gnorm=2.739, loss_scale=1, train_wall=11, gb_free=2.8, wall=14097
2021-06-18 22:33:54 | INFO | train_inner | epoch 001: 1273 / 3002 loss=3.014, ppl=8.08, wps=5955.1, ups=0.09, wpb=64860, bsz=128, num_updates=1259, lr=9.99979e-05, gnorm=3.225, loss_scale=1, train_wall=10, gb_free=2.8, wall=14108
2021-06-18 22:34:05 | INFO | train_inner | epoch 001: 1274 / 3002 loss=3.101, ppl=8.58, wps=5858.3, ups=0.09, wpb=64786, bsz=128, num_updates=1260, lr=9.99979e-05, gnorm=2.479, loss_scale=1, train_wall=11, gb_free=2.8, wall=14119
2021-06-18 22:34:16 | INFO | train_inner | epoch 001: 1275 / 3002 loss=3.329, ppl=10.05, wps=5821.6, ups=0.09, wpb=64827, bsz=128, num_updates=1261, lr=9.99979e-05, gnorm=2.689, loss_scale=1, train_wall=11, gb_free=2.8, wall=14130
2021-06-18 22:34:27 | INFO | train_inner | epoch 001: 1276 / 3002 loss=3.067, ppl=8.38, wps=5859.2, ups=0.09, wpb=64823, bsz=128, num_updates=1262, lr=9.99979e-05, gnorm=2.575, loss_scale=1, train_wall=11, gb_free=2.8, wall=14142
2021-06-18 22:34:38 | INFO | train_inner | epoch 001: 1277 / 3002 loss=3.228, ppl=9.37, wps=5810, ups=0.09, wpb=64834, bsz=128, num_updates=1263, lr=9.99979e-05, gnorm=2.592, loss_scale=1, train_wall=11, gb_free=2.8, wall=14153
2021-06-18 22:34:49 | INFO | train_inner | epoch 001: 1278 / 3002 loss=3.027, ppl=8.15, wps=5837.6, ups=0.09, wpb=64875, bsz=128, num_updates=1264, lr=9.99979e-05, gnorm=2.615, loss_scale=1, train_wall=11, gb_free=2.8, wall=14164
2021-06-18 22:35:01 | INFO | train_inner | epoch 001: 1279 / 3002 loss=3.027, ppl=8.15, wps=5871.6, ups=0.09, wpb=64881, bsz=128, num_updates=1265, lr=9.99979e-05, gnorm=2.631, loss_scale=1, train_wall=11, gb_free=2.8, wall=14175
2021-06-18 22:35:12 | INFO | train_inner | epoch 001: 1280 / 3002 loss=2.975, ppl=7.86, wps=5809.9, ups=0.09, wpb=64852, bsz=128, num_updates=1266, lr=9.99979e-05, gnorm=2.433, loss_scale=1, train_wall=11, gb_free=2.8, wall=14186
2021-06-18 22:35:23 | INFO | train_inner | epoch 001: 1281 / 3002 loss=3.096, ppl=8.55, wps=5829.8, ups=0.09, wpb=64812, bsz=128, num_updates=1267, lr=9.99979e-05, gnorm=2.677, loss_scale=1, train_wall=11, gb_free=2.8, wall=14197
2021-06-18 22:35:34 | INFO | train_inner | epoch 001: 1282 / 3002 loss=3.168, ppl=8.99, wps=5923.8, ups=0.09, wpb=64815, bsz=128, num_updates=1268, lr=9.99979e-05, gnorm=2.635, loss_scale=1, train_wall=10, gb_free=2.8, wall=14208
2021-06-18 22:35:45 | INFO | train_inner | epoch 001: 1283 / 3002 loss=2.936, ppl=7.65, wps=5884, ups=0.09, wpb=64830, bsz=128, num_updates=1269, lr=9.99978e-05, gnorm=2.509, loss_scale=1, train_wall=11, gb_free=2.8, wall=14219
2021-06-18 22:35:56 | INFO | train_inner | epoch 001: 1284 / 3002 loss=3.009, ppl=8.05, wps=5806.9, ups=0.09, wpb=64782, bsz=128, num_updates=1270, lr=9.99978e-05, gnorm=2.542, loss_scale=1, train_wall=11, gb_free=2.8, wall=14230
2021-06-18 22:36:07 | INFO | train_inner | epoch 001: 1285 / 3002 loss=3.073, ppl=8.42, wps=5845.3, ups=0.09, wpb=64899, bsz=128, num_updates=1271, lr=9.99978e-05, gnorm=2.577, loss_scale=1, train_wall=11, gb_free=2.8, wall=14241
2021-06-18 22:36:18 | INFO | train_inner | epoch 001: 1286 / 3002 loss=3.077, ppl=8.44, wps=5913.1, ups=0.09, wpb=64793, bsz=128, num_updates=1272, lr=9.99978e-05, gnorm=2.578, loss_scale=1, train_wall=11, gb_free=2.8, wall=14252
2021-06-18 22:36:29 | INFO | train_inner | epoch 001: 1287 / 3002 loss=3.024, ppl=8.14, wps=5975.8, ups=0.09, wpb=64906, bsz=128, num_updates=1273, lr=9.99978e-05, gnorm=2.833, loss_scale=1, train_wall=10, gb_free=2.8, wall=14263
2021-06-18 22:36:40 | INFO | train_inner | epoch 001: 1288 / 3002 loss=2.868, ppl=7.3, wps=5927.4, ups=0.09, wpb=64891, bsz=128, num_updates=1274, lr=9.99978e-05, gnorm=2.757, loss_scale=1, train_wall=10, gb_free=2.8, wall=14274
2021-06-18 22:36:51 | INFO | train_inner | epoch 001: 1289 / 3002 loss=3.026, ppl=8.14, wps=5868.4, ups=0.09, wpb=64839, bsz=128, num_updates=1275, lr=9.99978e-05, gnorm=2.625, loss_scale=1, train_wall=11, gb_free=2.8, wall=14285
2021-06-18 22:37:02 | INFO | train_inner | epoch 001: 1290 / 3002 loss=3.119, ppl=8.69, wps=5979.5, ups=0.09, wpb=64776, bsz=128, num_updates=1276, lr=9.99978e-05, gnorm=4.228, loss_scale=1, train_wall=10, gb_free=2.8, wall=14296
2021-06-18 22:37:13 | INFO | train_inner | epoch 001: 1291 / 3002 loss=2.983, ppl=7.91, wps=5911.7, ups=0.09, wpb=64837, bsz=128, num_updates=1277, lr=9.99978e-05, gnorm=2.55, loss_scale=1, train_wall=11, gb_free=2.8, wall=14307
2021-06-18 22:37:24 | INFO | train_inner | epoch 001: 1292 / 3002 loss=3.089, ppl=8.51, wps=5783.7, ups=0.09, wpb=64825, bsz=128, num_updates=1278, lr=9.99978e-05, gnorm=2.597, loss_scale=1, train_wall=11, gb_free=2.8, wall=14318
2021-06-18 22:37:35 | INFO | train_inner | epoch 001: 1293 / 3002 loss=3.08, ppl=8.46, wps=5797, ups=0.09, wpb=64797, bsz=128, num_updates=1279, lr=9.99978e-05, gnorm=2.544, loss_scale=1, train_wall=11, gb_free=2.8, wall=14329
2021-06-18 22:37:46 | INFO | train_inner | epoch 001: 1294 / 3002 loss=3.033, ppl=8.19, wps=5963.8, ups=0.09, wpb=64883, bsz=128, num_updates=1280, lr=9.99978e-05, gnorm=2.748, loss_scale=1, train_wall=10, gb_free=2.8, wall=14340
2021-06-18 22:37:57 | INFO | train_inner | epoch 001: 1295 / 3002 loss=3.273, ppl=9.67, wps=5903.5, ups=0.09, wpb=64751, bsz=128, num_updates=1281, lr=9.99978e-05, gnorm=5.855, loss_scale=1, train_wall=11, gb_free=2.8, wall=14351
2021-06-18 22:38:08 | INFO | train_inner | epoch 001: 1296 / 3002 loss=2.982, ppl=7.9, wps=5753.5, ups=0.09, wpb=64867, bsz=128, num_updates=1282, lr=9.99977e-05, gnorm=2.443, loss_scale=1, train_wall=11, gb_free=2.8, wall=14362
2021-06-18 22:38:19 | INFO | train_inner | epoch 001: 1297 / 3002 loss=3.278, ppl=9.7, wps=5917.5, ups=0.09, wpb=64831, bsz=128, num_updates=1283, lr=9.99977e-05, gnorm=2.66, loss_scale=1, train_wall=11, gb_free=2.8, wall=14373
2021-06-18 22:38:30 | INFO | train_inner | epoch 001: 1298 / 3002 loss=3.061, ppl=8.35, wps=5886.6, ups=0.09, wpb=64872, bsz=128, num_updates=1284, lr=9.99977e-05, gnorm=2.804, loss_scale=1, train_wall=11, gb_free=2.8, wall=14384
2021-06-18 22:38:41 | INFO | train_inner | epoch 001: 1299 / 3002 loss=3.188, ppl=9.11, wps=5734, ups=0.09, wpb=64845, bsz=128, num_updates=1285, lr=9.99977e-05, gnorm=2.843, loss_scale=1, train_wall=11, gb_free=2.8, wall=14396
2021-06-18 22:38:53 | INFO | train_inner | epoch 001: 1300 / 3002 loss=3.039, ppl=8.22, wps=5865, ups=0.09, wpb=64851, bsz=128, num_updates=1286, lr=9.99977e-05, gnorm=2.453, loss_scale=1, train_wall=11, gb_free=2.8, wall=14407
2021-06-18 22:39:04 | INFO | train_inner | epoch 001: 1301 / 3002 loss=3.085, ppl=8.49, wps=5861.8, ups=0.09, wpb=64843, bsz=128, num_updates=1287, lr=9.99977e-05, gnorm=2.973, loss_scale=1, train_wall=11, gb_free=2.8, wall=14418
2021-06-18 22:39:15 | INFO | train_inner | epoch 001: 1302 / 3002 loss=3.086, ppl=8.49, wps=5818.1, ups=0.09, wpb=64803, bsz=128, num_updates=1288, lr=9.99977e-05, gnorm=2.579, loss_scale=1, train_wall=11, gb_free=2.8, wall=14429
2021-06-18 22:39:26 | INFO | train_inner | epoch 001: 1303 / 3002 loss=3.065, ppl=8.37, wps=5881.9, ups=0.09, wpb=64760, bsz=128, num_updates=1289, lr=9.99977e-05, gnorm=2.625, loss_scale=1, train_wall=11, gb_free=2.8, wall=14440
2021-06-18 22:39:37 | INFO | train_inner | epoch 001: 1304 / 3002 loss=3.007, ppl=8.04, wps=5857.7, ups=0.09, wpb=64880, bsz=128, num_updates=1290, lr=9.99977e-05, gnorm=2.7, loss_scale=1, train_wall=11, gb_free=2.8, wall=14451
2021-06-18 22:39:48 | INFO | train_inner | epoch 001: 1305 / 3002 loss=2.913, ppl=7.53, wps=5798.2, ups=0.09, wpb=64797, bsz=128, num_updates=1291, lr=9.99977e-05, gnorm=2.628, loss_scale=1, train_wall=11, gb_free=2.8, wall=14462
2021-06-18 22:39:59 | INFO | train_inner | epoch 001: 1306 / 3002 loss=2.978, ppl=7.88, wps=5848.9, ups=0.09, wpb=64843, bsz=128, num_updates=1292, lr=9.99977e-05, gnorm=2.584, loss_scale=1, train_wall=11, gb_free=2.8, wall=14473
2021-06-18 22:40:10 | INFO | train_inner | epoch 001: 1307 / 3002 loss=3.152, ppl=8.89, wps=5958.1, ups=0.09, wpb=64783, bsz=128, num_updates=1293, lr=9.99977e-05, gnorm=2.59, loss_scale=1, train_wall=10, gb_free=2.8, wall=14484
2021-06-18 22:40:21 | INFO | train_inner | epoch 001: 1308 / 3002 loss=3.051, ppl=8.29, wps=5754, ups=0.09, wpb=64824, bsz=128, num_updates=1294, lr=9.99976e-05, gnorm=2.636, loss_scale=1, train_wall=11, gb_free=2.8, wall=14496
2021-06-18 22:40:33 | INFO | train_inner | epoch 001: 1309 / 3002 loss=3.184, ppl=9.09, wps=5732, ups=0.09, wpb=64851, bsz=128, num_updates=1295, lr=9.99976e-05, gnorm=2.702, loss_scale=1, train_wall=11, gb_free=2.8, wall=14507
2021-06-18 22:40:44 | INFO | train_inner | epoch 001: 1310 / 3002 loss=3.08, ppl=8.46, wps=5830.8, ups=0.09, wpb=64804, bsz=128, num_updates=1296, lr=9.99976e-05, gnorm=2.562, loss_scale=1, train_wall=11, gb_free=2.8, wall=14518
2021-06-18 22:40:55 | INFO | train_inner | epoch 001: 1311 / 3002 loss=3.096, ppl=8.55, wps=5949.6, ups=0.09, wpb=64890, bsz=128, num_updates=1297, lr=9.99976e-05, gnorm=2.446, loss_scale=1, train_wall=10, gb_free=2.8, wall=14529
2021-06-18 22:41:05 | INFO | train_inner | epoch 001: 1312 / 3002 loss=2.957, ppl=7.76, wps=5914.9, ups=0.09, wpb=64774, bsz=128, num_updates=1298, lr=9.99976e-05, gnorm=2.627, loss_scale=1, train_wall=11, gb_free=2.8, wall=14540
2021-06-18 22:41:16 | INFO | train_inner | epoch 001: 1313 / 3002 loss=2.983, ppl=7.91, wps=5921.7, ups=0.09, wpb=64768, bsz=128, num_updates=1299, lr=9.99976e-05, gnorm=2.555, loss_scale=1, train_wall=10, gb_free=2.8, wall=14551
2021-06-18 22:41:28 | INFO | train_inner | epoch 001: 1314 / 3002 loss=3.328, ppl=10.04, wps=5837.1, ups=0.09, wpb=64793, bsz=128, num_updates=1300, lr=9.99976e-05, gnorm=2.716, loss_scale=1, train_wall=11, gb_free=2.8, wall=14562
2021-06-18 22:41:39 | INFO | train_inner | epoch 001: 1315 / 3002 loss=3.188, ppl=9.11, wps=5861.8, ups=0.09, wpb=64886, bsz=128, num_updates=1301, lr=9.99976e-05, gnorm=2.501, loss_scale=1, train_wall=11, gb_free=2.8, wall=14573
2021-06-18 22:41:49 | INFO | train_inner | epoch 001: 1316 / 3002 loss=3.005, ppl=8.03, wps=5960.7, ups=0.09, wpb=64890, bsz=128, num_updates=1302, lr=9.99976e-05, gnorm=2.484, loss_scale=1, train_wall=10, gb_free=2.8, wall=14584
2021-06-18 22:42:01 | INFO | train_inner | epoch 001: 1317 / 3002 loss=3.06, ppl=8.34, wps=5770.4, ups=0.09, wpb=64791, bsz=128, num_updates=1303, lr=9.99976e-05, gnorm=2.643, loss_scale=1, train_wall=11, gb_free=2.8, wall=14595
2021-06-18 22:42:12 | INFO | train_inner | epoch 001: 1318 / 3002 loss=3.01, ppl=8.06, wps=5933.1, ups=0.09, wpb=64793, bsz=128, num_updates=1304, lr=9.99976e-05, gnorm=2.456, loss_scale=1, train_wall=10, gb_free=2.8, wall=14606
2021-06-18 22:42:23 | INFO | train_inner | epoch 001: 1319 / 3002 loss=3.063, ppl=8.36, wps=5902.7, ups=0.09, wpb=64761, bsz=128, num_updates=1305, lr=9.99976e-05, gnorm=2.676, loss_scale=1, train_wall=11, gb_free=2.8, wall=14617
2021-06-18 22:42:34 | INFO | train_inner | epoch 001: 1320 / 3002 loss=2.955, ppl=7.75, wps=5806, ups=0.09, wpb=64863, bsz=128, num_updates=1306, lr=9.99976e-05, gnorm=2.552, loss_scale=1, train_wall=11, gb_free=2.8, wall=14628
2021-06-18 22:42:45 | INFO | train_inner | epoch 001: 1321 / 3002 loss=3.157, ppl=8.92, wps=5798.9, ups=0.09, wpb=64842, bsz=128, num_updates=1307, lr=9.99975e-05, gnorm=2.711, loss_scale=1, train_wall=11, gb_free=2.8, wall=14639
2021-06-18 22:42:56 | INFO | train_inner | epoch 001: 1322 / 3002 loss=2.905, ppl=7.49, wps=5862.2, ups=0.09, wpb=64918, bsz=128, num_updates=1308, lr=9.99975e-05, gnorm=2.54, loss_scale=1, train_wall=11, gb_free=2.8, wall=14650
2021-06-18 22:43:07 | INFO | train_inner | epoch 001: 1323 / 3002 loss=3.161, ppl=8.95, wps=5889.5, ups=0.09, wpb=64827, bsz=128, num_updates=1309, lr=9.99975e-05, gnorm=2.623, loss_scale=1, train_wall=11, gb_free=2.8, wall=14661
2021-06-18 22:43:18 | INFO | train_inner | epoch 001: 1324 / 3002 loss=2.933, ppl=7.64, wps=5905.1, ups=0.09, wpb=64808, bsz=128, num_updates=1310, lr=9.99975e-05, gnorm=2.626, loss_scale=1, train_wall=11, gb_free=2.8, wall=14672
2021-06-18 22:43:29 | INFO | train_inner | epoch 001: 1325 / 3002 loss=2.978, ppl=7.88, wps=5823.7, ups=0.09, wpb=64867, bsz=128, num_updates=1311, lr=9.99975e-05, gnorm=2.552, loss_scale=1, train_wall=11, gb_free=2.8, wall=14683
2021-06-18 22:43:40 | INFO | train_inner | epoch 001: 1326 / 3002 loss=3.083, ppl=8.48, wps=5928.6, ups=0.09, wpb=64882, bsz=128, num_updates=1312, lr=9.99975e-05, gnorm=2.643, loss_scale=1, train_wall=11, gb_free=2.8, wall=14694
2021-06-18 22:43:51 | INFO | train_inner | epoch 001: 1327 / 3002 loss=3.107, ppl=8.62, wps=5721.2, ups=0.09, wpb=64763, bsz=128, num_updates=1313, lr=9.99975e-05, gnorm=2.556, loss_scale=1, train_wall=11, gb_free=2.8, wall=14706
2021-06-18 22:44:03 | INFO | train_inner | epoch 001: 1328 / 3002 loss=2.875, ppl=7.34, wps=5811.7, ups=0.09, wpb=64777, bsz=128, num_updates=1314, lr=9.99975e-05, gnorm=2.497, loss_scale=1, train_wall=11, gb_free=2.8, wall=14717
2021-06-18 22:44:14 | INFO | train_inner | epoch 001: 1329 / 3002 loss=3.151, ppl=8.88, wps=5908.3, ups=0.09, wpb=64699, bsz=128, num_updates=1315, lr=9.99975e-05, gnorm=2.654, loss_scale=1, train_wall=10, gb_free=2.8, wall=14728
2021-06-18 22:44:25 | INFO | train_inner | epoch 001: 1330 / 3002 loss=3.057, ppl=8.32, wps=5783.4, ups=0.09, wpb=64848, bsz=128, num_updates=1316, lr=9.99975e-05, gnorm=2.54, loss_scale=1, train_wall=11, gb_free=2.8, wall=14739
2021-06-18 22:44:36 | INFO | train_inner | epoch 001: 1331 / 3002 loss=3.05, ppl=8.28, wps=5883.8, ups=0.09, wpb=64766, bsz=128, num_updates=1317, lr=9.99975e-05, gnorm=2.634, loss_scale=1, train_wall=11, gb_free=2.8, wall=14750
2021-06-18 22:44:47 | INFO | train_inner | epoch 001: 1332 / 3002 loss=3.053, ppl=8.3, wps=5758.9, ups=0.09, wpb=64931, bsz=128, num_updates=1318, lr=9.99975e-05, gnorm=2.463, loss_scale=1, train_wall=11, gb_free=2.8, wall=14761
2021-06-18 22:44:58 | INFO | train_inner | epoch 001: 1333 / 3002 loss=3.012, ppl=8.07, wps=5752.5, ups=0.09, wpb=64767, bsz=128, num_updates=1319, lr=9.99974e-05, gnorm=2.494, loss_scale=1, train_wall=11, gb_free=2.8, wall=14773
2021-06-18 22:45:10 | INFO | train_inner | epoch 001: 1334 / 3002 loss=3.173, ppl=9.02, wps=5737.4, ups=0.09, wpb=64873, bsz=128, num_updates=1320, lr=9.99974e-05, gnorm=2.646, loss_scale=1, train_wall=11, gb_free=2.8, wall=14784
2021-06-18 22:45:21 | INFO | train_inner | epoch 001: 1335 / 3002 loss=3.037, ppl=8.21, wps=5890.3, ups=0.09, wpb=64823, bsz=128, num_updates=1321, lr=9.99974e-05, gnorm=2.519, loss_scale=1, train_wall=11, gb_free=2.8, wall=14795
2021-06-18 22:45:31 | INFO | train_inner | epoch 001: 1336 / 3002 loss=3.035, ppl=8.2, wps=5970.1, ups=0.09, wpb=64816, bsz=128, num_updates=1322, lr=9.99974e-05, gnorm=2.585, loss_scale=1, train_wall=10, gb_free=2.8, wall=14806
2021-06-18 22:45:43 | INFO | train_inner | epoch 001: 1337 / 3002 loss=3.192, ppl=9.14, wps=5767.4, ups=0.09, wpb=64874, bsz=128, num_updates=1323, lr=9.99974e-05, gnorm=2.591, loss_scale=1, train_wall=11, gb_free=2.8, wall=14817
2021-06-18 22:45:54 | INFO | train_inner | epoch 001: 1338 / 3002 loss=3.05, ppl=8.28, wps=5780.7, ups=0.09, wpb=64832, bsz=128, num_updates=1324, lr=9.99974e-05, gnorm=2.401, loss_scale=1, train_wall=11, gb_free=2.8, wall=14828
2021-06-18 22:46:05 | INFO | train_inner | epoch 001: 1339 / 3002 loss=3.044, ppl=8.25, wps=5908.3, ups=0.09, wpb=64884, bsz=128, num_updates=1325, lr=9.99974e-05, gnorm=2.661, loss_scale=1, train_wall=11, gb_free=2.8, wall=14839
2021-06-18 22:46:16 | INFO | train_inner | epoch 001: 1340 / 3002 loss=3.033, ppl=8.18, wps=5959.1, ups=0.09, wpb=64850, bsz=128, num_updates=1326, lr=9.99974e-05, gnorm=2.533, loss_scale=1, train_wall=10, gb_free=2.8, wall=14850
2021-06-18 22:46:27 | INFO | train_inner | epoch 001: 1341 / 3002 loss=3.067, ppl=8.38, wps=5825.8, ups=0.09, wpb=64802, bsz=128, num_updates=1327, lr=9.99974e-05, gnorm=2.439, loss_scale=1, train_wall=11, gb_free=2.8, wall=14861
2021-06-18 22:46:38 | INFO | train_inner | epoch 001: 1342 / 3002 loss=3.08, ppl=8.45, wps=5838.4, ups=0.09, wpb=64876, bsz=128, num_updates=1328, lr=9.99974e-05, gnorm=2.531, loss_scale=1, train_wall=11, gb_free=2.8, wall=14872
2021-06-18 22:46:49 | INFO | train_inner | epoch 001: 1343 / 3002 loss=3.087, ppl=8.5, wps=5818, ups=0.09, wpb=64889, bsz=128, num_updates=1329, lr=9.99974e-05, gnorm=2.642, loss_scale=1, train_wall=11, gb_free=2.8, wall=14883
2021-06-18 22:47:00 | INFO | train_inner | epoch 001: 1344 / 3002 loss=2.983, ppl=7.91, wps=5939.9, ups=0.09, wpb=64851, bsz=128, num_updates=1330, lr=9.99974e-05, gnorm=2.561, loss_scale=1, train_wall=10, gb_free=2.8, wall=14894
2021-06-18 22:47:11 | INFO | train_inner | epoch 001: 1345 / 3002 loss=2.929, ppl=7.62, wps=5846.7, ups=0.09, wpb=64731, bsz=128, num_updates=1331, lr=9.99974e-05, gnorm=2.468, loss_scale=1, train_wall=11, gb_free=2.8, wall=14905
2021-06-18 22:47:22 | INFO | train_inner | epoch 001: 1346 / 3002 loss=2.761, ppl=6.78, wps=5784.7, ups=0.09, wpb=64797, bsz=128, num_updates=1332, lr=9.99973e-05, gnorm=2.461, loss_scale=1, train_wall=11, gb_free=2.8, wall=14917
2021-06-18 22:47:33 | INFO | train_inner | epoch 001: 1347 / 3002 loss=3.025, ppl=8.14, wps=5954.4, ups=0.09, wpb=64812, bsz=128, num_updates=1333, lr=9.99973e-05, gnorm=3.014, loss_scale=1, train_wall=10, gb_free=2.8, wall=14928
2021-06-18 22:47:44 | INFO | train_inner | epoch 001: 1348 / 3002 loss=3.181, ppl=9.07, wps=5930.3, ups=0.09, wpb=64796, bsz=128, num_updates=1334, lr=9.99973e-05, gnorm=2.634, loss_scale=1, train_wall=10, gb_free=2.8, wall=14938
2021-06-18 22:47:55 | INFO | train_inner | epoch 001: 1349 / 3002 loss=3.03, ppl=8.17, wps=5951, ups=0.09, wpb=64808, bsz=128, num_updates=1335, lr=9.99973e-05, gnorm=2.529, loss_scale=1, train_wall=10, gb_free=2.8, wall=14949
2021-06-18 22:48:06 | INFO | train_inner | epoch 001: 1350 / 3002 loss=2.956, ppl=7.76, wps=5840.3, ups=0.09, wpb=64826, bsz=128, num_updates=1336, lr=9.99973e-05, gnorm=2.427, loss_scale=1, train_wall=11, gb_free=2.8, wall=14960
2021-06-18 22:48:17 | INFO | train_inner | epoch 001: 1351 / 3002 loss=2.983, ppl=7.91, wps=5866, ups=0.09, wpb=64883, bsz=128, num_updates=1337, lr=9.99973e-05, gnorm=2.512, loss_scale=1, train_wall=11, gb_free=2.8, wall=14972
2021-06-18 22:48:28 | INFO | train_inner | epoch 001: 1352 / 3002 loss=3.003, ppl=8.02, wps=5985.6, ups=0.09, wpb=64868, bsz=128, num_updates=1338, lr=9.99973e-05, gnorm=2.537, loss_scale=1, train_wall=10, gb_free=2.8, wall=14982
2021-06-18 22:48:39 | INFO | train_inner | epoch 001: 1353 / 3002 loss=3.073, ppl=8.42, wps=5811, ups=0.09, wpb=64854, bsz=128, num_updates=1339, lr=9.99973e-05, gnorm=2.445, loss_scale=1, train_wall=11, gb_free=2.8, wall=14994
2021-06-18 22:48:50 | INFO | train_inner | epoch 001: 1354 / 3002 loss=3.021, ppl=8.12, wps=5777.2, ups=0.09, wpb=64830, bsz=128, num_updates=1340, lr=9.99973e-05, gnorm=2.625, loss_scale=1, train_wall=11, gb_free=2.8, wall=15005
2021-06-18 22:49:01 | INFO | train_inner | epoch 001: 1355 / 3002 loss=3.109, ppl=8.63, wps=5960, ups=0.09, wpb=64829, bsz=128, num_updates=1341, lr=9.99973e-05, gnorm=2.59, loss_scale=1, train_wall=10, gb_free=2.8, wall=15016
2021-06-18 22:49:12 | INFO | train_inner | epoch 001: 1356 / 3002 loss=3.063, ppl=8.36, wps=5803.4, ups=0.09, wpb=64799, bsz=128, num_updates=1342, lr=9.99973e-05, gnorm=2.799, loss_scale=1, train_wall=11, gb_free=2.8, wall=15027
2021-06-18 22:49:24 | INFO | train_inner | epoch 001: 1357 / 3002 loss=3.013, ppl=8.07, wps=5852.8, ups=0.09, wpb=64812, bsz=128, num_updates=1343, lr=9.99973e-05, gnorm=2.538, loss_scale=1, train_wall=11, gb_free=2.8, wall=15038
2021-06-18 22:49:34 | INFO | train_inner | epoch 001: 1358 / 3002 loss=3.101, ppl=8.58, wps=5968.1, ups=0.09, wpb=64836, bsz=128, num_updates=1344, lr=9.99972e-05, gnorm=2.526, loss_scale=1, train_wall=10, gb_free=2.8, wall=15049
2021-06-18 22:49:46 | INFO | train_inner | epoch 001: 1359 / 3002 loss=3.005, ppl=8.03, wps=5834, ups=0.09, wpb=64834, bsz=128, num_updates=1345, lr=9.99972e-05, gnorm=2.548, loss_scale=1, train_wall=11, gb_free=2.8, wall=15060
2021-06-18 22:49:57 | INFO | train_inner | epoch 001: 1360 / 3002 loss=3.076, ppl=8.43, wps=5855.6, ups=0.09, wpb=64761, bsz=128, num_updates=1346, lr=9.99972e-05, gnorm=2.481, loss_scale=1, train_wall=11, gb_free=2.8, wall=15071
2021-06-18 22:50:08 | INFO | train_inner | epoch 001: 1361 / 3002 loss=2.946, ppl=7.71, wps=5907.5, ups=0.09, wpb=64780, bsz=128, num_updates=1347, lr=9.99972e-05, gnorm=2.441, loss_scale=1, train_wall=11, gb_free=2.8, wall=15082
2021-06-18 22:50:19 | INFO | train_inner | epoch 001: 1362 / 3002 loss=3.092, ppl=8.53, wps=5900.6, ups=0.09, wpb=64838, bsz=128, num_updates=1348, lr=9.99972e-05, gnorm=2.59, loss_scale=1, train_wall=11, gb_free=2.8, wall=15093
2021-06-18 22:50:29 | INFO | train_inner | epoch 001: 1363 / 3002 loss=3.191, ppl=9.13, wps=6017.6, ups=0.09, wpb=64831, bsz=128, num_updates=1349, lr=9.99972e-05, gnorm=2.727, loss_scale=1, train_wall=10, gb_free=2.8, wall=15104
2021-06-18 22:50:40 | INFO | train_inner | epoch 001: 1364 / 3002 loss=2.985, ppl=7.92, wps=5970.6, ups=0.09, wpb=64874, bsz=128, num_updates=1350, lr=9.99972e-05, gnorm=5.209, loss_scale=1, train_wall=10, gb_free=2.8, wall=15115
2021-06-18 22:50:51 | INFO | train_inner | epoch 001: 1365 / 3002 loss=3.148, ppl=8.86, wps=5935.9, ups=0.09, wpb=64767, bsz=128, num_updates=1351, lr=9.99972e-05, gnorm=2.405, loss_scale=1, train_wall=10, gb_free=2.8, wall=15125
2021-06-18 22:51:02 | INFO | train_inner | epoch 001: 1366 / 3002 loss=3.121, ppl=8.7, wps=5850.8, ups=0.09, wpb=64866, bsz=128, num_updates=1352, lr=9.99972e-05, gnorm=3.326, loss_scale=1, train_wall=11, gb_free=2.8, wall=15137
2021-06-18 22:51:13 | INFO | train_inner | epoch 001: 1367 / 3002 loss=2.884, ppl=7.38, wps=5927.5, ups=0.09, wpb=64851, bsz=128, num_updates=1353, lr=9.99972e-05, gnorm=2.615, loss_scale=1, train_wall=10, gb_free=2.8, wall=15147
2021-06-18 22:51:24 | INFO | train_inner | epoch 001: 1368 / 3002 loss=3.144, ppl=8.84, wps=5755.8, ups=0.09, wpb=64841, bsz=128, num_updates=1354, lr=9.99972e-05, gnorm=2.693, loss_scale=1, train_wall=11, gb_free=2.8, wall=15159
2021-06-18 22:51:36 | INFO | train_inner | epoch 001: 1369 / 3002 loss=3.207, ppl=9.24, wps=5686.9, ups=0.09, wpb=64850, bsz=128, num_updates=1355, lr=9.99972e-05, gnorm=2.638, loss_scale=1, train_wall=11, gb_free=2.8, wall=15170
2021-06-18 22:51:47 | INFO | train_inner | epoch 001: 1370 / 3002 loss=3.02, ppl=8.11, wps=5699.7, ups=0.09, wpb=64819, bsz=128, num_updates=1356, lr=9.99972e-05, gnorm=2.657, loss_scale=1, train_wall=11, gb_free=2.8, wall=15181
2021-06-18 22:51:58 | INFO | train_inner | epoch 001: 1371 / 3002 loss=3.056, ppl=8.32, wps=5909.7, ups=0.09, wpb=64724, bsz=128, num_updates=1357, lr=9.99971e-05, gnorm=2.872, loss_scale=1, train_wall=11, gb_free=2.8, wall=15192
2021-06-18 22:52:09 | INFO | train_inner | epoch 001: 1372 / 3002 loss=3.152, ppl=8.89, wps=5874.6, ups=0.09, wpb=64780, bsz=128, num_updates=1358, lr=9.99971e-05, gnorm=2.745, loss_scale=1, train_wall=11, gb_free=2.8, wall=15203
2021-06-18 22:52:20 | INFO | train_inner | epoch 001: 1373 / 3002 loss=3.073, ppl=8.41, wps=5792.3, ups=0.09, wpb=64766, bsz=128, num_updates=1359, lr=9.99971e-05, gnorm=2.526, loss_scale=1, train_wall=11, gb_free=2.8, wall=15215
2021-06-18 22:52:31 | INFO | train_inner | epoch 001: 1374 / 3002 loss=3.085, ppl=8.49, wps=5923.9, ups=0.09, wpb=64921, bsz=128, num_updates=1360, lr=9.99971e-05, gnorm=25.934, loss_scale=1, train_wall=10, gb_free=2.8, wall=15226
2021-06-18 22:52:42 | INFO | train_inner | epoch 001: 1375 / 3002 loss=3.354, ppl=10.23, wps=5930.7, ups=0.09, wpb=64835, bsz=128, num_updates=1361, lr=9.99971e-05, gnorm=2.713, loss_scale=2, train_wall=10, gb_free=2.8, wall=15237
2021-06-18 22:52:53 | INFO | train_inner | epoch 001: 1376 / 3002 loss=3.016, ppl=8.09, wps=5777.5, ups=0.09, wpb=64800, bsz=128, num_updates=1362, lr=9.99971e-05, gnorm=3.702, loss_scale=2, train_wall=11, gb_free=2.8, wall=15248
2021-06-18 22:53:05 | INFO | train_inner | epoch 001: 1377 / 3002 loss=3.176, ppl=9.04, wps=5758.1, ups=0.09, wpb=64823, bsz=128, num_updates=1363, lr=9.99971e-05, gnorm=3.501, loss_scale=2, train_wall=11, gb_free=2.8, wall=15259
2021-06-18 22:53:16 | INFO | train_inner | epoch 001: 1378 / 3002 loss=3.275, ppl=9.68, wps=5841.7, ups=0.09, wpb=64819, bsz=128, num_updates=1364, lr=9.99971e-05, gnorm=3.395, loss_scale=2, train_wall=11, gb_free=2.8, wall=15270
2021-06-18 22:53:27 | INFO | train_inner | epoch 001: 1379 / 3002 loss=3.181, ppl=9.07, wps=5668.6, ups=0.09, wpb=64846, bsz=128, num_updates=1365, lr=9.99971e-05, gnorm=4.376, loss_scale=2, train_wall=11, gb_free=2.8, wall=15282
2021-06-18 22:53:38 | INFO | train_inner | epoch 001: 1380 / 3002 loss=3.215, ppl=9.29, wps=5791.4, ups=0.09, wpb=64699, bsz=128, num_updates=1366, lr=9.99971e-05, gnorm=3.361, loss_scale=2, train_wall=11, gb_free=2.8, wall=15293
2021-06-18 22:53:50 | INFO | train_inner | epoch 001: 1381 / 3002 loss=3.157, ppl=8.92, wps=5736.5, ups=0.09, wpb=64770, bsz=128, num_updates=1367, lr=9.99971e-05, gnorm=3.123, loss_scale=2, train_wall=11, gb_free=2.8, wall=15304
2021-06-18 22:54:01 | INFO | train_inner | epoch 001: 1382 / 3002 loss=3.236, ppl=9.42, wps=5910.7, ups=0.09, wpb=64861, bsz=128, num_updates=1368, lr=9.99971e-05, gnorm=4.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=15315
2021-06-18 22:54:12 | INFO | train_inner | epoch 001: 1383 / 3002 loss=2.981, ppl=7.89, wps=5875.2, ups=0.09, wpb=64856, bsz=128, num_updates=1369, lr=9.9997e-05, gnorm=3.226, loss_scale=2, train_wall=11, gb_free=2.8, wall=15326
2021-06-18 22:54:23 | INFO | train_inner | epoch 001: 1384 / 3002 loss=3.063, ppl=8.36, wps=5851.9, ups=0.09, wpb=64818, bsz=128, num_updates=1370, lr=9.9997e-05, gnorm=3.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=15337
2021-06-18 22:54:33 | INFO | train_inner | epoch 001: 1385 / 3002 loss=3.061, ppl=8.34, wps=6138.2, ups=0.09, wpb=64850, bsz=128, num_updates=1371, lr=9.9997e-05, gnorm=2.945, loss_scale=2, train_wall=10, gb_free=2.8, wall=15348
2021-06-18 22:54:44 | INFO | train_inner | epoch 001: 1386 / 3002 loss=3.119, ppl=8.69, wps=5953, ups=0.09, wpb=64822, bsz=128, num_updates=1372, lr=9.9997e-05, gnorm=2.84, loss_scale=2, train_wall=10, gb_free=2.8, wall=15359
2021-06-18 22:54:55 | INFO | train_inner | epoch 001: 1387 / 3002 loss=3.391, ppl=10.49, wps=5877.1, ups=0.09, wpb=64803, bsz=128, num_updates=1373, lr=9.9997e-05, gnorm=2.919, loss_scale=2, train_wall=11, gb_free=2.8, wall=15370
2021-06-18 22:55:06 | INFO | train_inner | epoch 001: 1388 / 3002 loss=3.014, ppl=8.08, wps=5940.8, ups=0.09, wpb=64919, bsz=128, num_updates=1374, lr=9.9997e-05, gnorm=2.784, loss_scale=2, train_wall=10, gb_free=2.8, wall=15381
2021-06-18 22:55:17 | INFO | train_inner | epoch 001: 1389 / 3002 loss=3.064, ppl=8.36, wps=5871.2, ups=0.09, wpb=64826, bsz=128, num_updates=1375, lr=9.9997e-05, gnorm=3.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=15392
2021-06-18 22:55:28 | INFO | train_inner | epoch 001: 1390 / 3002 loss=2.934, ppl=7.64, wps=5796.3, ups=0.09, wpb=64833, bsz=128, num_updates=1376, lr=9.9997e-05, gnorm=2.567, loss_scale=2, train_wall=11, gb_free=2.8, wall=15403
2021-06-18 22:55:40 | INFO | train_inner | epoch 001: 1391 / 3002 loss=3.071, ppl=8.41, wps=5782.2, ups=0.09, wpb=64760, bsz=128, num_updates=1377, lr=9.9997e-05, gnorm=2.623, loss_scale=2, train_wall=11, gb_free=2.8, wall=15414
2021-06-18 22:55:50 | INFO | train_inner | epoch 001: 1392 / 3002 loss=3.157, ppl=8.92, wps=6074.1, ups=0.09, wpb=64857, bsz=128, num_updates=1378, lr=9.9997e-05, gnorm=2.68, loss_scale=2, train_wall=10, gb_free=2.8, wall=15425
2021-06-18 22:56:01 | INFO | train_inner | epoch 001: 1393 / 3002 loss=3.113, ppl=8.65, wps=5904, ups=0.09, wpb=64838, bsz=128, num_updates=1379, lr=9.9997e-05, gnorm=2.809, loss_scale=2, train_wall=11, gb_free=2.8, wall=15436
2021-06-18 22:56:12 | INFO | train_inner | epoch 001: 1394 / 3002 loss=3.091, ppl=8.52, wps=5872.2, ups=0.09, wpb=64835, bsz=128, num_updates=1380, lr=9.9997e-05, gnorm=2.594, loss_scale=2, train_wall=11, gb_free=2.8, wall=15447
2021-06-18 22:56:23 | INFO | train_inner | epoch 001: 1395 / 3002 loss=3.107, ppl=8.61, wps=5793.1, ups=0.09, wpb=64736, bsz=128, num_updates=1381, lr=9.9997e-05, gnorm=2.68, loss_scale=2, train_wall=11, gb_free=2.8, wall=15458
2021-06-18 22:56:35 | INFO | train_inner | epoch 001: 1396 / 3002 loss=2.918, ppl=7.56, wps=5842.6, ups=0.09, wpb=64814, bsz=128, num_updates=1382, lr=9.99969e-05, gnorm=2.628, loss_scale=2, train_wall=11, gb_free=2.8, wall=15469
2021-06-18 22:56:46 | INFO | train_inner | epoch 001: 1397 / 3002 loss=3.085, ppl=8.49, wps=5830.1, ups=0.09, wpb=64890, bsz=128, num_updates=1383, lr=9.99969e-05, gnorm=2.661, loss_scale=2, train_wall=11, gb_free=2.8, wall=15480
2021-06-18 22:56:57 | INFO | train_inner | epoch 001: 1398 / 3002 loss=2.912, ppl=7.53, wps=5874.6, ups=0.09, wpb=64859, bsz=128, num_updates=1384, lr=9.99969e-05, gnorm=2.849, loss_scale=2, train_wall=11, gb_free=2.8, wall=15491
2021-06-18 22:57:08 | INFO | train_inner | epoch 001: 1399 / 3002 loss=3.041, ppl=8.23, wps=5873.8, ups=0.09, wpb=64755, bsz=128, num_updates=1385, lr=9.99969e-05, gnorm=3.197, loss_scale=2, train_wall=11, gb_free=2.8, wall=15502
2021-06-18 22:57:19 | INFO | train_inner | epoch 001: 1400 / 3002 loss=2.889, ppl=7.41, wps=5850.4, ups=0.09, wpb=64829, bsz=128, num_updates=1386, lr=9.99969e-05, gnorm=2.915, loss_scale=2, train_wall=11, gb_free=2.8, wall=15513
2021-06-18 22:57:30 | INFO | train_inner | epoch 001: 1401 / 3002 loss=3.222, ppl=9.33, wps=5773.1, ups=0.09, wpb=64869, bsz=128, num_updates=1387, lr=9.99969e-05, gnorm=2.77, loss_scale=2, train_wall=11, gb_free=2.8, wall=15524
2021-06-18 22:57:41 | INFO | train_inner | epoch 001: 1402 / 3002 loss=3.091, ppl=8.52, wps=5866.9, ups=0.09, wpb=64787, bsz=128, num_updates=1388, lr=9.99969e-05, gnorm=2.618, loss_scale=2, train_wall=11, gb_free=2.8, wall=15535
2021-06-18 22:57:52 | INFO | train_inner | epoch 001: 1403 / 3002 loss=3.181, ppl=9.07, wps=5890.2, ups=0.09, wpb=64802, bsz=128, num_updates=1389, lr=9.99969e-05, gnorm=2.634, loss_scale=2, train_wall=11, gb_free=2.8, wall=15546
2021-06-18 22:58:03 | INFO | train_inner | epoch 001: 1404 / 3002 loss=2.863, ppl=7.27, wps=5891.5, ups=0.09, wpb=64842, bsz=128, num_updates=1390, lr=9.99969e-05, gnorm=2.691, loss_scale=2, train_wall=11, gb_free=2.8, wall=15557
2021-06-18 22:58:14 | INFO | train_inner | epoch 001: 1405 / 3002 loss=2.987, ppl=7.93, wps=6028.9, ups=0.09, wpb=64891, bsz=128, num_updates=1391, lr=9.99969e-05, gnorm=2.681, loss_scale=2, train_wall=10, gb_free=2.8, wall=15568
2021-06-18 22:58:25 | INFO | train_inner | epoch 001: 1406 / 3002 loss=2.988, ppl=7.93, wps=5795.5, ups=0.09, wpb=64789, bsz=128, num_updates=1392, lr=9.99969e-05, gnorm=2.749, loss_scale=2, train_wall=11, gb_free=2.8, wall=15579
2021-06-18 22:58:36 | INFO | train_inner | epoch 001: 1407 / 3002 loss=3.146, ppl=8.85, wps=5864.6, ups=0.09, wpb=64872, bsz=128, num_updates=1393, lr=9.99969e-05, gnorm=2.646, loss_scale=2, train_wall=11, gb_free=2.8, wall=15590
2021-06-18 22:58:47 | INFO | train_inner | epoch 001: 1408 / 3002 loss=3.185, ppl=9.09, wps=5889.7, ups=0.09, wpb=64813, bsz=128, num_updates=1394, lr=9.99968e-05, gnorm=2.605, loss_scale=2, train_wall=11, gb_free=2.8, wall=15601
2021-06-18 22:58:58 | INFO | train_inner | epoch 001: 1409 / 3002 loss=3.067, ppl=8.38, wps=5813.3, ups=0.09, wpb=64749, bsz=128, num_updates=1395, lr=9.99968e-05, gnorm=2.502, loss_scale=2, train_wall=11, gb_free=2.8, wall=15613
2021-06-18 22:59:10 | INFO | train_inner | epoch 001: 1410 / 3002 loss=3.087, ppl=8.5, wps=5764.2, ups=0.09, wpb=64760, bsz=128, num_updates=1396, lr=9.99968e-05, gnorm=2.484, loss_scale=2, train_wall=11, gb_free=2.8, wall=15624
2021-06-18 22:59:21 | INFO | train_inner | epoch 001: 1411 / 3002 loss=3.157, ppl=8.92, wps=5768.5, ups=0.09, wpb=64768, bsz=128, num_updates=1397, lr=9.99968e-05, gnorm=2.551, loss_scale=2, train_wall=11, gb_free=2.8, wall=15635
2021-06-18 22:59:32 | INFO | train_inner | epoch 001: 1412 / 3002 loss=3.096, ppl=8.55, wps=5850.1, ups=0.09, wpb=64751, bsz=128, num_updates=1398, lr=9.99968e-05, gnorm=2.45, loss_scale=2, train_wall=11, gb_free=2.8, wall=15646
2021-06-18 22:59:43 | INFO | train_inner | epoch 001: 1413 / 3002 loss=3.111, ppl=8.64, wps=5872.1, ups=0.09, wpb=64839, bsz=128, num_updates=1399, lr=9.99968e-05, gnorm=2.689, loss_scale=2, train_wall=11, gb_free=2.8, wall=15657
2021-06-18 22:59:54 | INFO | train_inner | epoch 001: 1414 / 3002 loss=3.028, ppl=8.16, wps=5891.1, ups=0.09, wpb=64837, bsz=128, num_updates=1400, lr=9.99968e-05, gnorm=2.467, loss_scale=2, train_wall=11, gb_free=2.8, wall=15668
2021-06-18 23:00:05 | INFO | train_inner | epoch 001: 1415 / 3002 loss=3.173, ppl=9.02, wps=5839.4, ups=0.09, wpb=64878, bsz=128, num_updates=1401, lr=9.99968e-05, gnorm=2.575, loss_scale=2, train_wall=11, gb_free=2.8, wall=15679
2021-06-18 23:00:16 | INFO | train_inner | epoch 001: 1416 / 3002 loss=3.049, ppl=8.28, wps=5907.5, ups=0.09, wpb=64752, bsz=128, num_updates=1402, lr=9.99968e-05, gnorm=2.669, loss_scale=2, train_wall=11, gb_free=2.8, wall=15690
2021-06-18 23:00:27 | INFO | train_inner | epoch 001: 1417 / 3002 loss=3.213, ppl=9.27, wps=5828.5, ups=0.09, wpb=64754, bsz=128, num_updates=1403, lr=9.99968e-05, gnorm=2.583, loss_scale=2, train_wall=11, gb_free=2.8, wall=15701
2021-06-18 23:00:38 | INFO | train_inner | epoch 001: 1418 / 3002 loss=3.112, ppl=8.65, wps=5765.3, ups=0.09, wpb=64843, bsz=128, num_updates=1404, lr=9.99968e-05, gnorm=2.51, loss_scale=2, train_wall=11, gb_free=2.8, wall=15713
2021-06-18 23:00:49 | INFO | train_inner | epoch 001: 1419 / 3002 loss=2.998, ppl=7.99, wps=5842.2, ups=0.09, wpb=64820, bsz=128, num_updates=1405, lr=9.99968e-05, gnorm=2.606, loss_scale=2, train_wall=11, gb_free=2.8, wall=15724
2021-06-18 23:01:00 | INFO | train_inner | epoch 001: 1420 / 3002 loss=3.094, ppl=8.54, wps=5947.3, ups=0.09, wpb=64857, bsz=128, num_updates=1406, lr=9.99968e-05, gnorm=4.097, loss_scale=2, train_wall=10, gb_free=2.8, wall=15735
2021-06-18 23:01:11 | INFO | train_inner | epoch 001: 1421 / 3002 loss=3.089, ppl=8.51, wps=5817.9, ups=0.09, wpb=64748, bsz=128, num_updates=1407, lr=9.99967e-05, gnorm=2.612, loss_scale=2, train_wall=11, gb_free=2.8, wall=15746
2021-06-18 23:01:23 | INFO | train_inner | epoch 001: 1422 / 3002 loss=3.142, ppl=8.83, wps=5777.5, ups=0.09, wpb=64849, bsz=128, num_updates=1408, lr=9.99967e-05, gnorm=2.589, loss_scale=2, train_wall=11, gb_free=2.8, wall=15757
2021-06-18 23:01:34 | INFO | train_inner | epoch 001: 1423 / 3002 loss=3.053, ppl=8.3, wps=5870, ups=0.09, wpb=64793, bsz=128, num_updates=1409, lr=9.99967e-05, gnorm=2.602, loss_scale=2, train_wall=11, gb_free=2.8, wall=15768
2021-06-18 23:01:45 | INFO | train_inner | epoch 001: 1424 / 3002 loss=2.926, ppl=7.6, wps=5980.4, ups=0.09, wpb=64914, bsz=128, num_updates=1410, lr=9.99967e-05, gnorm=2.542, loss_scale=2, train_wall=10, gb_free=2.8, wall=15779
2021-06-18 23:01:56 | INFO | train_inner | epoch 001: 1425 / 3002 loss=2.96, ppl=7.78, wps=5770.4, ups=0.09, wpb=64842, bsz=128, num_updates=1411, lr=9.99967e-05, gnorm=2.894, loss_scale=2, train_wall=11, gb_free=2.8, wall=15790
2021-06-18 23:02:07 | INFO | train_inner | epoch 001: 1426 / 3002 loss=3.074, ppl=8.42, wps=5924.9, ups=0.09, wpb=64880, bsz=128, num_updates=1412, lr=9.99967e-05, gnorm=2.911, loss_scale=2, train_wall=11, gb_free=2.8, wall=15801
2021-06-18 23:02:18 | INFO | train_inner | epoch 001: 1427 / 3002 loss=2.946, ppl=7.71, wps=5946.9, ups=0.09, wpb=64828, bsz=128, num_updates=1413, lr=9.99967e-05, gnorm=2.444, loss_scale=2, train_wall=10, gb_free=2.8, wall=15812
2021-06-18 23:02:29 | INFO | train_inner | epoch 001: 1428 / 3002 loss=3.087, ppl=8.5, wps=5783, ups=0.09, wpb=64795, bsz=128, num_updates=1414, lr=9.99967e-05, gnorm=2.58, loss_scale=2, train_wall=11, gb_free=2.8, wall=15823
2021-06-18 23:02:40 | INFO | train_inner | epoch 001: 1429 / 3002 loss=3.006, ppl=8.03, wps=5859.5, ups=0.09, wpb=64835, bsz=128, num_updates=1415, lr=9.99967e-05, gnorm=2.701, loss_scale=2, train_wall=11, gb_free=2.8, wall=15834
2021-06-18 23:02:51 | INFO | train_inner | epoch 001: 1430 / 3002 loss=3.105, ppl=8.6, wps=5749.6, ups=0.09, wpb=64824, bsz=128, num_updates=1416, lr=9.99967e-05, gnorm=2.646, loss_scale=2, train_wall=11, gb_free=2.8, wall=15846
2021-06-18 23:03:02 | INFO | train_inner | epoch 001: 1431 / 3002 loss=2.987, ppl=7.93, wps=5877.6, ups=0.09, wpb=64855, bsz=128, num_updates=1417, lr=9.99967e-05, gnorm=2.435, loss_scale=2, train_wall=11, gb_free=2.8, wall=15857
2021-06-18 23:03:13 | INFO | train_inner | epoch 001: 1432 / 3002 loss=3.004, ppl=8.02, wps=5901.5, ups=0.09, wpb=64888, bsz=128, num_updates=1418, lr=9.99967e-05, gnorm=2.519, loss_scale=2, train_wall=11, gb_free=2.8, wall=15868
2021-06-18 23:03:24 | INFO | train_inner | epoch 001: 1433 / 3002 loss=3.137, ppl=8.79, wps=5879.3, ups=0.09, wpb=64836, bsz=128, num_updates=1419, lr=9.99966e-05, gnorm=3.485, loss_scale=2, train_wall=11, gb_free=2.8, wall=15879
2021-06-18 23:03:35 | INFO | train_inner | epoch 001: 1434 / 3002 loss=2.874, ppl=7.33, wps=5889.6, ups=0.09, wpb=64933, bsz=128, num_updates=1420, lr=9.99966e-05, gnorm=2.549, loss_scale=2, train_wall=11, gb_free=2.8, wall=15890
2021-06-18 23:03:46 | INFO | train_inner | epoch 001: 1435 / 3002 loss=3.041, ppl=8.23, wps=5868.6, ups=0.09, wpb=64827, bsz=128, num_updates=1421, lr=9.99966e-05, gnorm=2.545, loss_scale=2, train_wall=11, gb_free=2.8, wall=15901
2021-06-18 23:03:57 | INFO | train_inner | epoch 001: 1436 / 3002 loss=3.286, ppl=9.75, wps=5981.4, ups=0.09, wpb=64820, bsz=128, num_updates=1422, lr=9.99966e-05, gnorm=2.688, loss_scale=2, train_wall=10, gb_free=2.8, wall=15911
2021-06-18 23:04:08 | INFO | train_inner | epoch 001: 1437 / 3002 loss=3.197, ppl=9.17, wps=5845.8, ups=0.09, wpb=64794, bsz=128, num_updates=1423, lr=9.99966e-05, gnorm=2.592, loss_scale=2, train_wall=11, gb_free=2.8, wall=15923
2021-06-18 23:04:19 | INFO | train_inner | epoch 001: 1438 / 3002 loss=3.128, ppl=8.74, wps=5912.8, ups=0.09, wpb=64853, bsz=128, num_updates=1424, lr=9.99966e-05, gnorm=8.01, loss_scale=2, train_wall=11, gb_free=2.8, wall=15934
2021-06-18 23:04:30 | INFO | train_inner | epoch 001: 1439 / 3002 loss=3.001, ppl=8, wps=5840.2, ups=0.09, wpb=64792, bsz=128, num_updates=1425, lr=9.99966e-05, gnorm=2.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=15945
2021-06-18 23:04:41 | INFO | train_inner | epoch 001: 1440 / 3002 loss=3.018, ppl=8.1, wps=5824.3, ups=0.09, wpb=64849, bsz=128, num_updates=1426, lr=9.99966e-05, gnorm=2.466, loss_scale=2, train_wall=11, gb_free=2.8, wall=15956
2021-06-18 23:04:53 | INFO | train_inner | epoch 001: 1441 / 3002 loss=3.008, ppl=8.04, wps=5794.1, ups=0.09, wpb=64846, bsz=128, num_updates=1427, lr=9.99966e-05, gnorm=2.682, loss_scale=2, train_wall=11, gb_free=2.8, wall=15967
2021-06-18 23:05:04 | INFO | train_inner | epoch 001: 1442 / 3002 loss=3.074, ppl=8.42, wps=5823.3, ups=0.09, wpb=64807, bsz=128, num_updates=1428, lr=9.99966e-05, gnorm=2.67, loss_scale=2, train_wall=11, gb_free=2.8, wall=15978
2021-06-18 23:05:15 | INFO | train_inner | epoch 001: 1443 / 3002 loss=3.139, ppl=8.81, wps=5830.6, ups=0.09, wpb=64818, bsz=128, num_updates=1429, lr=9.99966e-05, gnorm=2.625, loss_scale=2, train_wall=11, gb_free=2.8, wall=15989
2021-06-18 23:05:26 | INFO | train_inner | epoch 001: 1444 / 3002 loss=3.05, ppl=8.28, wps=5794.2, ups=0.09, wpb=64866, bsz=128, num_updates=1430, lr=9.99966e-05, gnorm=2.581, loss_scale=2, train_wall=11, gb_free=2.8, wall=16000
2021-06-18 23:05:37 | INFO | train_inner | epoch 001: 1445 / 3002 loss=2.873, ppl=7.33, wps=5882.9, ups=0.09, wpb=64803, bsz=128, num_updates=1431, lr=9.99966e-05, gnorm=2.71, loss_scale=2, train_wall=11, gb_free=2.8, wall=16011
2021-06-18 23:05:48 | INFO | train_inner | epoch 001: 1446 / 3002 loss=2.986, ppl=7.92, wps=5782.1, ups=0.09, wpb=64821, bsz=128, num_updates=1432, lr=9.99965e-05, gnorm=2.591, loss_scale=2, train_wall=11, gb_free=2.8, wall=16023
2021-06-18 23:05:59 | INFO | train_inner | epoch 001: 1447 / 3002 loss=3.183, ppl=9.08, wps=5917.6, ups=0.09, wpb=64809, bsz=128, num_updates=1433, lr=9.99965e-05, gnorm=2.723, loss_scale=2, train_wall=11, gb_free=2.8, wall=16034
2021-06-18 23:06:10 | INFO | train_inner | epoch 001: 1448 / 3002 loss=3.009, ppl=8.05, wps=5824.7, ups=0.09, wpb=64814, bsz=128, num_updates=1434, lr=9.99965e-05, gnorm=2.659, loss_scale=2, train_wall=11, gb_free=2.8, wall=16045
2021-06-18 23:06:22 | INFO | train_inner | epoch 001: 1449 / 3002 loss=3.12, ppl=8.7, wps=5795, ups=0.09, wpb=64855, bsz=128, num_updates=1435, lr=9.99965e-05, gnorm=2.506, loss_scale=2, train_wall=11, gb_free=2.8, wall=16056
2021-06-18 23:06:33 | INFO | train_inner | epoch 001: 1450 / 3002 loss=2.89, ppl=7.41, wps=5820.1, ups=0.09, wpb=64833, bsz=128, num_updates=1436, lr=9.99965e-05, gnorm=2.539, loss_scale=2, train_wall=11, gb_free=2.8, wall=16067
2021-06-18 23:06:44 | INFO | train_inner | epoch 001: 1451 / 3002 loss=2.924, ppl=7.59, wps=5824.4, ups=0.09, wpb=64882, bsz=128, num_updates=1437, lr=9.99965e-05, gnorm=2.541, loss_scale=2, train_wall=11, gb_free=2.8, wall=16078
2021-06-18 23:06:55 | INFO | train_inner | epoch 001: 1452 / 3002 loss=2.983, ppl=7.91, wps=5829.4, ups=0.09, wpb=64852, bsz=128, num_updates=1438, lr=9.99965e-05, gnorm=2.538, loss_scale=2, train_wall=11, gb_free=2.8, wall=16089
2021-06-18 23:07:06 | INFO | train_inner | epoch 001: 1453 / 3002 loss=2.923, ppl=7.58, wps=5941.6, ups=0.09, wpb=64774, bsz=128, num_updates=1439, lr=9.99965e-05, gnorm=2.494, loss_scale=2, train_wall=10, gb_free=2.8, wall=16100
2021-06-18 23:07:17 | INFO | train_inner | epoch 001: 1454 / 3002 loss=2.967, ppl=7.82, wps=5869.7, ups=0.09, wpb=64798, bsz=128, num_updates=1440, lr=9.99965e-05, gnorm=2.576, loss_scale=2, train_wall=11, gb_free=2.8, wall=16111
2021-06-18 23:07:28 | INFO | train_inner | epoch 001: 1455 / 3002 loss=3.021, ppl=8.12, wps=5782.2, ups=0.09, wpb=64811, bsz=128, num_updates=1441, lr=9.99965e-05, gnorm=2.494, loss_scale=2, train_wall=11, gb_free=2.8, wall=16122
2021-06-18 23:07:39 | INFO | train_inner | epoch 001: 1456 / 3002 loss=2.868, ppl=7.3, wps=5833.3, ups=0.09, wpb=64817, bsz=128, num_updates=1442, lr=9.99965e-05, gnorm=2.584, loss_scale=2, train_wall=11, gb_free=2.8, wall=16134
2021-06-18 23:07:50 | INFO | train_inner | epoch 001: 1457 / 3002 loss=3.141, ppl=8.82, wps=5959.6, ups=0.09, wpb=64864, bsz=128, num_updates=1443, lr=9.99965e-05, gnorm=2.661, loss_scale=2, train_wall=10, gb_free=2.8, wall=16144
2021-06-18 23:08:01 | INFO | train_inner | epoch 001: 1458 / 3002 loss=3.174, ppl=9.03, wps=5830.5, ups=0.09, wpb=64818, bsz=128, num_updates=1444, lr=9.99964e-05, gnorm=2.553, loss_scale=2, train_wall=11, gb_free=2.8, wall=16156
2021-06-18 23:08:12 | INFO | train_inner | epoch 001: 1459 / 3002 loss=2.935, ppl=7.65, wps=5846.1, ups=0.09, wpb=64830, bsz=128, num_updates=1445, lr=9.99964e-05, gnorm=2.564, loss_scale=2, train_wall=11, gb_free=2.8, wall=16167
2021-06-18 23:08:23 | INFO | train_inner | epoch 001: 1460 / 3002 loss=3.171, ppl=9.01, wps=5825.5, ups=0.09, wpb=64813, bsz=128, num_updates=1446, lr=9.99964e-05, gnorm=3.049, loss_scale=2, train_wall=11, gb_free=2.8, wall=16178
2021-06-18 23:08:35 | INFO | train_inner | epoch 001: 1461 / 3002 loss=2.877, ppl=7.35, wps=5729.8, ups=0.09, wpb=64738, bsz=128, num_updates=1447, lr=9.99964e-05, gnorm=3.962, loss_scale=2, train_wall=11, gb_free=2.8, wall=16189
2021-06-18 23:08:46 | INFO | train_inner | epoch 001: 1462 / 3002 loss=3.123, ppl=8.71, wps=5836.5, ups=0.09, wpb=64837, bsz=128, num_updates=1448, lr=9.99964e-05, gnorm=2.433, loss_scale=2, train_wall=11, gb_free=2.8, wall=16200
2021-06-18 23:08:57 | INFO | train_inner | epoch 001: 1463 / 3002 loss=2.988, ppl=7.93, wps=5837, ups=0.09, wpb=64862, bsz=128, num_updates=1449, lr=9.99964e-05, gnorm=2.686, loss_scale=2, train_wall=11, gb_free=2.8, wall=16211
2021-06-18 23:09:08 | INFO | train_inner | epoch 001: 1464 / 3002 loss=3.084, ppl=8.48, wps=5836.5, ups=0.09, wpb=64836, bsz=128, num_updates=1450, lr=9.99964e-05, gnorm=2.542, loss_scale=2, train_wall=11, gb_free=2.8, wall=16222
2021-06-18 23:09:19 | INFO | train_inner | epoch 001: 1465 / 3002 loss=3.204, ppl=9.22, wps=5991.6, ups=0.09, wpb=64797, bsz=128, num_updates=1451, lr=9.99964e-05, gnorm=2.607, loss_scale=2, train_wall=10, gb_free=2.8, wall=16233
2021-06-18 23:09:30 | INFO | train_inner | epoch 001: 1466 / 3002 loss=3.127, ppl=8.73, wps=5826.1, ups=0.09, wpb=64852, bsz=128, num_updates=1452, lr=9.99964e-05, gnorm=2.651, loss_scale=2, train_wall=11, gb_free=2.8, wall=16244
2021-06-18 23:09:41 | INFO | train_inner | epoch 001: 1467 / 3002 loss=3.179, ppl=9.06, wps=5929.4, ups=0.09, wpb=64798, bsz=128, num_updates=1453, lr=9.99964e-05, gnorm=2.461, loss_scale=2, train_wall=10, gb_free=2.8, wall=16255
2021-06-18 23:09:52 | INFO | train_inner | epoch 001: 1468 / 3002 loss=2.942, ppl=7.69, wps=5883.1, ups=0.09, wpb=64872, bsz=128, num_updates=1454, lr=9.99964e-05, gnorm=2.572, loss_scale=2, train_wall=11, gb_free=2.8, wall=16266
2021-06-18 23:10:03 | INFO | train_inner | epoch 001: 1469 / 3002 loss=3.093, ppl=8.53, wps=5796.3, ups=0.09, wpb=64760, bsz=128, num_updates=1455, lr=9.99964e-05, gnorm=2.571, loss_scale=2, train_wall=11, gb_free=2.8, wall=16277
2021-06-18 23:10:14 | INFO | train_inner | epoch 001: 1470 / 3002 loss=2.945, ppl=7.7, wps=5801.7, ups=0.09, wpb=64805, bsz=128, num_updates=1456, lr=9.99964e-05, gnorm=2.447, loss_scale=2, train_wall=11, gb_free=2.8, wall=16289
2021-06-18 23:10:25 | INFO | train_inner | epoch 001: 1471 / 3002 loss=2.934, ppl=7.64, wps=5858.4, ups=0.09, wpb=64838, bsz=128, num_updates=1457, lr=9.99963e-05, gnorm=2.665, loss_scale=2, train_wall=11, gb_free=2.8, wall=16300
2021-06-18 23:10:37 | INFO | train_inner | epoch 001: 1472 / 3002 loss=3.174, ppl=9.03, wps=5815.5, ups=0.09, wpb=64823, bsz=128, num_updates=1458, lr=9.99963e-05, gnorm=2.742, loss_scale=2, train_wall=11, gb_free=2.8, wall=16311
2021-06-18 23:10:48 | INFO | train_inner | epoch 001: 1473 / 3002 loss=3.106, ppl=8.61, wps=5880.1, ups=0.09, wpb=64787, bsz=128, num_updates=1459, lr=9.99963e-05, gnorm=2.478, loss_scale=2, train_wall=11, gb_free=2.8, wall=16322
2021-06-18 23:10:59 | INFO | train_inner | epoch 001: 1474 / 3002 loss=3.045, ppl=8.25, wps=5892.3, ups=0.09, wpb=64823, bsz=128, num_updates=1460, lr=9.99963e-05, gnorm=2.398, loss_scale=2, train_wall=11, gb_free=2.8, wall=16333
2021-06-18 23:11:10 | INFO | train_inner | epoch 001: 1475 / 3002 loss=3.01, ppl=8.06, wps=5724, ups=0.09, wpb=64870, bsz=128, num_updates=1461, lr=9.99963e-05, gnorm=2.889, loss_scale=2, train_wall=11, gb_free=2.8, wall=16344
2021-06-18 23:11:21 | INFO | train_inner | epoch 001: 1476 / 3002 loss=3.179, ppl=9.06, wps=5803.5, ups=0.09, wpb=64813, bsz=128, num_updates=1462, lr=9.99963e-05, gnorm=2.599, loss_scale=2, train_wall=11, gb_free=2.8, wall=16355
2021-06-18 23:11:32 | INFO | train_inner | epoch 001: 1477 / 3002 loss=3.053, ppl=8.3, wps=5824.8, ups=0.09, wpb=64866, bsz=128, num_updates=1463, lr=9.99963e-05, gnorm=2.508, loss_scale=2, train_wall=11, gb_free=2.8, wall=16367
2021-06-18 23:11:44 | INFO | train_inner | epoch 001: 1478 / 3002 loss=2.855, ppl=7.24, wps=5735.7, ups=0.09, wpb=64857, bsz=128, num_updates=1464, lr=9.99963e-05, gnorm=2.601, loss_scale=2, train_wall=11, gb_free=2.8, wall=16378
2021-06-18 23:11:55 | INFO | train_inner | epoch 001: 1479 / 3002 loss=2.928, ppl=7.61, wps=5825.4, ups=0.09, wpb=64846, bsz=128, num_updates=1465, lr=9.99963e-05, gnorm=2.58, loss_scale=2, train_wall=11, gb_free=2.8, wall=16389
2021-06-18 23:12:06 | INFO | train_inner | epoch 001: 1480 / 3002 loss=3.083, ppl=8.48, wps=5907.2, ups=0.09, wpb=64760, bsz=128, num_updates=1466, lr=9.99963e-05, gnorm=3.352, loss_scale=2, train_wall=11, gb_free=2.8, wall=16400
2021-06-18 23:12:17 | INFO | train_inner | epoch 001: 1481 / 3002 loss=3.064, ppl=8.36, wps=5928.5, ups=0.09, wpb=64802, bsz=128, num_updates=1467, lr=9.99963e-05, gnorm=2.649, loss_scale=2, train_wall=10, gb_free=2.8, wall=16411
2021-06-18 23:12:27 | INFO | train_inner | epoch 001: 1482 / 3002 loss=3.067, ppl=8.38, wps=5923.4, ups=0.09, wpb=64819, bsz=128, num_updates=1468, lr=9.99963e-05, gnorm=2.699, loss_scale=2, train_wall=11, gb_free=2.8, wall=16422
2021-06-18 23:12:39 | INFO | train_inner | epoch 001: 1483 / 3002 loss=3.048, ppl=8.27, wps=5806.6, ups=0.09, wpb=64845, bsz=128, num_updates=1469, lr=9.99962e-05, gnorm=2.605, loss_scale=2, train_wall=11, gb_free=2.8, wall=16433
2021-06-18 23:12:50 | INFO | train_inner | epoch 001: 1484 / 3002 loss=3.018, ppl=8.1, wps=5810.1, ups=0.09, wpb=64819, bsz=128, num_updates=1470, lr=9.99962e-05, gnorm=2.57, loss_scale=2, train_wall=11, gb_free=2.8, wall=16444
2021-06-18 23:13:01 | INFO | train_inner | epoch 001: 1485 / 3002 loss=2.923, ppl=7.59, wps=5745.6, ups=0.09, wpb=64827, bsz=128, num_updates=1471, lr=9.99962e-05, gnorm=2.501, loss_scale=2, train_wall=11, gb_free=2.8, wall=16455
2021-06-18 23:13:12 | INFO | train_inner | epoch 001: 1486 / 3002 loss=3.111, ppl=8.64, wps=5903.8, ups=0.09, wpb=64814, bsz=128, num_updates=1472, lr=9.99962e-05, gnorm=2.604, loss_scale=2, train_wall=11, gb_free=2.8, wall=16466
2021-06-18 23:13:23 | INFO | train_inner | epoch 001: 1487 / 3002 loss=3.103, ppl=8.59, wps=5857.7, ups=0.09, wpb=64764, bsz=128, num_updates=1473, lr=9.99962e-05, gnorm=2.63, loss_scale=2, train_wall=11, gb_free=2.8, wall=16477
2021-06-18 23:13:34 | INFO | train_inner | epoch 001: 1488 / 3002 loss=3.011, ppl=8.06, wps=5886, ups=0.09, wpb=64835, bsz=128, num_updates=1474, lr=9.99962e-05, gnorm=3.558, loss_scale=2, train_wall=11, gb_free=2.8, wall=16488
2021-06-18 23:13:45 | INFO | train_inner | epoch 001: 1489 / 3002 loss=3.02, ppl=8.11, wps=5934.6, ups=0.09, wpb=64828, bsz=128, num_updates=1475, lr=9.99962e-05, gnorm=2.457, loss_scale=2, train_wall=10, gb_free=2.8, wall=16499
2021-06-18 23:13:56 | INFO | train_inner | epoch 001: 1490 / 3002 loss=3.032, ppl=8.18, wps=5900.6, ups=0.09, wpb=64811, bsz=128, num_updates=1476, lr=9.99962e-05, gnorm=2.591, loss_scale=2, train_wall=11, gb_free=2.8, wall=16510
2021-06-18 23:14:07 | INFO | train_inner | epoch 001: 1491 / 3002 loss=2.949, ppl=7.72, wps=5867.8, ups=0.09, wpb=64847, bsz=128, num_updates=1477, lr=9.99962e-05, gnorm=2.52, loss_scale=2, train_wall=11, gb_free=2.8, wall=16521
2021-06-18 23:14:18 | INFO | train_inner | epoch 001: 1492 / 3002 loss=3.144, ppl=8.84, wps=5852, ups=0.09, wpb=64734, bsz=128, num_updates=1478, lr=9.99962e-05, gnorm=2.693, loss_scale=2, train_wall=11, gb_free=2.8, wall=16532
2021-06-18 23:14:29 | INFO | train_inner | epoch 001: 1493 / 3002 loss=2.941, ppl=7.68, wps=5860.8, ups=0.09, wpb=64817, bsz=128, num_updates=1479, lr=9.99962e-05, gnorm=2.683, loss_scale=2, train_wall=11, gb_free=2.8, wall=16544
2021-06-18 23:14:40 | INFO | train_inner | epoch 001: 1494 / 3002 loss=2.932, ppl=7.63, wps=5935.9, ups=0.09, wpb=64811, bsz=128, num_updates=1480, lr=9.99962e-05, gnorm=3.019, loss_scale=2, train_wall=10, gb_free=2.8, wall=16554
2021-06-18 23:14:51 | INFO | train_inner | epoch 001: 1495 / 3002 loss=2.861, ppl=7.27, wps=5841.3, ups=0.09, wpb=64860, bsz=128, num_updates=1481, lr=9.99962e-05, gnorm=2.503, loss_scale=2, train_wall=11, gb_free=2.8, wall=16566
2021-06-18 23:15:02 | INFO | train_inner | epoch 001: 1496 / 3002 loss=3.083, ppl=8.47, wps=5823.9, ups=0.09, wpb=64814, bsz=128, num_updates=1482, lr=9.99961e-05, gnorm=2.587, loss_scale=2, train_wall=11, gb_free=2.8, wall=16577
2021-06-18 23:15:13 | INFO | train_inner | epoch 001: 1497 / 3002 loss=3.024, ppl=8.13, wps=5864.6, ups=0.09, wpb=64868, bsz=128, num_updates=1483, lr=9.99961e-05, gnorm=2.577, loss_scale=2, train_wall=11, gb_free=2.8, wall=16588
2021-06-18 23:15:24 | INFO | train_inner | epoch 001: 1498 / 3002 loss=3.003, ppl=8.02, wps=5919.7, ups=0.09, wpb=64753, bsz=128, num_updates=1484, lr=9.99961e-05, gnorm=2.492, loss_scale=2, train_wall=10, gb_free=2.8, wall=16599
2021-06-18 23:15:35 | INFO | train_inner | epoch 001: 1499 / 3002 loss=3.03, ppl=8.17, wps=5852.3, ups=0.09, wpb=64755, bsz=128, num_updates=1485, lr=9.99961e-05, gnorm=2.461, loss_scale=2, train_wall=11, gb_free=2.8, wall=16610
2021-06-18 23:15:46 | INFO | train_inner | epoch 001: 1500 / 3002 loss=2.923, ppl=7.58, wps=5930.3, ups=0.09, wpb=64795, bsz=128, num_updates=1486, lr=9.99961e-05, gnorm=2.423, loss_scale=2, train_wall=10, gb_free=2.8, wall=16621
2021-06-18 23:15:57 | INFO | train_inner | epoch 001: 1501 / 3002 loss=2.876, ppl=7.34, wps=5830.3, ups=0.09, wpb=64886, bsz=128, num_updates=1487, lr=9.99961e-05, gnorm=2.511, loss_scale=2, train_wall=11, gb_free=2.8, wall=16632
2021-06-18 23:16:09 | INFO | train_inner | epoch 001: 1502 / 3002 loss=2.904, ppl=7.48, wps=5844.8, ups=0.09, wpb=64875, bsz=128, num_updates=1488, lr=9.99961e-05, gnorm=2.449, loss_scale=2, train_wall=11, gb_free=2.8, wall=16643
2021-06-18 23:16:20 | INFO | train_inner | epoch 001: 1503 / 3002 loss=3.058, ppl=8.33, wps=5856.8, ups=0.09, wpb=64875, bsz=128, num_updates=1489, lr=9.99961e-05, gnorm=2.506, loss_scale=4, train_wall=11, gb_free=2.8, wall=16654
2021-06-18 23:16:31 | INFO | train_inner | epoch 001: 1504 / 3002 loss=2.957, ppl=7.76, wps=5799.3, ups=0.09, wpb=64790, bsz=128, num_updates=1490, lr=9.99961e-05, gnorm=2.438, loss_scale=4, train_wall=11, gb_free=2.8, wall=16665
2021-06-18 23:16:42 | INFO | train_inner | epoch 001: 1505 / 3002 loss=3.08, ppl=8.46, wps=5863.2, ups=0.09, wpb=64825, bsz=128, num_updates=1491, lr=9.99961e-05, gnorm=2.49, loss_scale=4, train_wall=11, gb_free=2.8, wall=16676
2021-06-18 23:16:53 | INFO | train_inner | epoch 001: 1506 / 3002 loss=2.939, ppl=7.67, wps=5745.4, ups=0.09, wpb=64922, bsz=128, num_updates=1492, lr=9.99961e-05, gnorm=2.553, loss_scale=4, train_wall=11, gb_free=2.8, wall=16688
2021-06-18 23:17:04 | INFO | train_inner | epoch 001: 1507 / 3002 loss=2.919, ppl=7.56, wps=5766.3, ups=0.09, wpb=64834, bsz=128, num_updates=1493, lr=9.99961e-05, gnorm=2.413, loss_scale=4, train_wall=11, gb_free=2.8, wall=16699
2021-06-18 23:17:16 | INFO | train_inner | epoch 001: 1508 / 3002 loss=2.919, ppl=7.57, wps=5816.6, ups=0.09, wpb=64849, bsz=128, num_updates=1494, lr=9.9996e-05, gnorm=3.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=16710
2021-06-18 23:17:27 | INFO | train_inner | epoch 001: 1509 / 3002 loss=2.929, ppl=7.62, wps=5900.2, ups=0.09, wpb=64858, bsz=128, num_updates=1495, lr=9.9996e-05, gnorm=2.484, loss_scale=4, train_wall=11, gb_free=2.8, wall=16721
2021-06-18 23:17:38 | INFO | train_inner | epoch 001: 1510 / 3002 loss=2.913, ppl=7.53, wps=5859.3, ups=0.09, wpb=64876, bsz=128, num_updates=1496, lr=9.9996e-05, gnorm=2.516, loss_scale=4, train_wall=11, gb_free=2.8, wall=16732
2021-06-18 23:17:49 | INFO | train_inner | epoch 001: 1511 / 3002 loss=3.011, ppl=8.06, wps=5748.7, ups=0.09, wpb=64789, bsz=128, num_updates=1497, lr=9.9996e-05, gnorm=2.541, loss_scale=4, train_wall=11, gb_free=2.8, wall=16743
2021-06-18 23:18:00 | INFO | train_inner | epoch 001: 1512 / 3002 loss=2.93, ppl=7.62, wps=5832.5, ups=0.09, wpb=64808, bsz=128, num_updates=1498, lr=9.9996e-05, gnorm=2.482, loss_scale=4, train_wall=11, gb_free=2.8, wall=16754
2021-06-18 23:18:11 | INFO | train_inner | epoch 001: 1513 / 3002 loss=3.058, ppl=8.33, wps=5799, ups=0.09, wpb=64886, bsz=128, num_updates=1499, lr=9.9996e-05, gnorm=2.574, loss_scale=4, train_wall=11, gb_free=2.8, wall=16766
2021-06-18 23:18:22 | INFO | train_inner | epoch 001: 1514 / 3002 loss=3.01, ppl=8.05, wps=5791.7, ups=0.09, wpb=64764, bsz=128, num_updates=1500, lr=9.9996e-05, gnorm=2.42, loss_scale=4, train_wall=11, gb_free=2.8, wall=16777
2021-06-18 23:18:33 | INFO | train_inner | epoch 001: 1515 / 3002 loss=3.023, ppl=8.13, wps=5891.3, ups=0.09, wpb=64859, bsz=128, num_updates=1501, lr=9.9996e-05, gnorm=2.789, loss_scale=4, train_wall=11, gb_free=2.8, wall=16788
2021-06-18 23:18:45 | INFO | train_inner | epoch 001: 1516 / 3002 loss=3.081, ppl=8.46, wps=5824.9, ups=0.09, wpb=64751, bsz=128, num_updates=1502, lr=9.9996e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=16799
2021-06-18 23:18:56 | INFO | train_inner | epoch 001: 1517 / 3002 loss=3.128, ppl=8.74, wps=5841.3, ups=0.09, wpb=64848, bsz=128, num_updates=1503, lr=9.9996e-05, gnorm=2.479, loss_scale=4, train_wall=11, gb_free=2.8, wall=16810
2021-06-18 23:19:07 | INFO | train_inner | epoch 001: 1518 / 3002 loss=2.828, ppl=7.1, wps=5803.6, ups=0.09, wpb=64813, bsz=128, num_updates=1504, lr=9.9996e-05, gnorm=2.453, loss_scale=4, train_wall=11, gb_free=2.8, wall=16821
2021-06-18 23:19:18 | INFO | train_inner | epoch 001: 1519 / 3002 loss=2.852, ppl=7.22, wps=5826.7, ups=0.09, wpb=64835, bsz=128, num_updates=1505, lr=9.9996e-05, gnorm=3.142, loss_scale=4, train_wall=11, gb_free=2.8, wall=16832
2021-06-18 23:19:29 | INFO | train_inner | epoch 001: 1520 / 3002 loss=3.114, ppl=8.66, wps=5826.2, ups=0.09, wpb=64787, bsz=128, num_updates=1506, lr=9.9996e-05, gnorm=2.616, loss_scale=4, train_wall=11, gb_free=2.8, wall=16843
2021-06-18 23:19:40 | INFO | train_inner | epoch 001: 1521 / 3002 loss=3.062, ppl=8.35, wps=5840, ups=0.09, wpb=64793, bsz=128, num_updates=1507, lr=9.99959e-05, gnorm=2.571, loss_scale=4, train_wall=11, gb_free=2.8, wall=16854
2021-06-18 23:19:51 | INFO | train_inner | epoch 001: 1522 / 3002 loss=3.118, ppl=8.68, wps=5913.8, ups=0.09, wpb=64935, bsz=128, num_updates=1508, lr=9.99959e-05, gnorm=2.486, loss_scale=4, train_wall=11, gb_free=2.8, wall=16865
2021-06-18 23:20:02 | INFO | train_inner | epoch 001: 1523 / 3002 loss=2.976, ppl=7.87, wps=5957.4, ups=0.09, wpb=64849, bsz=128, num_updates=1509, lr=9.99959e-05, gnorm=2.565, loss_scale=4, train_wall=10, gb_free=2.8, wall=16876
2021-06-18 23:20:13 | INFO | train_inner | epoch 001: 1524 / 3002 loss=3.044, ppl=8.25, wps=5835.7, ups=0.09, wpb=64887, bsz=128, num_updates=1510, lr=9.99959e-05, gnorm=3.215, loss_scale=4, train_wall=11, gb_free=2.8, wall=16887
2021-06-18 23:20:24 | INFO | train_inner | epoch 001: 1525 / 3002 loss=3.209, ppl=9.25, wps=5946.4, ups=0.09, wpb=64845, bsz=128, num_updates=1511, lr=9.99959e-05, gnorm=3.285, loss_scale=4, train_wall=10, gb_free=2.8, wall=16898
2021-06-18 23:20:35 | INFO | train_inner | epoch 001: 1526 / 3002 loss=2.986, ppl=7.92, wps=5739.9, ups=0.09, wpb=64849, bsz=128, num_updates=1512, lr=9.99959e-05, gnorm=2.664, loss_scale=4, train_wall=11, gb_free=2.8, wall=16910
2021-06-18 23:20:47 | INFO | train_inner | epoch 001: 1527 / 3002 loss=2.978, ppl=7.88, wps=5775.7, ups=0.09, wpb=64843, bsz=128, num_updates=1513, lr=9.99959e-05, gnorm=2.604, loss_scale=4, train_wall=11, gb_free=2.8, wall=16921
2021-06-18 23:20:58 | INFO | train_inner | epoch 001: 1528 / 3002 loss=3.029, ppl=8.16, wps=5819.8, ups=0.09, wpb=64811, bsz=128, num_updates=1514, lr=9.99959e-05, gnorm=2.505, loss_scale=4, train_wall=11, gb_free=2.8, wall=16932
2021-06-18 23:21:09 | INFO | train_inner | epoch 001: 1529 / 3002 loss=2.96, ppl=7.78, wps=5939.8, ups=0.09, wpb=64927, bsz=128, num_updates=1515, lr=9.99959e-05, gnorm=2.462, loss_scale=4, train_wall=10, gb_free=2.8, wall=16943
2021-06-18 23:21:20 | INFO | train_inner | epoch 001: 1530 / 3002 loss=2.882, ppl=7.37, wps=5789.5, ups=0.09, wpb=64851, bsz=128, num_updates=1516, lr=9.99959e-05, gnorm=2.488, loss_scale=4, train_wall=11, gb_free=2.8, wall=16954
2021-06-18 23:21:31 | INFO | train_inner | epoch 001: 1531 / 3002 loss=3.128, ppl=8.74, wps=5822, ups=0.09, wpb=64777, bsz=128, num_updates=1517, lr=9.99959e-05, gnorm=2.512, loss_scale=4, train_wall=11, gb_free=2.8, wall=16965
2021-06-18 23:21:42 | INFO | train_inner | epoch 001: 1532 / 3002 loss=2.852, ppl=7.22, wps=5906.8, ups=0.09, wpb=64890, bsz=128, num_updates=1518, lr=9.99959e-05, gnorm=2.936, loss_scale=4, train_wall=10, gb_free=2.8, wall=16976
2021-06-18 23:21:53 | INFO | train_inner | epoch 001: 1533 / 3002 loss=3.022, ppl=8.12, wps=5837.1, ups=0.09, wpb=64792, bsz=128, num_updates=1519, lr=9.99958e-05, gnorm=3.681, loss_scale=4, train_wall=11, gb_free=2.8, wall=16987
2021-06-18 23:22:04 | INFO | train_inner | epoch 001: 1534 / 3002 loss=3.103, ppl=8.59, wps=5776.1, ups=0.09, wpb=64865, bsz=128, num_updates=1520, lr=9.99958e-05, gnorm=2.726, loss_scale=4, train_wall=11, gb_free=2.8, wall=16999
2021-06-18 23:22:15 | INFO | train_inner | epoch 001: 1535 / 3002 loss=2.928, ppl=7.61, wps=5854.8, ups=0.09, wpb=64835, bsz=128, num_updates=1521, lr=9.99958e-05, gnorm=2.499, loss_scale=4, train_wall=11, gb_free=2.8, wall=17010
2021-06-18 23:22:26 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-18 23:22:37 | INFO | train_inner | epoch 001: 1537 / 3002 loss=2.832, ppl=7.12, wps=2958.9, ups=0.05, wpb=64934, bsz=128, num_updates=1522, lr=9.99958e-05, gnorm=2.505, loss_scale=2, train_wall=21, gb_free=2.8, wall=17032
2021-06-18 23:22:48 | INFO | train_inner | epoch 001: 1538 / 3002 loss=3.109, ppl=8.63, wps=5823.9, ups=0.09, wpb=64789, bsz=128, num_updates=1523, lr=9.99958e-05, gnorm=2.497, loss_scale=2, train_wall=11, gb_free=2.8, wall=17043
2021-06-18 23:23:00 | INFO | train_inner | epoch 001: 1539 / 3002 loss=3.029, ppl=8.16, wps=5802.7, ups=0.09, wpb=64896, bsz=128, num_updates=1524, lr=9.99958e-05, gnorm=2.527, loss_scale=2, train_wall=11, gb_free=2.8, wall=17054
2021-06-18 23:23:11 | INFO | train_inner | epoch 001: 1540 / 3002 loss=3.283, ppl=9.73, wps=5838, ups=0.09, wpb=64867, bsz=128, num_updates=1525, lr=9.99958e-05, gnorm=2.507, loss_scale=2, train_wall=11, gb_free=2.8, wall=17065
2021-06-18 23:23:22 | INFO | train_inner | epoch 001: 1541 / 3002 loss=2.85, ppl=7.21, wps=5944.2, ups=0.09, wpb=64803, bsz=128, num_updates=1526, lr=9.99958e-05, gnorm=2.415, loss_scale=2, train_wall=10, gb_free=2.8, wall=17076
2021-06-18 23:23:33 | INFO | train_inner | epoch 001: 1542 / 3002 loss=2.908, ppl=7.51, wps=5842.1, ups=0.09, wpb=64849, bsz=128, num_updates=1527, lr=9.99958e-05, gnorm=2.587, loss_scale=2, train_wall=11, gb_free=2.8, wall=17087
2021-06-18 23:23:44 | INFO | train_inner | epoch 001: 1543 / 3002 loss=3.007, ppl=8.04, wps=5808, ups=0.09, wpb=64805, bsz=128, num_updates=1528, lr=9.99958e-05, gnorm=2.541, loss_scale=2, train_wall=11, gb_free=2.8, wall=17098
2021-06-18 23:23:55 | INFO | train_inner | epoch 001: 1544 / 3002 loss=2.976, ppl=7.87, wps=5709.8, ups=0.09, wpb=64773, bsz=128, num_updates=1529, lr=9.99958e-05, gnorm=2.386, loss_scale=2, train_wall=11, gb_free=2.8, wall=17110
2021-06-18 23:24:06 | INFO | train_inner | epoch 001: 1545 / 3002 loss=2.985, ppl=7.92, wps=5803.3, ups=0.09, wpb=64867, bsz=128, num_updates=1530, lr=9.99958e-05, gnorm=2.805, loss_scale=2, train_wall=11, gb_free=2.8, wall=17121
2021-06-18 23:24:17 | INFO | train_inner | epoch 001: 1546 / 3002 loss=3.025, ppl=8.14, wps=5940.3, ups=0.09, wpb=64820, bsz=128, num_updates=1531, lr=9.99958e-05, gnorm=2.498, loss_scale=2, train_wall=10, gb_free=2.8, wall=17132
2021-06-18 23:24:28 | INFO | train_inner | epoch 001: 1547 / 3002 loss=3.07, ppl=8.4, wps=5871, ups=0.09, wpb=64833, bsz=128, num_updates=1532, lr=9.99957e-05, gnorm=3.562, loss_scale=2, train_wall=11, gb_free=2.8, wall=17143
2021-06-18 23:24:39 | INFO | train_inner | epoch 001: 1548 / 3002 loss=2.849, ppl=7.2, wps=5836.9, ups=0.09, wpb=64854, bsz=128, num_updates=1533, lr=9.99957e-05, gnorm=2.586, loss_scale=2, train_wall=11, gb_free=2.8, wall=17154
2021-06-18 23:24:51 | INFO | train_inner | epoch 001: 1549 / 3002 loss=2.983, ppl=7.91, wps=5756, ups=0.09, wpb=64825, bsz=128, num_updates=1534, lr=9.99957e-05, gnorm=2.622, loss_scale=2, train_wall=11, gb_free=2.8, wall=17165
2021-06-18 23:25:01 | INFO | train_inner | epoch 001: 1550 / 3002 loss=3.115, ppl=8.66, wps=6056.1, ups=0.09, wpb=64812, bsz=128, num_updates=1535, lr=9.99957e-05, gnorm=9.555, loss_scale=2, train_wall=10, gb_free=2.8, wall=17176
2021-06-18 23:25:12 | INFO | train_inner | epoch 001: 1551 / 3002 loss=3.066, ppl=8.38, wps=5898.5, ups=0.09, wpb=64891, bsz=128, num_updates=1536, lr=9.99957e-05, gnorm=2.832, loss_scale=2, train_wall=11, gb_free=2.8, wall=17187
2021-06-18 23:25:23 | INFO | train_inner | epoch 001: 1552 / 3002 loss=2.996, ppl=7.98, wps=5871.5, ups=0.09, wpb=64845, bsz=128, num_updates=1537, lr=9.99957e-05, gnorm=2.488, loss_scale=2, train_wall=11, gb_free=2.8, wall=17198
2021-06-18 23:25:35 | INFO | train_inner | epoch 001: 1553 / 3002 loss=3.068, ppl=8.39, wps=5707.2, ups=0.09, wpb=64800, bsz=128, num_updates=1538, lr=9.99957e-05, gnorm=63.536, loss_scale=2, train_wall=11, gb_free=2.8, wall=17209
2021-06-18 23:25:46 | INFO | train_inner | epoch 001: 1554 / 3002 loss=3.097, ppl=8.56, wps=5784.1, ups=0.09, wpb=64812, bsz=128, num_updates=1539, lr=9.99957e-05, gnorm=4.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=17220
2021-06-18 23:25:57 | INFO | train_inner | epoch 001: 1555 / 3002 loss=3.136, ppl=8.79, wps=5874.5, ups=0.09, wpb=64785, bsz=128, num_updates=1540, lr=9.99957e-05, gnorm=2.686, loss_scale=2, train_wall=11, gb_free=2.8, wall=17231
2021-06-18 23:26:08 | INFO | train_inner | epoch 001: 1556 / 3002 loss=2.872, ppl=7.32, wps=5880, ups=0.09, wpb=64799, bsz=128, num_updates=1541, lr=9.99957e-05, gnorm=2.623, loss_scale=2, train_wall=11, gb_free=2.8, wall=17242
2021-06-18 23:26:19 | INFO | train_inner | epoch 001: 1557 / 3002 loss=2.903, ppl=7.48, wps=5860.5, ups=0.09, wpb=64938, bsz=128, num_updates=1542, lr=9.99957e-05, gnorm=5.994, loss_scale=2, train_wall=11, gb_free=2.8, wall=17254
2021-06-18 23:26:30 | INFO | train_inner | epoch 001: 1558 / 3002 loss=3.02, ppl=8.11, wps=5819, ups=0.09, wpb=64880, bsz=128, num_updates=1543, lr=9.99957e-05, gnorm=2.853, loss_scale=2, train_wall=11, gb_free=2.8, wall=17265
2021-06-18 23:26:41 | INFO | train_inner | epoch 001: 1559 / 3002 loss=3.03, ppl=8.17, wps=5871.2, ups=0.09, wpb=64776, bsz=128, num_updates=1544, lr=9.99956e-05, gnorm=2.836, loss_scale=2, train_wall=11, gb_free=2.8, wall=17276
2021-06-18 23:26:52 | INFO | train_inner | epoch 001: 1560 / 3002 loss=3.035, ppl=8.19, wps=5849.1, ups=0.09, wpb=64824, bsz=128, num_updates=1545, lr=9.99956e-05, gnorm=5.599, loss_scale=2, train_wall=11, gb_free=2.8, wall=17287
2021-06-18 23:27:04 | INFO | train_inner | epoch 001: 1561 / 3002 loss=2.995, ppl=7.97, wps=5815.3, ups=0.09, wpb=64851, bsz=128, num_updates=1546, lr=9.99956e-05, gnorm=3.515, loss_scale=2, train_wall=11, gb_free=2.8, wall=17298
2021-06-18 23:27:15 | INFO | train_inner | epoch 001: 1562 / 3002 loss=3.098, ppl=8.56, wps=5919.2, ups=0.09, wpb=64812, bsz=128, num_updates=1547, lr=9.99956e-05, gnorm=2.885, loss_scale=2, train_wall=10, gb_free=2.8, wall=17309
2021-06-18 23:27:26 | INFO | train_inner | epoch 001: 1563 / 3002 loss=2.969, ppl=7.83, wps=5908.5, ups=0.09, wpb=64836, bsz=128, num_updates=1548, lr=9.99956e-05, gnorm=2.922, loss_scale=2, train_wall=11, gb_free=2.8, wall=17320
2021-06-18 23:27:37 | INFO | train_inner | epoch 001: 1564 / 3002 loss=2.841, ppl=7.16, wps=5852.6, ups=0.09, wpb=64877, bsz=128, num_updates=1549, lr=9.99956e-05, gnorm=3.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=17331
2021-06-18 23:27:48 | INFO | train_inner | epoch 001: 1565 / 3002 loss=3.162, ppl=8.95, wps=5873.9, ups=0.09, wpb=64750, bsz=128, num_updates=1550, lr=9.99956e-05, gnorm=2.953, loss_scale=2, train_wall=11, gb_free=2.8, wall=17342
2021-06-18 23:27:59 | INFO | train_inner | epoch 001: 1566 / 3002 loss=3.138, ppl=8.8, wps=5851.7, ups=0.09, wpb=64800, bsz=128, num_updates=1551, lr=9.99956e-05, gnorm=2.782, loss_scale=2, train_wall=11, gb_free=2.8, wall=17353
2021-06-18 23:28:10 | INFO | train_inner | epoch 001: 1567 / 3002 loss=3.13, ppl=8.76, wps=5865.1, ups=0.09, wpb=64875, bsz=128, num_updates=1552, lr=9.99956e-05, gnorm=2.772, loss_scale=2, train_wall=11, gb_free=2.8, wall=17364
2021-06-18 23:28:21 | INFO | train_inner | epoch 001: 1568 / 3002 loss=3.091, ppl=8.52, wps=5787.4, ups=0.09, wpb=64794, bsz=128, num_updates=1553, lr=9.99956e-05, gnorm=2.71, loss_scale=2, train_wall=11, gb_free=2.8, wall=17375
2021-06-18 23:28:32 | INFO | train_inner | epoch 001: 1569 / 3002 loss=3.298, ppl=9.83, wps=5946, ups=0.09, wpb=64795, bsz=128, num_updates=1554, lr=9.99956e-05, gnorm=2.498, loss_scale=2, train_wall=10, gb_free=2.8, wall=17386
2021-06-18 23:28:43 | INFO | train_inner | epoch 001: 1570 / 3002 loss=3.198, ppl=9.18, wps=5816.5, ups=0.09, wpb=64808, bsz=128, num_updates=1555, lr=9.99956e-05, gnorm=13.703, loss_scale=2, train_wall=11, gb_free=2.8, wall=17397
2021-06-18 23:28:54 | INFO | train_inner | epoch 001: 1571 / 3002 loss=2.928, ppl=7.61, wps=5817, ups=0.09, wpb=64766, bsz=128, num_updates=1556, lr=9.99956e-05, gnorm=2.63, loss_scale=2, train_wall=11, gb_free=2.8, wall=17408
2021-06-18 23:29:05 | INFO | train_inner | epoch 001: 1572 / 3002 loss=3.218, ppl=9.3, wps=5814.6, ups=0.09, wpb=64677, bsz=128, num_updates=1557, lr=9.99955e-05, gnorm=2.692, loss_scale=2, train_wall=11, gb_free=2.8, wall=17420
2021-06-18 23:29:16 | INFO | train_inner | epoch 001: 1573 / 3002 loss=3.05, ppl=8.28, wps=5826.8, ups=0.09, wpb=64812, bsz=128, num_updates=1558, lr=9.99955e-05, gnorm=9.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=17431
2021-06-18 23:29:28 | INFO | train_inner | epoch 001: 1574 / 3002 loss=3.036, ppl=8.2, wps=5741.9, ups=0.09, wpb=64763, bsz=128, num_updates=1559, lr=9.99955e-05, gnorm=2.569, loss_scale=2, train_wall=11, gb_free=2.8, wall=17442
2021-06-18 23:29:38 | INFO | train_inner | epoch 001: 1575 / 3002 loss=3.206, ppl=9.23, wps=5985.7, ups=0.09, wpb=64886, bsz=128, num_updates=1560, lr=9.99955e-05, gnorm=3.495, loss_scale=2, train_wall=10, gb_free=2.8, wall=17453
2021-06-18 23:29:49 | INFO | train_inner | epoch 001: 1576 / 3002 loss=2.977, ppl=7.87, wps=5915.7, ups=0.09, wpb=64854, bsz=128, num_updates=1561, lr=9.99955e-05, gnorm=2.825, loss_scale=2, train_wall=11, gb_free=2.8, wall=17464
2021-06-18 23:30:00 | INFO | train_inner | epoch 001: 1577 / 3002 loss=2.991, ppl=7.95, wps=5880.1, ups=0.09, wpb=64856, bsz=128, num_updates=1562, lr=9.99955e-05, gnorm=2.716, loss_scale=2, train_wall=11, gb_free=2.8, wall=17475
2021-06-18 23:30:11 | INFO | train_inner | epoch 001: 1578 / 3002 loss=3.008, ppl=8.04, wps=5925.5, ups=0.09, wpb=64864, bsz=128, num_updates=1563, lr=9.99955e-05, gnorm=3.335, loss_scale=2, train_wall=10, gb_free=2.8, wall=17486
2021-06-18 23:30:23 | INFO | train_inner | epoch 001: 1579 / 3002 loss=3.049, ppl=8.28, wps=5849.7, ups=0.09, wpb=64844, bsz=128, num_updates=1564, lr=9.99955e-05, gnorm=2.854, loss_scale=2, train_wall=11, gb_free=2.8, wall=17497
2021-06-18 23:30:34 | INFO | train_inner | epoch 001: 1580 / 3002 loss=3.01, ppl=8.06, wps=5860.8, ups=0.09, wpb=64824, bsz=128, num_updates=1565, lr=9.99955e-05, gnorm=3.112, loss_scale=2, train_wall=11, gb_free=2.8, wall=17508
2021-06-18 23:30:45 | INFO | train_inner | epoch 001: 1581 / 3002 loss=2.981, ppl=7.9, wps=5866.9, ups=0.09, wpb=64756, bsz=128, num_updates=1566, lr=9.99955e-05, gnorm=2.643, loss_scale=2, train_wall=11, gb_free=2.8, wall=17519
2021-06-18 23:30:56 | INFO | train_inner | epoch 001: 1582 / 3002 loss=3.121, ppl=8.7, wps=5848.6, ups=0.09, wpb=64788, bsz=128, num_updates=1567, lr=9.99955e-05, gnorm=2.643, loss_scale=2, train_wall=11, gb_free=2.8, wall=17530
2021-06-18 23:31:07 | INFO | train_inner | epoch 001: 1583 / 3002 loss=2.983, ppl=7.9, wps=5909, ups=0.09, wpb=64693, bsz=128, num_updates=1568, lr=9.99955e-05, gnorm=2.561, loss_scale=2, train_wall=10, gb_free=2.8, wall=17541
2021-06-18 23:31:17 | INFO | train_inner | epoch 001: 1584 / 3002 loss=3.097, ppl=8.56, wps=5973.1, ups=0.09, wpb=64817, bsz=128, num_updates=1569, lr=9.99954e-05, gnorm=2.654, loss_scale=2, train_wall=10, gb_free=2.8, wall=17552
2021-06-18 23:31:29 | INFO | train_inner | epoch 001: 1585 / 3002 loss=2.993, ppl=7.96, wps=5826.8, ups=0.09, wpb=64859, bsz=128, num_updates=1570, lr=9.99954e-05, gnorm=2.581, loss_scale=2, train_wall=11, gb_free=2.8, wall=17563
2021-06-18 23:31:40 | INFO | train_inner | epoch 001: 1586 / 3002 loss=3.125, ppl=8.73, wps=5914.1, ups=0.09, wpb=64806, bsz=128, num_updates=1571, lr=9.99954e-05, gnorm=2.651, loss_scale=2, train_wall=10, gb_free=2.8, wall=17574
2021-06-18 23:31:51 | INFO | train_inner | epoch 001: 1587 / 3002 loss=3.026, ppl=8.14, wps=5772, ups=0.09, wpb=64815, bsz=128, num_updates=1572, lr=9.99954e-05, gnorm=2.628, loss_scale=2, train_wall=11, gb_free=2.8, wall=17585
2021-06-18 23:32:02 | INFO | train_inner | epoch 001: 1588 / 3002 loss=3.267, ppl=9.63, wps=5803.5, ups=0.09, wpb=64793, bsz=128, num_updates=1573, lr=9.99954e-05, gnorm=2.805, loss_scale=2, train_wall=11, gb_free=2.8, wall=17596
2021-06-18 23:32:13 | INFO | train_inner | epoch 001: 1589 / 3002 loss=3.244, ppl=9.47, wps=5896.9, ups=0.09, wpb=64815, bsz=128, num_updates=1574, lr=9.99954e-05, gnorm=2.505, loss_scale=2, train_wall=11, gb_free=2.8, wall=17607
2021-06-18 23:32:24 | INFO | train_inner | epoch 001: 1590 / 3002 loss=2.836, ppl=7.14, wps=5872.8, ups=0.09, wpb=64791, bsz=128, num_updates=1575, lr=9.99954e-05, gnorm=2.517, loss_scale=2, train_wall=11, gb_free=2.8, wall=17618
2021-06-18 23:32:35 | INFO | train_inner | epoch 001: 1591 / 3002 loss=3.066, ppl=8.37, wps=5924.2, ups=0.09, wpb=64799, bsz=128, num_updates=1576, lr=9.99954e-05, gnorm=2.697, loss_scale=2, train_wall=10, gb_free=2.8, wall=17629
2021-06-18 23:32:46 | INFO | train_inner | epoch 001: 1592 / 3002 loss=3.097, ppl=8.56, wps=5836.4, ups=0.09, wpb=64775, bsz=128, num_updates=1577, lr=9.99954e-05, gnorm=2.864, loss_scale=2, train_wall=11, gb_free=2.8, wall=17640
2021-06-18 23:32:57 | INFO | train_inner | epoch 001: 1593 / 3002 loss=3.105, ppl=8.6, wps=5828.4, ups=0.09, wpb=64776, bsz=128, num_updates=1578, lr=9.99954e-05, gnorm=2.58, loss_scale=2, train_wall=11, gb_free=2.8, wall=17651
2021-06-18 23:33:08 | INFO | train_inner | epoch 001: 1594 / 3002 loss=3.121, ppl=8.7, wps=5910.3, ups=0.09, wpb=64896, bsz=128, num_updates=1579, lr=9.99954e-05, gnorm=2.934, loss_scale=2, train_wall=11, gb_free=2.8, wall=17662
2021-06-18 23:33:19 | INFO | train_inner | epoch 001: 1595 / 3002 loss=2.948, ppl=7.72, wps=5824.1, ups=0.09, wpb=64788, bsz=128, num_updates=1580, lr=9.99954e-05, gnorm=3.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=17674
2021-06-18 23:33:30 | INFO | train_inner | epoch 001: 1596 / 3002 loss=2.931, ppl=7.62, wps=5903.6, ups=0.09, wpb=64886, bsz=128, num_updates=1581, lr=9.99954e-05, gnorm=2.928, loss_scale=2, train_wall=11, gb_free=2.8, wall=17685
2021-06-18 23:33:41 | INFO | train_inner | epoch 001: 1597 / 3002 loss=3.123, ppl=8.71, wps=5798.1, ups=0.09, wpb=64739, bsz=128, num_updates=1582, lr=9.99953e-05, gnorm=2.533, loss_scale=2, train_wall=11, gb_free=2.8, wall=17696
2021-06-18 23:33:52 | INFO | train_inner | epoch 001: 1598 / 3002 loss=3.1, ppl=8.58, wps=5853.4, ups=0.09, wpb=64864, bsz=128, num_updates=1583, lr=9.99953e-05, gnorm=2.634, loss_scale=2, train_wall=11, gb_free=2.8, wall=17707
2021-06-18 23:34:04 | INFO | train_inner | epoch 001: 1599 / 3002 loss=3.007, ppl=8.04, wps=5766.4, ups=0.09, wpb=64785, bsz=128, num_updates=1584, lr=9.99953e-05, gnorm=2.631, loss_scale=2, train_wall=11, gb_free=2.8, wall=17718
2021-06-18 23:34:15 | INFO | train_inner | epoch 001: 1600 / 3002 loss=2.982, ppl=7.9, wps=5831.4, ups=0.09, wpb=64840, bsz=128, num_updates=1585, lr=9.99953e-05, gnorm=2.571, loss_scale=2, train_wall=11, gb_free=2.8, wall=17729
2021-06-18 23:34:26 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-18 23:34:37 | INFO | train_inner | epoch 001: 1602 / 3002 loss=2.947, ppl=7.71, wps=2928.3, ups=0.05, wpb=64832, bsz=128, num_updates=1586, lr=9.99953e-05, gnorm=2.505, loss_scale=1, train_wall=21, gb_free=2.8, wall=17751
2021-06-18 23:34:48 | INFO | train_inner | epoch 001: 1603 / 3002 loss=2.917, ppl=7.55, wps=5953.6, ups=0.09, wpb=64832, bsz=128, num_updates=1587, lr=9.99953e-05, gnorm=2.646, loss_scale=1, train_wall=10, gb_free=2.8, wall=17762
2021-06-18 23:34:59 | INFO | train_inner | epoch 001: 1604 / 3002 loss=3.043, ppl=8.24, wps=5925.6, ups=0.09, wpb=64892, bsz=128, num_updates=1588, lr=9.99953e-05, gnorm=2.794, loss_scale=1, train_wall=11, gb_free=2.8, wall=17773
2021-06-18 23:35:10 | INFO | train_inner | epoch 001: 1605 / 3002 loss=2.92, ppl=7.57, wps=5988.5, ups=0.09, wpb=64954, bsz=128, num_updates=1589, lr=9.99953e-05, gnorm=2.495, loss_scale=1, train_wall=10, gb_free=2.8, wall=17784
2021-06-18 23:35:21 | INFO | train_inner | epoch 001: 1606 / 3002 loss=3.038, ppl=8.22, wps=5861.8, ups=0.09, wpb=64776, bsz=128, num_updates=1590, lr=9.99953e-05, gnorm=2.461, loss_scale=1, train_wall=11, gb_free=2.8, wall=17795
2021-06-18 23:35:32 | INFO | train_inner | epoch 001: 1607 / 3002 loss=2.975, ppl=7.86, wps=5848.4, ups=0.09, wpb=64907, bsz=128, num_updates=1591, lr=9.99953e-05, gnorm=2.59, loss_scale=1, train_wall=11, gb_free=2.8, wall=17806
2021-06-18 23:35:43 | INFO | train_inner | epoch 001: 1608 / 3002 loss=3.123, ppl=8.71, wps=5987.8, ups=0.09, wpb=64875, bsz=128, num_updates=1592, lr=9.99953e-05, gnorm=2.531, loss_scale=1, train_wall=10, gb_free=2.8, wall=17817
2021-06-18 23:35:54 | INFO | train_inner | epoch 001: 1609 / 3002 loss=3.114, ppl=8.66, wps=5775.2, ups=0.09, wpb=64739, bsz=128, num_updates=1593, lr=9.99953e-05, gnorm=9.578, loss_scale=1, train_wall=11, gb_free=2.8, wall=17828
2021-06-18 23:36:05 | INFO | train_inner | epoch 001: 1610 / 3002 loss=2.958, ppl=7.77, wps=5857.2, ups=0.09, wpb=64853, bsz=128, num_updates=1594, lr=9.99952e-05, gnorm=2.507, loss_scale=1, train_wall=11, gb_free=2.8, wall=17839
2021-06-18 23:36:16 | INFO | train_inner | epoch 001: 1611 / 3002 loss=2.963, ppl=7.8, wps=5917, ups=0.09, wpb=64843, bsz=128, num_updates=1595, lr=9.99952e-05, gnorm=2.722, loss_scale=1, train_wall=11, gb_free=2.8, wall=17850
2021-06-18 23:36:27 | INFO | train_inner | epoch 001: 1612 / 3002 loss=3.13, ppl=8.76, wps=5806.9, ups=0.09, wpb=64797, bsz=128, num_updates=1596, lr=9.99952e-05, gnorm=2.466, loss_scale=1, train_wall=11, gb_free=2.8, wall=17861
2021-06-18 23:36:38 | INFO | train_inner | epoch 001: 1613 / 3002 loss=3.272, ppl=9.66, wps=5790.6, ups=0.09, wpb=64791, bsz=128, num_updates=1597, lr=9.99952e-05, gnorm=2.613, loss_scale=1, train_wall=11, gb_free=2.8, wall=17873
2021-06-18 23:36:49 | INFO | train_inner | epoch 001: 1614 / 3002 loss=2.977, ppl=7.87, wps=5831.5, ups=0.09, wpb=64796, bsz=128, num_updates=1598, lr=9.99952e-05, gnorm=2.689, loss_scale=1, train_wall=11, gb_free=2.8, wall=17884
2021-06-18 23:37:00 | INFO | train_inner | epoch 001: 1615 / 3002 loss=3.005, ppl=8.03, wps=5838.8, ups=0.09, wpb=64858, bsz=128, num_updates=1599, lr=9.99952e-05, gnorm=3.288, loss_scale=1, train_wall=11, gb_free=2.8, wall=17895
2021-06-18 23:37:12 | INFO | train_inner | epoch 001: 1616 / 3002 loss=2.988, ppl=7.93, wps=5843.3, ups=0.09, wpb=64885, bsz=128, num_updates=1600, lr=9.99952e-05, gnorm=2.515, loss_scale=1, train_wall=11, gb_free=2.8, wall=17906
2021-06-18 23:37:23 | INFO | train_inner | epoch 001: 1617 / 3002 loss=3.044, ppl=8.25, wps=5834.7, ups=0.09, wpb=64732, bsz=128, num_updates=1601, lr=9.99952e-05, gnorm=2.598, loss_scale=1, train_wall=11, gb_free=2.8, wall=17917
2021-06-18 23:37:34 | INFO | train_inner | epoch 001: 1618 / 3002 loss=2.889, ppl=7.41, wps=5909.9, ups=0.09, wpb=64862, bsz=128, num_updates=1602, lr=9.99952e-05, gnorm=2.52, loss_scale=1, train_wall=11, gb_free=2.8, wall=17928
2021-06-18 23:37:45 | INFO | train_inner | epoch 001: 1619 / 3002 loss=3.14, ppl=8.82, wps=5849, ups=0.09, wpb=64832, bsz=128, num_updates=1603, lr=9.99952e-05, gnorm=5.379, loss_scale=1, train_wall=11, gb_free=2.8, wall=17939
2021-06-18 23:37:56 | INFO | train_inner | epoch 001: 1620 / 3002 loss=2.873, ppl=7.33, wps=5943.9, ups=0.09, wpb=64870, bsz=128, num_updates=1604, lr=9.99952e-05, gnorm=2.966, loss_scale=1, train_wall=10, gb_free=2.8, wall=17950
2021-06-18 23:38:07 | INFO | train_inner | epoch 001: 1621 / 3002 loss=3.091, ppl=8.52, wps=5907, ups=0.09, wpb=64906, bsz=128, num_updates=1605, lr=9.99952e-05, gnorm=2.656, loss_scale=1, train_wall=11, gb_free=2.8, wall=17961
2021-06-18 23:38:18 | INFO | train_inner | epoch 001: 1622 / 3002 loss=2.997, ppl=7.98, wps=5719.6, ups=0.09, wpb=64781, bsz=128, num_updates=1606, lr=9.99952e-05, gnorm=2.541, loss_scale=1, train_wall=11, gb_free=2.8, wall=17972
2021-06-18 23:38:29 | INFO | train_inner | epoch 001: 1623 / 3002 loss=3.034, ppl=8.19, wps=5860.4, ups=0.09, wpb=64793, bsz=128, num_updates=1607, lr=9.99951e-05, gnorm=2.558, loss_scale=1, train_wall=11, gb_free=2.8, wall=17983
2021-06-18 23:38:40 | INFO | train_inner | epoch 001: 1624 / 3002 loss=2.938, ppl=7.66, wps=5856.7, ups=0.09, wpb=64830, bsz=128, num_updates=1608, lr=9.99951e-05, gnorm=2.696, loss_scale=1, train_wall=11, gb_free=2.8, wall=17994
2021-06-18 23:38:51 | INFO | train_inner | epoch 001: 1625 / 3002 loss=2.958, ppl=7.77, wps=5876.3, ups=0.09, wpb=64857, bsz=128, num_updates=1609, lr=9.99951e-05, gnorm=8.196, loss_scale=1, train_wall=11, gb_free=2.8, wall=18005
2021-06-18 23:39:02 | INFO | train_inner | epoch 001: 1626 / 3002 loss=2.973, ppl=7.85, wps=5946.2, ups=0.09, wpb=64861, bsz=128, num_updates=1610, lr=9.99951e-05, gnorm=2.712, loss_scale=1, train_wall=10, gb_free=2.8, wall=18016
2021-06-18 23:39:13 | INFO | train_inner | epoch 001: 1627 / 3002 loss=3.331, ppl=10.07, wps=5892.3, ups=0.09, wpb=64926, bsz=128, num_updates=1611, lr=9.99951e-05, gnorm=2.87, loss_scale=1, train_wall=11, gb_free=2.8, wall=18027
2021-06-18 23:39:24 | INFO | train_inner | epoch 001: 1628 / 3002 loss=2.975, ppl=7.86, wps=5874.8, ups=0.09, wpb=64923, bsz=128, num_updates=1612, lr=9.99951e-05, gnorm=11.498, loss_scale=1, train_wall=11, gb_free=2.8, wall=18038
2021-06-18 23:39:35 | INFO | train_inner | epoch 001: 1629 / 3002 loss=2.99, ppl=7.95, wps=5819.3, ups=0.09, wpb=64834, bsz=128, num_updates=1613, lr=9.99951e-05, gnorm=2.663, loss_scale=1, train_wall=11, gb_free=2.8, wall=18050
2021-06-18 23:39:47 | INFO | train_inner | epoch 001: 1630 / 3002 loss=3.08, ppl=8.46, wps=5755.8, ups=0.09, wpb=64846, bsz=128, num_updates=1614, lr=9.99951e-05, gnorm=2.579, loss_scale=1, train_wall=11, gb_free=2.8, wall=18061
2021-06-18 23:39:58 | INFO | train_inner | epoch 001: 1631 / 3002 loss=3.05, ppl=8.28, wps=5827.1, ups=0.09, wpb=64842, bsz=128, num_updates=1615, lr=9.99951e-05, gnorm=2.534, loss_scale=1, train_wall=11, gb_free=2.8, wall=18072
2021-06-18 23:40:09 | INFO | train_inner | epoch 001: 1632 / 3002 loss=3.002, ppl=8.01, wps=5909.1, ups=0.09, wpb=64868, bsz=128, num_updates=1616, lr=9.99951e-05, gnorm=2.76, loss_scale=1, train_wall=11, gb_free=2.8, wall=18083
2021-06-18 23:40:20 | INFO | train_inner | epoch 001: 1633 / 3002 loss=2.986, ppl=7.92, wps=5825.6, ups=0.09, wpb=64847, bsz=128, num_updates=1617, lr=9.99951e-05, gnorm=3.307, loss_scale=1, train_wall=11, gb_free=2.8, wall=18094
2021-06-18 23:40:31 | INFO | train_inner | epoch 001: 1634 / 3002 loss=3.009, ppl=8.05, wps=5995.5, ups=0.09, wpb=64871, bsz=128, num_updates=1618, lr=9.99951e-05, gnorm=7.813, loss_scale=1, train_wall=10, gb_free=2.8, wall=18105
2021-06-18 23:40:42 | INFO | train_inner | epoch 001: 1635 / 3002 loss=3.034, ppl=8.19, wps=5836.6, ups=0.09, wpb=64721, bsz=128, num_updates=1619, lr=9.9995e-05, gnorm=2.487, loss_scale=1, train_wall=11, gb_free=2.8, wall=18116
2021-06-18 23:40:52 | INFO | train_inner | epoch 001: 1636 / 3002 loss=2.939, ppl=7.67, wps=6023.3, ups=0.09, wpb=64851, bsz=128, num_updates=1620, lr=9.9995e-05, gnorm=2.636, loss_scale=1, train_wall=10, gb_free=2.8, wall=18127
2021-06-18 23:41:04 | INFO | train_inner | epoch 001: 1637 / 3002 loss=2.944, ppl=7.7, wps=5822, ups=0.09, wpb=64716, bsz=128, num_updates=1621, lr=9.9995e-05, gnorm=2.672, loss_scale=1, train_wall=11, gb_free=2.8, wall=18138
2021-06-18 23:41:15 | INFO | train_inner | epoch 001: 1638 / 3002 loss=3.11, ppl=8.63, wps=5845.7, ups=0.09, wpb=64842, bsz=128, num_updates=1622, lr=9.9995e-05, gnorm=2.617, loss_scale=1, train_wall=11, gb_free=2.8, wall=18149
2021-06-18 23:41:26 | INFO | train_inner | epoch 001: 1639 / 3002 loss=3.062, ppl=8.35, wps=5892.3, ups=0.09, wpb=64816, bsz=128, num_updates=1623, lr=9.9995e-05, gnorm=2.669, loss_scale=1, train_wall=11, gb_free=2.8, wall=18160
2021-06-18 23:41:37 | INFO | train_inner | epoch 001: 1640 / 3002 loss=2.984, ppl=7.91, wps=5810.9, ups=0.09, wpb=64656, bsz=128, num_updates=1624, lr=9.9995e-05, gnorm=11.143, loss_scale=1, train_wall=11, gb_free=2.8, wall=18171
2021-06-18 23:41:48 | INFO | train_inner | epoch 001: 1641 / 3002 loss=3.009, ppl=8.05, wps=5854.1, ups=0.09, wpb=64862, bsz=128, num_updates=1625, lr=9.9995e-05, gnorm=2.843, loss_scale=1, train_wall=11, gb_free=2.8, wall=18182
2021-06-18 23:41:59 | INFO | train_inner | epoch 001: 1642 / 3002 loss=3, ppl=8, wps=5741.3, ups=0.09, wpb=64779, bsz=128, num_updates=1626, lr=9.9995e-05, gnorm=6.007, loss_scale=1, train_wall=11, gb_free=2.8, wall=18193
2021-06-18 23:42:10 | INFO | train_inner | epoch 001: 1643 / 3002 loss=2.84, ppl=7.16, wps=5934.6, ups=0.09, wpb=64881, bsz=128, num_updates=1627, lr=9.9995e-05, gnorm=2.57, loss_scale=1, train_wall=11, gb_free=2.8, wall=18204
2021-06-18 23:42:21 | INFO | train_inner | epoch 001: 1644 / 3002 loss=2.999, ppl=7.99, wps=5726, ups=0.09, wpb=64806, bsz=128, num_updates=1628, lr=9.9995e-05, gnorm=16.856, loss_scale=1, train_wall=11, gb_free=2.8, wall=18216
2021-06-18 23:42:33 | INFO | train_inner | epoch 001: 1645 / 3002 loss=3.106, ppl=8.61, wps=5796.2, ups=0.09, wpb=64746, bsz=128, num_updates=1629, lr=9.9995e-05, gnorm=3.088, loss_scale=1, train_wall=11, gb_free=2.8, wall=18227
2021-06-18 23:42:44 | INFO | train_inner | epoch 001: 1646 / 3002 loss=2.997, ppl=7.98, wps=5862.9, ups=0.09, wpb=64849, bsz=128, num_updates=1630, lr=9.9995e-05, gnorm=3.315, loss_scale=1, train_wall=11, gb_free=2.8, wall=18238
2021-06-18 23:42:55 | INFO | train_inner | epoch 001: 1647 / 3002 loss=3.07, ppl=8.4, wps=5840.1, ups=0.09, wpb=64767, bsz=128, num_updates=1631, lr=9.9995e-05, gnorm=3.858, loss_scale=1, train_wall=11, gb_free=2.8, wall=18249
2021-06-18 23:43:06 | INFO | train_inner | epoch 001: 1648 / 3002 loss=3.05, ppl=8.28, wps=5793.6, ups=0.09, wpb=64788, bsz=128, num_updates=1632, lr=9.99949e-05, gnorm=2.645, loss_scale=1, train_wall=11, gb_free=2.8, wall=18260
2021-06-18 23:43:17 | INFO | train_inner | epoch 001: 1649 / 3002 loss=2.983, ppl=7.9, wps=5859.6, ups=0.09, wpb=64751, bsz=128, num_updates=1633, lr=9.99949e-05, gnorm=2.652, loss_scale=1, train_wall=11, gb_free=2.8, wall=18271
2021-06-18 23:43:28 | INFO | train_inner | epoch 001: 1650 / 3002 loss=3.133, ppl=8.78, wps=5836, ups=0.09, wpb=64896, bsz=128, num_updates=1634, lr=9.99949e-05, gnorm=4.184, loss_scale=1, train_wall=11, gb_free=2.8, wall=18282
2021-06-18 23:43:39 | INFO | train_inner | epoch 001: 1651 / 3002 loss=3.144, ppl=8.84, wps=5887.4, ups=0.09, wpb=64886, bsz=128, num_updates=1635, lr=9.99949e-05, gnorm=2.628, loss_scale=1, train_wall=11, gb_free=2.8, wall=18293
2021-06-18 23:43:50 | INFO | train_inner | epoch 001: 1652 / 3002 loss=2.981, ppl=7.89, wps=5824.1, ups=0.09, wpb=64833, bsz=128, num_updates=1636, lr=9.99949e-05, gnorm=4.294, loss_scale=1, train_wall=11, gb_free=2.8, wall=18305
2021-06-18 23:44:01 | INFO | train_inner | epoch 001: 1653 / 3002 loss=3.044, ppl=8.25, wps=5905.2, ups=0.09, wpb=64821, bsz=128, num_updates=1637, lr=9.99949e-05, gnorm=2.91, loss_scale=1, train_wall=11, gb_free=2.8, wall=18316
2021-06-18 23:44:12 | INFO | train_inner | epoch 001: 1654 / 3002 loss=3.075, ppl=8.43, wps=5925.7, ups=0.09, wpb=64912, bsz=128, num_updates=1638, lr=9.99949e-05, gnorm=4.845, loss_scale=1, train_wall=10, gb_free=2.8, wall=18326
2021-06-18 23:44:23 | INFO | train_inner | epoch 001: 1655 / 3002 loss=2.987, ppl=7.93, wps=5896.3, ups=0.09, wpb=64816, bsz=128, num_updates=1639, lr=9.99949e-05, gnorm=2.97, loss_scale=1, train_wall=11, gb_free=2.8, wall=18337
2021-06-18 23:44:34 | INFO | train_inner | epoch 001: 1656 / 3002 loss=3.12, ppl=8.69, wps=5826.9, ups=0.09, wpb=64759, bsz=128, num_updates=1640, lr=9.99949e-05, gnorm=5.134, loss_scale=1, train_wall=11, gb_free=2.8, wall=18349
2021-06-18 23:44:45 | INFO | train_inner | epoch 001: 1657 / 3002 loss=3.091, ppl=8.52, wps=5825.4, ups=0.09, wpb=64746, bsz=128, num_updates=1641, lr=9.99949e-05, gnorm=3.586, loss_scale=1, train_wall=11, gb_free=2.8, wall=18360
2021-06-18 23:44:56 | INFO | train_inner | epoch 001: 1658 / 3002 loss=2.931, ppl=7.63, wps=5978.6, ups=0.09, wpb=64863, bsz=128, num_updates=1642, lr=9.99949e-05, gnorm=2.688, loss_scale=1, train_wall=10, gb_free=2.8, wall=18371
2021-06-18 23:45:07 | INFO | train_inner | epoch 001: 1659 / 3002 loss=2.987, ppl=7.93, wps=5835.5, ups=0.09, wpb=64851, bsz=128, num_updates=1643, lr=9.99949e-05, gnorm=5.319, loss_scale=1, train_wall=11, gb_free=2.8, wall=18382
2021-06-18 23:45:18 | INFO | train_inner | epoch 001: 1660 / 3002 loss=2.911, ppl=7.52, wps=5826.1, ups=0.09, wpb=64882, bsz=128, num_updates=1644, lr=9.99948e-05, gnorm=2.829, loss_scale=1, train_wall=11, gb_free=2.8, wall=18393
2021-06-18 23:45:30 | INFO | train_inner | epoch 001: 1661 / 3002 loss=3.115, ppl=8.67, wps=5846.8, ups=0.09, wpb=64809, bsz=128, num_updates=1645, lr=9.99948e-05, gnorm=3.361, loss_scale=1, train_wall=11, gb_free=2.8, wall=18404
2021-06-18 23:45:41 | INFO | train_inner | epoch 001: 1662 / 3002 loss=3.052, ppl=8.29, wps=5805.3, ups=0.09, wpb=64773, bsz=128, num_updates=1646, lr=9.99948e-05, gnorm=2.963, loss_scale=1, train_wall=11, gb_free=2.8, wall=18415
2021-06-18 23:45:52 | INFO | train_inner | epoch 001: 1663 / 3002 loss=2.971, ppl=7.84, wps=5816.4, ups=0.09, wpb=64757, bsz=128, num_updates=1647, lr=9.99948e-05, gnorm=20.616, loss_scale=1, train_wall=11, gb_free=2.8, wall=18426
2021-06-18 23:46:03 | INFO | train_inner | epoch 001: 1664 / 3002 loss=2.941, ppl=7.68, wps=5935.4, ups=0.09, wpb=64924, bsz=128, num_updates=1648, lr=9.99948e-05, gnorm=5.569, loss_scale=1, train_wall=10, gb_free=2.8, wall=18437
2021-06-18 23:46:14 | INFO | train_inner | epoch 001: 1665 / 3002 loss=3.153, ppl=8.89, wps=5828.2, ups=0.09, wpb=64916, bsz=128, num_updates=1649, lr=9.99948e-05, gnorm=2.974, loss_scale=1, train_wall=11, gb_free=2.8, wall=18448
2021-06-18 23:46:25 | INFO | train_inner | epoch 001: 1666 / 3002 loss=3.011, ppl=8.06, wps=5916.7, ups=0.09, wpb=64763, bsz=128, num_updates=1650, lr=9.99948e-05, gnorm=3.257, loss_scale=1, train_wall=10, gb_free=2.8, wall=18459
2021-06-18 23:46:36 | INFO | train_inner | epoch 001: 1667 / 3002 loss=3.12, ppl=8.69, wps=5811.7, ups=0.09, wpb=64862, bsz=128, num_updates=1651, lr=9.99948e-05, gnorm=4.284, loss_scale=1, train_wall=11, gb_free=2.8, wall=18470
2021-06-18 23:46:47 | INFO | train_inner | epoch 001: 1668 / 3002 loss=3.055, ppl=8.31, wps=5862.3, ups=0.09, wpb=64862, bsz=128, num_updates=1652, lr=9.99948e-05, gnorm=4.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=18481
2021-06-18 23:46:58 | INFO | train_inner | epoch 001: 1669 / 3002 loss=3.234, ppl=9.41, wps=5818.6, ups=0.09, wpb=64826, bsz=128, num_updates=1653, lr=9.99948e-05, gnorm=3.104, loss_scale=1, train_wall=11, gb_free=2.8, wall=18493
2021-06-18 23:47:09 | INFO | train_inner | epoch 001: 1670 / 3002 loss=3.137, ppl=8.8, wps=5809.1, ups=0.09, wpb=64886, bsz=128, num_updates=1654, lr=9.99948e-05, gnorm=3.818, loss_scale=1, train_wall=11, gb_free=2.8, wall=18504
2021-06-18 23:47:21 | INFO | train_inner | epoch 001: 1671 / 3002 loss=3.073, ppl=8.41, wps=5838.5, ups=0.09, wpb=64854, bsz=128, num_updates=1655, lr=9.99948e-05, gnorm=13.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=18515
2021-06-18 23:47:32 | INFO | train_inner | epoch 001: 1672 / 3002 loss=3.1, ppl=8.57, wps=5884.9, ups=0.09, wpb=64834, bsz=128, num_updates=1656, lr=9.99948e-05, gnorm=12.132, loss_scale=1, train_wall=11, gb_free=2.8, wall=18526
2021-06-18 23:47:43 | INFO | train_inner | epoch 001: 1673 / 3002 loss=3.026, ppl=8.14, wps=5895.2, ups=0.09, wpb=64812, bsz=128, num_updates=1657, lr=9.99947e-05, gnorm=4.085, loss_scale=1, train_wall=11, gb_free=2.8, wall=18537
2021-06-18 23:47:54 | INFO | train_inner | epoch 001: 1674 / 3002 loss=3.054, ppl=8.31, wps=5884.9, ups=0.09, wpb=64940, bsz=128, num_updates=1658, lr=9.99947e-05, gnorm=2.961, loss_scale=1, train_wall=11, gb_free=2.8, wall=18548
2021-06-18 23:48:05 | INFO | train_inner | epoch 001: 1675 / 3002 loss=3.039, ppl=8.22, wps=5797.6, ups=0.09, wpb=64808, bsz=128, num_updates=1659, lr=9.99947e-05, gnorm=2.963, loss_scale=1, train_wall=11, gb_free=2.8, wall=18559
2021-06-18 23:48:16 | INFO | train_inner | epoch 001: 1676 / 3002 loss=3.228, ppl=9.37, wps=5794.7, ups=0.09, wpb=64806, bsz=128, num_updates=1660, lr=9.99947e-05, gnorm=2.849, loss_scale=1, train_wall=11, gb_free=2.8, wall=18570
2021-06-18 23:48:27 | INFO | train_inner | epoch 001: 1677 / 3002 loss=3.028, ppl=8.16, wps=5756.2, ups=0.09, wpb=64778, bsz=128, num_updates=1661, lr=9.99947e-05, gnorm=3.304, loss_scale=1, train_wall=11, gb_free=2.8, wall=18582
2021-06-18 23:48:38 | INFO | train_inner | epoch 001: 1678 / 3002 loss=3.034, ppl=8.19, wps=5881, ups=0.09, wpb=64814, bsz=128, num_updates=1662, lr=9.99947e-05, gnorm=3.761, loss_scale=1, train_wall=11, gb_free=2.8, wall=18593
2021-06-18 23:48:49 | INFO | train_inner | epoch 001: 1679 / 3002 loss=3.173, ppl=9.02, wps=5914.4, ups=0.09, wpb=64851, bsz=128, num_updates=1663, lr=9.99947e-05, gnorm=3.492, loss_scale=1, train_wall=11, gb_free=2.8, wall=18603
2021-06-18 23:49:00 | INFO | train_inner | epoch 001: 1680 / 3002 loss=3.057, ppl=8.32, wps=5899.2, ups=0.09, wpb=64829, bsz=128, num_updates=1664, lr=9.99947e-05, gnorm=2.804, loss_scale=1, train_wall=11, gb_free=2.8, wall=18614
2021-06-18 23:49:11 | INFO | train_inner | epoch 001: 1681 / 3002 loss=3.116, ppl=8.67, wps=5812.3, ups=0.09, wpb=64846, bsz=128, num_updates=1665, lr=9.99947e-05, gnorm=2.75, loss_scale=1, train_wall=11, gb_free=2.8, wall=18626
2021-06-18 23:49:22 | INFO | train_inner | epoch 001: 1682 / 3002 loss=3.068, ppl=8.39, wps=5849.1, ups=0.09, wpb=64791, bsz=128, num_updates=1666, lr=9.99947e-05, gnorm=3.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=18637
2021-06-18 23:49:34 | INFO | train_inner | epoch 001: 1683 / 3002 loss=3.105, ppl=8.6, wps=5794.9, ups=0.09, wpb=64879, bsz=128, num_updates=1667, lr=9.99947e-05, gnorm=2.691, loss_scale=1, train_wall=11, gb_free=2.8, wall=18648
2021-06-18 23:49:45 | INFO | train_inner | epoch 001: 1684 / 3002 loss=2.862, ppl=7.27, wps=5747.8, ups=0.09, wpb=64855, bsz=128, num_updates=1668, lr=9.99947e-05, gnorm=2.928, loss_scale=1, train_wall=11, gb_free=2.8, wall=18659
2021-06-18 23:49:56 | INFO | train_inner | epoch 001: 1685 / 3002 loss=3.168, ppl=8.99, wps=5824.5, ups=0.09, wpb=64825, bsz=128, num_updates=1669, lr=9.99946e-05, gnorm=3.221, loss_scale=1, train_wall=11, gb_free=2.8, wall=18670
2021-06-18 23:50:07 | INFO | train_inner | epoch 001: 1686 / 3002 loss=2.915, ppl=7.54, wps=5791.5, ups=0.09, wpb=64832, bsz=128, num_updates=1670, lr=9.99946e-05, gnorm=3.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=18682
2021-06-18 23:50:18 | INFO | train_inner | epoch 001: 1687 / 3002 loss=2.921, ppl=7.58, wps=5808.8, ups=0.09, wpb=64874, bsz=128, num_updates=1671, lr=9.99946e-05, gnorm=2.644, loss_scale=1, train_wall=11, gb_free=2.8, wall=18693
2021-06-18 23:50:29 | INFO | train_inner | epoch 001: 1688 / 3002 loss=2.942, ppl=7.69, wps=5829.1, ups=0.09, wpb=64792, bsz=128, num_updates=1672, lr=9.99946e-05, gnorm=7.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=18704
2021-06-18 23:50:41 | INFO | train_inner | epoch 001: 1689 / 3002 loss=2.837, ppl=7.15, wps=5767.4, ups=0.09, wpb=64835, bsz=128, num_updates=1673, lr=9.99946e-05, gnorm=2.465, loss_scale=1, train_wall=11, gb_free=2.8, wall=18715
2021-06-18 23:50:52 | INFO | train_inner | epoch 001: 1690 / 3002 loss=2.98, ppl=7.89, wps=5902.4, ups=0.09, wpb=64870, bsz=128, num_updates=1674, lr=9.99946e-05, gnorm=2.502, loss_scale=1, train_wall=11, gb_free=2.8, wall=18726
2021-06-18 23:51:03 | INFO | train_inner | epoch 001: 1691 / 3002 loss=3.162, ppl=8.95, wps=5863.3, ups=0.09, wpb=64870, bsz=128, num_updates=1675, lr=9.99946e-05, gnorm=3.965, loss_scale=1, train_wall=11, gb_free=2.8, wall=18737
2021-06-18 23:51:14 | INFO | train_inner | epoch 001: 1692 / 3002 loss=3.092, ppl=8.53, wps=5963.9, ups=0.09, wpb=64807, bsz=128, num_updates=1676, lr=9.99946e-05, gnorm=2.678, loss_scale=1, train_wall=10, gb_free=2.8, wall=18748
2021-06-18 23:51:25 | INFO | train_inner | epoch 001: 1693 / 3002 loss=3.042, ppl=8.24, wps=5846.1, ups=0.09, wpb=64806, bsz=128, num_updates=1677, lr=9.99946e-05, gnorm=2.599, loss_scale=1, train_wall=11, gb_free=2.8, wall=18759
2021-06-18 23:51:36 | INFO | train_inner | epoch 001: 1694 / 3002 loss=3.108, ppl=8.62, wps=5772.1, ups=0.09, wpb=64775, bsz=128, num_updates=1678, lr=9.99946e-05, gnorm=2.721, loss_scale=1, train_wall=11, gb_free=2.8, wall=18770
2021-06-18 23:51:47 | INFO | train_inner | epoch 001: 1695 / 3002 loss=3.131, ppl=8.76, wps=5924, ups=0.09, wpb=64758, bsz=128, num_updates=1679, lr=9.99946e-05, gnorm=2.688, loss_scale=1, train_wall=10, gb_free=2.8, wall=18781
2021-06-18 23:51:58 | INFO | train_inner | epoch 001: 1696 / 3002 loss=2.915, ppl=7.54, wps=5789.5, ups=0.09, wpb=64698, bsz=128, num_updates=1680, lr=9.99946e-05, gnorm=2.714, loss_scale=1, train_wall=11, gb_free=2.8, wall=18792
2021-06-18 23:52:09 | INFO | train_inner | epoch 001: 1697 / 3002 loss=3.01, ppl=8.06, wps=5880.6, ups=0.09, wpb=64905, bsz=128, num_updates=1681, lr=9.99946e-05, gnorm=6.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=18803
2021-06-18 23:52:20 | INFO | train_inner | epoch 001: 1698 / 3002 loss=3.076, ppl=8.43, wps=5820.5, ups=0.09, wpb=64869, bsz=128, num_updates=1682, lr=9.99945e-05, gnorm=2.641, loss_scale=1, train_wall=11, gb_free=2.8, wall=18815
2021-06-18 23:52:31 | INFO | train_inner | epoch 001: 1699 / 3002 loss=3.104, ppl=8.6, wps=5805.9, ups=0.09, wpb=64745, bsz=128, num_updates=1683, lr=9.99945e-05, gnorm=2.617, loss_scale=1, train_wall=11, gb_free=2.8, wall=18826
2021-06-18 23:52:42 | INFO | train_inner | epoch 001: 1700 / 3002 loss=3.042, ppl=8.24, wps=5870.2, ups=0.09, wpb=64781, bsz=128, num_updates=1684, lr=9.99945e-05, gnorm=2.468, loss_scale=1, train_wall=11, gb_free=2.8, wall=18837
2021-06-18 23:52:53 | INFO | train_inner | epoch 001: 1701 / 3002 loss=2.931, ppl=7.63, wps=5945.4, ups=0.09, wpb=64927, bsz=128, num_updates=1685, lr=9.99945e-05, gnorm=2.513, loss_scale=1, train_wall=10, gb_free=2.8, wall=18848
2021-06-18 23:53:04 | INFO | train_inner | epoch 001: 1702 / 3002 loss=2.978, ppl=7.88, wps=5911.8, ups=0.09, wpb=64832, bsz=128, num_updates=1686, lr=9.99945e-05, gnorm=2.724, loss_scale=1, train_wall=11, gb_free=2.8, wall=18859
2021-06-18 23:53:16 | INFO | train_inner | epoch 001: 1703 / 3002 loss=3.084, ppl=8.48, wps=5761.8, ups=0.09, wpb=64793, bsz=128, num_updates=1687, lr=9.99945e-05, gnorm=64.165, loss_scale=1, train_wall=11, gb_free=2.8, wall=18870
2021-06-18 23:53:27 | INFO | train_inner | epoch 001: 1704 / 3002 loss=2.933, ppl=7.64, wps=5881.1, ups=0.09, wpb=64854, bsz=128, num_updates=1688, lr=9.99945e-05, gnorm=2.794, loss_scale=1, train_wall=11, gb_free=2.8, wall=18881
2021-06-18 23:53:37 | INFO | train_inner | epoch 001: 1705 / 3002 loss=2.982, ppl=7.9, wps=5965.7, ups=0.09, wpb=64797, bsz=128, num_updates=1689, lr=9.99945e-05, gnorm=2.75, loss_scale=1, train_wall=10, gb_free=2.8, wall=18892
2021-06-18 23:53:48 | INFO | train_inner | epoch 001: 1706 / 3002 loss=2.961, ppl=7.79, wps=5921.9, ups=0.09, wpb=64793, bsz=128, num_updates=1690, lr=9.99945e-05, gnorm=2.787, loss_scale=1, train_wall=10, gb_free=2.8, wall=18903
2021-06-18 23:54:00 | INFO | train_inner | epoch 001: 1707 / 3002 loss=3.134, ppl=8.78, wps=5828, ups=0.09, wpb=64864, bsz=128, num_updates=1691, lr=9.99945e-05, gnorm=3.538, loss_scale=1, train_wall=11, gb_free=2.8, wall=18914
2021-06-18 23:54:11 | INFO | train_inner | epoch 001: 1708 / 3002 loss=3.175, ppl=9.03, wps=5756.5, ups=0.09, wpb=64818, bsz=128, num_updates=1692, lr=9.99945e-05, gnorm=2.91, loss_scale=1, train_wall=11, gb_free=2.8, wall=18925
2021-06-18 23:54:22 | INFO | train_inner | epoch 001: 1709 / 3002 loss=3.096, ppl=8.55, wps=5789.9, ups=0.09, wpb=64767, bsz=128, num_updates=1693, lr=9.99945e-05, gnorm=3.989, loss_scale=1, train_wall=11, gb_free=2.8, wall=18936
2021-06-18 23:54:33 | INFO | train_inner | epoch 001: 1710 / 3002 loss=2.906, ppl=7.5, wps=5920.9, ups=0.09, wpb=64824, bsz=128, num_updates=1694, lr=9.99944e-05, gnorm=5.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=18947
2021-06-18 23:54:44 | INFO | train_inner | epoch 001: 1711 / 3002 loss=3.212, ppl=9.27, wps=5838.9, ups=0.09, wpb=64796, bsz=128, num_updates=1695, lr=9.99944e-05, gnorm=4.767, loss_scale=1, train_wall=11, gb_free=2.8, wall=18958
2021-06-18 23:54:55 | INFO | train_inner | epoch 001: 1712 / 3002 loss=3.172, ppl=9.01, wps=5775.1, ups=0.09, wpb=64784, bsz=128, num_updates=1696, lr=9.99944e-05, gnorm=3.241, loss_scale=1, train_wall=11, gb_free=2.8, wall=18970
2021-06-18 23:55:06 | INFO | train_inner | epoch 001: 1713 / 3002 loss=3.097, ppl=8.56, wps=5837.5, ups=0.09, wpb=64810, bsz=128, num_updates=1697, lr=9.99944e-05, gnorm=3.108, loss_scale=1, train_wall=11, gb_free=2.8, wall=18981
2021-06-18 23:55:17 | INFO | train_inner | epoch 001: 1714 / 3002 loss=3.057, ppl=8.32, wps=5906, ups=0.09, wpb=64903, bsz=128, num_updates=1698, lr=9.99944e-05, gnorm=2.999, loss_scale=1, train_wall=11, gb_free=2.8, wall=18992
2021-06-18 23:55:28 | INFO | train_inner | epoch 001: 1715 / 3002 loss=3.107, ppl=8.62, wps=5862.2, ups=0.09, wpb=64805, bsz=128, num_updates=1699, lr=9.99944e-05, gnorm=4.313, loss_scale=1, train_wall=11, gb_free=2.8, wall=19003
2021-06-18 23:55:40 | INFO | train_inner | epoch 001: 1716 / 3002 loss=3.109, ppl=8.63, wps=5820.1, ups=0.09, wpb=64881, bsz=128, num_updates=1700, lr=9.99944e-05, gnorm=26.696, loss_scale=1, train_wall=11, gb_free=2.8, wall=19014
2021-06-18 23:55:51 | INFO | train_inner | epoch 001: 1717 / 3002 loss=3.18, ppl=9.06, wps=5839.1, ups=0.09, wpb=64818, bsz=128, num_updates=1701, lr=9.99944e-05, gnorm=2.94, loss_scale=1, train_wall=11, gb_free=2.8, wall=19025
2021-06-18 23:56:02 | INFO | train_inner | epoch 001: 1718 / 3002 loss=3.432, ppl=10.8, wps=5852.7, ups=0.09, wpb=64716, bsz=128, num_updates=1702, lr=9.99944e-05, gnorm=3.374, loss_scale=1, train_wall=11, gb_free=2.8, wall=19036
2021-06-18 23:56:13 | INFO | train_inner | epoch 001: 1719 / 3002 loss=3.067, ppl=8.38, wps=5843, ups=0.09, wpb=64866, bsz=128, num_updates=1703, lr=9.99944e-05, gnorm=3.578, loss_scale=1, train_wall=11, gb_free=2.8, wall=19047
2021-06-18 23:56:24 | INFO | train_inner | epoch 001: 1720 / 3002 loss=3.183, ppl=9.08, wps=5775.7, ups=0.09, wpb=64794, bsz=128, num_updates=1704, lr=9.99944e-05, gnorm=8.423, loss_scale=1, train_wall=11, gb_free=2.8, wall=19058
2021-06-18 23:56:35 | INFO | train_inner | epoch 001: 1721 / 3002 loss=3.381, ppl=10.41, wps=5896.3, ups=0.09, wpb=64786, bsz=128, num_updates=1705, lr=9.99944e-05, gnorm=3.839, loss_scale=1, train_wall=11, gb_free=2.8, wall=19069
2021-06-18 23:56:46 | INFO | train_inner | epoch 001: 1722 / 3002 loss=3.31, ppl=9.92, wps=5888.1, ups=0.09, wpb=64777, bsz=128, num_updates=1706, lr=9.99944e-05, gnorm=10.966, loss_scale=1, train_wall=11, gb_free=2.8, wall=19080
2021-06-18 23:56:57 | INFO | train_inner | epoch 001: 1723 / 3002 loss=3.52, ppl=11.47, wps=5882.7, ups=0.09, wpb=64821, bsz=128, num_updates=1707, lr=9.99943e-05, gnorm=4.222, loss_scale=1, train_wall=11, gb_free=2.8, wall=19091
2021-06-18 23:57:08 | INFO | train_inner | epoch 001: 1724 / 3002 loss=3.735, ppl=13.32, wps=5923.7, ups=0.09, wpb=64842, bsz=128, num_updates=1708, lr=9.99943e-05, gnorm=9.778, loss_scale=1, train_wall=10, gb_free=2.8, wall=19102
2021-06-18 23:57:19 | INFO | train_inner | epoch 001: 1725 / 3002 loss=3.844, ppl=14.36, wps=5792.6, ups=0.09, wpb=64824, bsz=128, num_updates=1709, lr=9.99943e-05, gnorm=16.061, loss_scale=1, train_wall=11, gb_free=2.8, wall=19113
2021-06-18 23:57:30 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-18 23:57:41 | INFO | train_inner | epoch 001: 1727 / 3002 loss=4.135, ppl=17.57, wps=2929.5, ups=0.05, wpb=64719, bsz=128, num_updates=1710, lr=9.99943e-05, gnorm=10.088, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=19136
2021-06-18 23:57:52 | INFO | train_inner | epoch 001: 1728 / 3002 loss=4.113, ppl=17.31, wps=5938, ups=0.09, wpb=64940, bsz=128, num_updates=1711, lr=9.99943e-05, gnorm=45.337, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=19147
2021-06-18 23:58:03 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25
2021-06-18 23:58:14 | INFO | train_inner | epoch 001: 1730 / 3002 loss=3.665, ppl=12.69, wps=2936.3, ups=0.05, wpb=64953, bsz=128, num_updates=1712, lr=9.99943e-05, gnorm=7.258, loss_scale=0.25, train_wall=21, gb_free=2.8, wall=19169
2021-06-18 23:58:25 | INFO | train_inner | epoch 001: 1731 / 3002 loss=3.702, ppl=13.01, wps=5935.6, ups=0.09, wpb=64871, bsz=128, num_updates=1713, lr=9.99943e-05, gnorm=9.71, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19180
2021-06-18 23:58:36 | INFO | train_inner | epoch 001: 1732 / 3002 loss=3.404, ppl=10.58, wps=5752, ups=0.09, wpb=64858, bsz=128, num_updates=1714, lr=9.99943e-05, gnorm=6.149, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19191
2021-06-18 23:58:47 | INFO | train_inner | epoch 001: 1733 / 3002 loss=3.257, ppl=9.56, wps=5954.8, ups=0.09, wpb=64852, bsz=128, num_updates=1715, lr=9.99943e-05, gnorm=3.413, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19202
2021-06-18 23:58:59 | INFO | train_inner | epoch 001: 1734 / 3002 loss=3.295, ppl=9.82, wps=5782.1, ups=0.09, wpb=64857, bsz=128, num_updates=1716, lr=9.99943e-05, gnorm=3.314, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19213
2021-06-18 23:59:10 | INFO | train_inner | epoch 001: 1735 / 3002 loss=3.201, ppl=9.2, wps=5782.1, ups=0.09, wpb=64789, bsz=128, num_updates=1717, lr=9.99943e-05, gnorm=4.598, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19224
2021-06-18 23:59:21 | INFO | train_inner | epoch 001: 1736 / 3002 loss=3.194, ppl=9.15, wps=5917, ups=0.09, wpb=64852, bsz=128, num_updates=1718, lr=9.99943e-05, gnorm=35.067, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19235
2021-06-18 23:59:32 | INFO | train_inner | epoch 001: 1737 / 3002 loss=3.171, ppl=9.01, wps=5843.2, ups=0.09, wpb=64874, bsz=128, num_updates=1719, lr=9.99942e-05, gnorm=2.931, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19246
2021-06-18 23:59:43 | INFO | train_inner | epoch 001: 1738 / 3002 loss=3.152, ppl=8.89, wps=5892.1, ups=0.09, wpb=64907, bsz=128, num_updates=1720, lr=9.99942e-05, gnorm=3.115, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19257
2021-06-18 23:59:54 | INFO | train_inner | epoch 001: 1739 / 3002 loss=3.033, ppl=8.19, wps=5848.6, ups=0.09, wpb=64912, bsz=128, num_updates=1721, lr=9.99942e-05, gnorm=3.471, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19268
2021-06-19 00:00:05 | INFO | train_inner | epoch 001: 1740 / 3002 loss=3.237, ppl=9.43, wps=5976.3, ups=0.09, wpb=64817, bsz=128, num_updates=1722, lr=9.99942e-05, gnorm=4.523, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19279
2021-06-19 00:00:16 | INFO | train_inner | epoch 001: 1741 / 3002 loss=3.224, ppl=9.34, wps=5956.6, ups=0.09, wpb=64859, bsz=128, num_updates=1723, lr=9.99942e-05, gnorm=3.378, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19290
2021-06-19 00:00:27 | INFO | train_inner | epoch 001: 1742 / 3002 loss=3.151, ppl=8.88, wps=5810.4, ups=0.09, wpb=64905, bsz=128, num_updates=1724, lr=9.99942e-05, gnorm=3.436, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19301
2021-06-19 00:00:38 | INFO | train_inner | epoch 001: 1743 / 3002 loss=3.118, ppl=8.68, wps=5902.1, ups=0.09, wpb=64854, bsz=128, num_updates=1725, lr=9.99942e-05, gnorm=2.942, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19312
2021-06-19 00:00:49 | INFO | train_inner | epoch 001: 1744 / 3002 loss=3.193, ppl=9.15, wps=5813.5, ups=0.09, wpb=64898, bsz=128, num_updates=1726, lr=9.99942e-05, gnorm=3.021, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19323
2021-06-19 00:01:00 | INFO | train_inner | epoch 001: 1745 / 3002 loss=3.161, ppl=8.95, wps=5835.7, ups=0.09, wpb=64852, bsz=128, num_updates=1727, lr=9.99942e-05, gnorm=3.31, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19334
2021-06-19 00:01:11 | INFO | train_inner | epoch 001: 1746 / 3002 loss=2.996, ppl=7.98, wps=5867.4, ups=0.09, wpb=64839, bsz=128, num_updates=1728, lr=9.99942e-05, gnorm=2.993, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19346
2021-06-19 00:01:22 | INFO | train_inner | epoch 001: 1747 / 3002 loss=3.186, ppl=9.1, wps=5892.9, ups=0.09, wpb=64756, bsz=128, num_updates=1729, lr=9.99942e-05, gnorm=2.918, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19357
2021-06-19 00:01:34 | INFO | train_inner | epoch 001: 1748 / 3002 loss=3.056, ppl=8.31, wps=5689.9, ups=0.09, wpb=64788, bsz=128, num_updates=1730, lr=9.99942e-05, gnorm=3.052, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19368
2021-06-19 00:01:45 | INFO | train_inner | epoch 001: 1749 / 3002 loss=3.041, ppl=8.23, wps=5875.2, ups=0.09, wpb=64886, bsz=128, num_updates=1731, lr=9.99942e-05, gnorm=3.623, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19379
2021-06-19 00:01:56 | INFO | train_inner | epoch 001: 1750 / 3002 loss=3.282, ppl=9.73, wps=5902, ups=0.09, wpb=64848, bsz=128, num_updates=1732, lr=9.99941e-05, gnorm=3.898, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19390
2021-06-19 00:02:06 | INFO | train_inner | epoch 001: 1751 / 3002 loss=3.043, ppl=8.24, wps=5997, ups=0.09, wpb=64853, bsz=128, num_updates=1733, lr=9.99941e-05, gnorm=2.746, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19401
2021-06-19 00:02:17 | INFO | train_inner | epoch 001: 1752 / 3002 loss=3.084, ppl=8.48, wps=5918.5, ups=0.09, wpb=64860, bsz=128, num_updates=1734, lr=9.99941e-05, gnorm=3.193, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19412
2021-06-19 00:02:28 | INFO | train_inner | epoch 001: 1753 / 3002 loss=3.2, ppl=9.19, wps=5969.2, ups=0.09, wpb=64782, bsz=128, num_updates=1735, lr=9.99941e-05, gnorm=2.82, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19423
2021-06-19 00:02:39 | INFO | train_inner | epoch 001: 1754 / 3002 loss=3.351, ppl=10.2, wps=5854.7, ups=0.09, wpb=64757, bsz=128, num_updates=1736, lr=9.99941e-05, gnorm=2.8, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19434
2021-06-19 00:02:50 | INFO | train_inner | epoch 001: 1755 / 3002 loss=3.295, ppl=9.82, wps=5884.9, ups=0.09, wpb=64749, bsz=128, num_updates=1737, lr=9.99941e-05, gnorm=2.881, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19445
2021-06-19 00:03:01 | INFO | train_inner | epoch 001: 1756 / 3002 loss=3.044, ppl=8.25, wps=5863.5, ups=0.09, wpb=64793, bsz=128, num_updates=1738, lr=9.99941e-05, gnorm=2.78, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19456
2021-06-19 00:03:12 | INFO | train_inner | epoch 001: 1757 / 3002 loss=2.934, ppl=7.64, wps=5993.3, ups=0.09, wpb=64916, bsz=128, num_updates=1739, lr=9.99941e-05, gnorm=3.69, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19467
2021-06-19 00:03:23 | INFO | train_inner | epoch 001: 1758 / 3002 loss=2.981, ppl=7.89, wps=5794.3, ups=0.09, wpb=64853, bsz=128, num_updates=1740, lr=9.99941e-05, gnorm=2.846, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19478
2021-06-19 00:03:35 | INFO | train_inner | epoch 001: 1759 / 3002 loss=2.977, ppl=7.87, wps=5815.9, ups=0.09, wpb=64812, bsz=128, num_updates=1741, lr=9.99941e-05, gnorm=2.578, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19489
2021-06-19 00:03:46 | INFO | train_inner | epoch 001: 1760 / 3002 loss=3.139, ppl=8.81, wps=5841.2, ups=0.09, wpb=64792, bsz=128, num_updates=1742, lr=9.99941e-05, gnorm=3.855, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19500
2021-06-19 00:03:57 | INFO | train_inner | epoch 001: 1761 / 3002 loss=2.986, ppl=7.92, wps=5942.4, ups=0.09, wpb=64829, bsz=128, num_updates=1743, lr=9.99941e-05, gnorm=2.547, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19511
2021-06-19 00:04:08 | INFO | train_inner | epoch 001: 1762 / 3002 loss=3.107, ppl=8.61, wps=5894.2, ups=0.09, wpb=64758, bsz=128, num_updates=1744, lr=9.9994e-05, gnorm=3.229, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19522
2021-06-19 00:04:19 | INFO | train_inner | epoch 001: 1763 / 3002 loss=3.056, ppl=8.31, wps=5749.4, ups=0.09, wpb=64785, bsz=128, num_updates=1745, lr=9.9994e-05, gnorm=2.599, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19533
2021-06-19 00:04:30 | INFO | train_inner | epoch 001: 1764 / 3002 loss=3.028, ppl=8.16, wps=5858.1, ups=0.09, wpb=64962, bsz=128, num_updates=1746, lr=9.9994e-05, gnorm=3.204, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19544
2021-06-19 00:04:41 | INFO | train_inner | epoch 001: 1765 / 3002 loss=3.057, ppl=8.32, wps=5767, ups=0.09, wpb=64723, bsz=128, num_updates=1747, lr=9.9994e-05, gnorm=2.601, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19555
2021-06-19 00:04:52 | INFO | train_inner | epoch 001: 1766 / 3002 loss=3.051, ppl=8.29, wps=5840, ups=0.09, wpb=64882, bsz=128, num_updates=1748, lr=9.9994e-05, gnorm=2.824, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19567
2021-06-19 00:05:04 | INFO | train_inner | epoch 001: 1767 / 3002 loss=2.97, ppl=7.84, wps=5707, ups=0.09, wpb=64786, bsz=128, num_updates=1749, lr=9.9994e-05, gnorm=2.62, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19578
2021-06-19 00:05:15 | INFO | train_inner | epoch 001: 1768 / 3002 loss=3.088, ppl=8.5, wps=5694.9, ups=0.09, wpb=64791, bsz=128, num_updates=1750, lr=9.9994e-05, gnorm=2.521, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19589
2021-06-19 00:05:26 | INFO | train_inner | epoch 001: 1769 / 3002 loss=3.04, ppl=8.23, wps=5872.7, ups=0.09, wpb=64804, bsz=128, num_updates=1751, lr=9.9994e-05, gnorm=3.4, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19600
2021-06-19 00:05:37 | INFO | train_inner | epoch 001: 1770 / 3002 loss=3.122, ppl=8.7, wps=5875.3, ups=0.09, wpb=64823, bsz=128, num_updates=1752, lr=9.9994e-05, gnorm=2.83, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19611
2021-06-19 00:05:48 | INFO | train_inner | epoch 001: 1771 / 3002 loss=3.049, ppl=8.27, wps=5813.8, ups=0.09, wpb=64779, bsz=128, num_updates=1753, lr=9.9994e-05, gnorm=3.766, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19622
2021-06-19 00:05:59 | INFO | train_inner | epoch 001: 1772 / 3002 loss=2.91, ppl=7.52, wps=6013.3, ups=0.09, wpb=64935, bsz=128, num_updates=1754, lr=9.9994e-05, gnorm=2.731, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19633
2021-06-19 00:06:10 | INFO | train_inner | epoch 001: 1773 / 3002 loss=3.032, ppl=8.18, wps=5822.2, ups=0.09, wpb=64770, bsz=128, num_updates=1755, lr=9.9994e-05, gnorm=2.727, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19644
2021-06-19 00:06:21 | INFO | train_inner | epoch 001: 1774 / 3002 loss=2.963, ppl=7.8, wps=5833.8, ups=0.09, wpb=64861, bsz=128, num_updates=1756, lr=9.9994e-05, gnorm=2.51, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19656
2021-06-19 00:06:32 | INFO | train_inner | epoch 001: 1775 / 3002 loss=3.022, ppl=8.12, wps=5993.4, ups=0.09, wpb=64869, bsz=128, num_updates=1757, lr=9.99939e-05, gnorm=2.577, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19666
2021-06-19 00:06:43 | INFO | train_inner | epoch 001: 1776 / 3002 loss=2.952, ppl=7.74, wps=6001.6, ups=0.09, wpb=64899, bsz=128, num_updates=1758, lr=9.99939e-05, gnorm=2.91, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19677
2021-06-19 00:06:54 | INFO | train_inner | epoch 001: 1777 / 3002 loss=3.021, ppl=8.12, wps=5812.8, ups=0.09, wpb=64770, bsz=128, num_updates=1759, lr=9.99939e-05, gnorm=3.977, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19688
2021-06-19 00:07:05 | INFO | train_inner | epoch 001: 1778 / 3002 loss=3.064, ppl=8.36, wps=5864.1, ups=0.09, wpb=64879, bsz=128, num_updates=1760, lr=9.99939e-05, gnorm=4.137, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19699
2021-06-19 00:07:16 | INFO | train_inner | epoch 001: 1779 / 3002 loss=3.222, ppl=9.33, wps=5809.5, ups=0.09, wpb=64784, bsz=128, num_updates=1761, lr=9.99939e-05, gnorm=2.546, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19711
2021-06-19 00:07:27 | INFO | train_inner | epoch 001: 1780 / 3002 loss=3.182, ppl=9.07, wps=5797.1, ups=0.09, wpb=64733, bsz=128, num_updates=1762, lr=9.99939e-05, gnorm=2.743, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19722
2021-06-19 00:07:38 | INFO | train_inner | epoch 001: 1781 / 3002 loss=3.15, ppl=8.88, wps=5910.9, ups=0.09, wpb=64780, bsz=128, num_updates=1763, lr=9.99939e-05, gnorm=2.599, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19733
2021-06-19 00:07:49 | INFO | train_inner | epoch 001: 1782 / 3002 loss=2.971, ppl=7.84, wps=5820.9, ups=0.09, wpb=64757, bsz=128, num_updates=1764, lr=9.99939e-05, gnorm=19.151, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19744
2021-06-19 00:08:01 | INFO | train_inner | epoch 001: 1783 / 3002 loss=2.979, ppl=7.88, wps=5773.6, ups=0.09, wpb=64868, bsz=128, num_updates=1765, lr=9.99939e-05, gnorm=2.421, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19755
2021-06-19 00:08:12 | INFO | train_inner | epoch 001: 1784 / 3002 loss=2.987, ppl=7.93, wps=5693.4, ups=0.09, wpb=64813, bsz=128, num_updates=1766, lr=9.99939e-05, gnorm=2.422, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19766
2021-06-19 00:08:23 | INFO | train_inner | epoch 001: 1785 / 3002 loss=2.949, ppl=7.72, wps=5762.2, ups=0.09, wpb=64828, bsz=128, num_updates=1767, lr=9.99939e-05, gnorm=2.435, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19778
2021-06-19 00:08:34 | INFO | train_inner | epoch 001: 1786 / 3002 loss=2.983, ppl=7.91, wps=5797.6, ups=0.09, wpb=64735, bsz=128, num_updates=1768, lr=9.99939e-05, gnorm=2.615, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19789
2021-06-19 00:08:46 | INFO | train_inner | epoch 001: 1787 / 3002 loss=3.072, ppl=8.41, wps=5822.7, ups=0.09, wpb=64821, bsz=128, num_updates=1769, lr=9.99938e-05, gnorm=2.728, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19800
2021-06-19 00:08:57 | INFO | train_inner | epoch 001: 1788 / 3002 loss=3.047, ppl=8.26, wps=5824, ups=0.09, wpb=64963, bsz=128, num_updates=1770, lr=9.99938e-05, gnorm=2.696, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19811
2021-06-19 00:09:08 | INFO | train_inner | epoch 001: 1789 / 3002 loss=2.773, ppl=6.84, wps=5906.8, ups=0.09, wpb=64808, bsz=128, num_updates=1771, lr=9.99938e-05, gnorm=2.391, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19822
2021-06-19 00:09:19 | INFO | train_inner | epoch 001: 1790 / 3002 loss=3.061, ppl=8.35, wps=5841.2, ups=0.09, wpb=64763, bsz=128, num_updates=1772, lr=9.99938e-05, gnorm=3.35, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19833
2021-06-19 00:09:30 | INFO | train_inner | epoch 001: 1791 / 3002 loss=2.825, ppl=7.09, wps=5850.6, ups=0.09, wpb=64914, bsz=128, num_updates=1773, lr=9.99938e-05, gnorm=2.527, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19844
2021-06-19 00:09:41 | INFO | train_inner | epoch 001: 1792 / 3002 loss=3.111, ppl=8.64, wps=5966.4, ups=0.09, wpb=64910, bsz=128, num_updates=1774, lr=9.99938e-05, gnorm=2.428, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19855
2021-06-19 00:09:52 | INFO | train_inner | epoch 001: 1793 / 3002 loss=2.93, ppl=7.62, wps=5836.9, ups=0.09, wpb=64798, bsz=128, num_updates=1775, lr=9.99938e-05, gnorm=2.588, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19866
2021-06-19 00:10:03 | INFO | train_inner | epoch 001: 1794 / 3002 loss=3.016, ppl=8.09, wps=5993.4, ups=0.09, wpb=64872, bsz=128, num_updates=1776, lr=9.99938e-05, gnorm=2.634, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=19877
2021-06-19 00:10:14 | INFO | train_inner | epoch 001: 1795 / 3002 loss=3.118, ppl=8.68, wps=5795.4, ups=0.09, wpb=64778, bsz=128, num_updates=1777, lr=9.99938e-05, gnorm=2.422, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19888
2021-06-19 00:10:25 | INFO | train_inner | epoch 001: 1796 / 3002 loss=2.998, ppl=7.99, wps=5872.8, ups=0.09, wpb=64829, bsz=128, num_updates=1778, lr=9.99938e-05, gnorm=2.496, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19899
2021-06-19 00:10:36 | INFO | train_inner | epoch 001: 1797 / 3002 loss=3.029, ppl=8.16, wps=5801.9, ups=0.09, wpb=64901, bsz=128, num_updates=1779, lr=9.99938e-05, gnorm=94.949, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19910
2021-06-19 00:10:47 | INFO | train_inner | epoch 001: 1798 / 3002 loss=3.008, ppl=8.04, wps=5796.6, ups=0.09, wpb=64801, bsz=128, num_updates=1780, lr=9.99938e-05, gnorm=4.068, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19922
2021-06-19 00:10:59 | INFO | train_inner | epoch 001: 1799 / 3002 loss=2.938, ppl=7.66, wps=5769, ups=0.09, wpb=64805, bsz=128, num_updates=1781, lr=9.99938e-05, gnorm=2.661, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19933
2021-06-19 00:11:10 | INFO | train_inner | epoch 001: 1800 / 3002 loss=3.035, ppl=8.2, wps=5805, ups=0.09, wpb=64789, bsz=128, num_updates=1782, lr=9.99937e-05, gnorm=2.823, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19944
2021-06-19 00:11:21 | INFO | train_inner | epoch 001: 1801 / 3002 loss=2.892, ppl=7.42, wps=5903.6, ups=0.09, wpb=64753, bsz=128, num_updates=1783, lr=9.99937e-05, gnorm=2.57, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19955
2021-06-19 00:11:32 | INFO | train_inner | epoch 001: 1802 / 3002 loss=3.006, ppl=8.03, wps=5835.2, ups=0.09, wpb=64805, bsz=128, num_updates=1784, lr=9.99937e-05, gnorm=2.668, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19966
2021-06-19 00:11:43 | INFO | train_inner | epoch 001: 1803 / 3002 loss=2.956, ppl=7.76, wps=5824.8, ups=0.09, wpb=64778, bsz=128, num_updates=1785, lr=9.99937e-05, gnorm=9.771, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19977
2021-06-19 00:11:54 | INFO | train_inner | epoch 001: 1804 / 3002 loss=3.022, ppl=8.13, wps=5835.5, ups=0.09, wpb=64825, bsz=128, num_updates=1786, lr=9.99937e-05, gnorm=2.574, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19988
2021-06-19 00:12:05 | INFO | train_inner | epoch 001: 1805 / 3002 loss=2.96, ppl=7.78, wps=5861.1, ups=0.09, wpb=64828, bsz=128, num_updates=1787, lr=9.99937e-05, gnorm=2.627, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=19999
2021-06-19 00:12:16 | INFO | train_inner | epoch 001: 1806 / 3002 loss=2.991, ppl=7.95, wps=5946.4, ups=0.09, wpb=64778, bsz=128, num_updates=1788, lr=9.99937e-05, gnorm=3.034, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=20010
2021-06-19 00:12:27 | INFO | train_inner | epoch 001: 1807 / 3002 loss=3.208, ppl=9.24, wps=5847, ups=0.09, wpb=64829, bsz=128, num_updates=1789, lr=9.99937e-05, gnorm=17.241, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=20021
2021-06-19 00:12:38 | INFO | train_inner | epoch 001: 1808 / 3002 loss=2.835, ppl=7.13, wps=5891.4, ups=0.09, wpb=64852, bsz=128, num_updates=1790, lr=9.99937e-05, gnorm=3.667, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=20032
2021-06-19 00:12:49 | INFO | train_inner | epoch 001: 1809 / 3002 loss=3.263, ppl=9.6, wps=5821.3, ups=0.09, wpb=64854, bsz=128, num_updates=1791, lr=9.99937e-05, gnorm=3.927, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=20044
2021-06-19 00:13:00 | INFO | train_inner | epoch 001: 1810 / 3002 loss=3.11, ppl=8.64, wps=5833.2, ups=0.09, wpb=64750, bsz=128, num_updates=1792, lr=9.99937e-05, gnorm=3.059, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=20055
2021-06-19 00:13:11 | INFO | train_inner | epoch 001: 1811 / 3002 loss=3.283, ppl=9.73, wps=5926.1, ups=0.09, wpb=64847, bsz=128, num_updates=1793, lr=9.99937e-05, gnorm=3.78, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=20066
2021-06-19 00:13:22 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.125
2021-06-19 00:13:33 | INFO | train_inner | epoch 001: 1813 / 3002 loss=3.151, ppl=8.88, wps=2921.8, ups=0.05, wpb=64827, bsz=128, num_updates=1794, lr=9.99936e-05, gnorm=7.908, loss_scale=0.125, train_wall=21, gb_free=2.8, wall=20088
2021-06-19 00:13:44 | INFO | train_inner | epoch 001: 1814 / 3002 loss=3.224, ppl=9.34, wps=5852.5, ups=0.09, wpb=64731, bsz=128, num_updates=1795, lr=9.99936e-05, gnorm=4.006, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20099
2021-06-19 00:13:55 | INFO | train_inner | epoch 001: 1815 / 3002 loss=3.123, ppl=8.71, wps=5901, ups=0.09, wpb=64777, bsz=128, num_updates=1796, lr=9.99936e-05, gnorm=3.518, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20110
2021-06-19 00:14:07 | INFO | train_inner | epoch 001: 1816 / 3002 loss=2.886, ppl=7.39, wps=5770, ups=0.09, wpb=64922, bsz=128, num_updates=1797, lr=9.99936e-05, gnorm=2.673, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20121
2021-06-19 00:14:18 | INFO | train_inner | epoch 001: 1817 / 3002 loss=3.286, ppl=9.75, wps=5903.6, ups=0.09, wpb=64779, bsz=128, num_updates=1798, lr=9.99936e-05, gnorm=3.401, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20132
2021-06-19 00:14:29 | INFO | train_inner | epoch 001: 1818 / 3002 loss=2.889, ppl=7.41, wps=5857.9, ups=0.09, wpb=64864, bsz=128, num_updates=1799, lr=9.99936e-05, gnorm=5.695, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20143
2021-06-19 00:14:40 | INFO | train_inner | epoch 001: 1819 / 3002 loss=3.005, ppl=8.03, wps=5877.7, ups=0.09, wpb=64848, bsz=128, num_updates=1800, lr=9.99936e-05, gnorm=11.377, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20154
2021-06-19 00:14:51 | INFO | train_inner | epoch 001: 1820 / 3002 loss=2.904, ppl=7.49, wps=5774.1, ups=0.09, wpb=64857, bsz=128, num_updates=1801, lr=9.99936e-05, gnorm=29.521, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20165
2021-06-19 00:15:02 | INFO | train_inner | epoch 001: 1821 / 3002 loss=3.015, ppl=8.08, wps=5883, ups=0.09, wpb=64774, bsz=128, num_updates=1802, lr=9.99936e-05, gnorm=2.678, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20176
2021-06-19 00:15:13 | INFO | train_inner | epoch 001: 1822 / 3002 loss=3.171, ppl=9, wps=5929.5, ups=0.09, wpb=64799, bsz=128, num_updates=1803, lr=9.99936e-05, gnorm=5.156, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20187
2021-06-19 00:15:24 | INFO | train_inner | epoch 001: 1823 / 3002 loss=2.935, ppl=7.65, wps=5869, ups=0.09, wpb=64894, bsz=128, num_updates=1804, lr=9.99936e-05, gnorm=3.071, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20198
2021-06-19 00:15:35 | INFO | train_inner | epoch 001: 1824 / 3002 loss=3.01, ppl=8.05, wps=5918.1, ups=0.09, wpb=64831, bsz=128, num_updates=1805, lr=9.99936e-05, gnorm=2.587, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20209
2021-06-19 00:15:46 | INFO | train_inner | epoch 001: 1825 / 3002 loss=2.939, ppl=7.67, wps=5927.5, ups=0.09, wpb=64849, bsz=128, num_updates=1806, lr=9.99936e-05, gnorm=2.663, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20220
2021-06-19 00:15:57 | INFO | train_inner | epoch 001: 1826 / 3002 loss=2.96, ppl=7.78, wps=5931.3, ups=0.09, wpb=64817, bsz=128, num_updates=1807, lr=9.99935e-05, gnorm=2.505, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20231
2021-06-19 00:16:08 | INFO | train_inner | epoch 001: 1827 / 3002 loss=2.967, ppl=7.82, wps=5760.8, ups=0.09, wpb=64834, bsz=128, num_updates=1808, lr=9.99935e-05, gnorm=2.701, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20242
2021-06-19 00:16:19 | INFO | train_inner | epoch 001: 1828 / 3002 loss=3.084, ppl=8.48, wps=5956.2, ups=0.09, wpb=64838, bsz=128, num_updates=1809, lr=9.99935e-05, gnorm=2.669, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20253
2021-06-19 00:16:30 | INFO | train_inner | epoch 001: 1829 / 3002 loss=3.021, ppl=8.12, wps=5969, ups=0.09, wpb=64887, bsz=128, num_updates=1810, lr=9.99935e-05, gnorm=2.715, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20264
2021-06-19 00:16:41 | INFO | train_inner | epoch 001: 1830 / 3002 loss=2.942, ppl=7.69, wps=5730.5, ups=0.09, wpb=64809, bsz=128, num_updates=1811, lr=9.99935e-05, gnorm=2.632, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20276
2021-06-19 00:16:52 | INFO | train_inner | epoch 001: 1831 / 3002 loss=3.095, ppl=8.55, wps=5883.1, ups=0.09, wpb=64801, bsz=128, num_updates=1812, lr=9.99935e-05, gnorm=2.908, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20287
2021-06-19 00:17:03 | INFO | train_inner | epoch 001: 1832 / 3002 loss=3.042, ppl=8.23, wps=5794.3, ups=0.09, wpb=64805, bsz=128, num_updates=1813, lr=9.99935e-05, gnorm=2.822, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20298
2021-06-19 00:17:14 | INFO | train_inner | epoch 001: 1833 / 3002 loss=3.146, ppl=8.85, wps=5852.8, ups=0.09, wpb=64880, bsz=128, num_updates=1814, lr=9.99935e-05, gnorm=2.673, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20309
2021-06-19 00:17:26 | INFO | train_inner | epoch 001: 1834 / 3002 loss=3.111, ppl=8.64, wps=5835.7, ups=0.09, wpb=64800, bsz=128, num_updates=1815, lr=9.99935e-05, gnorm=2.546, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20320
2021-06-19 00:17:37 | INFO | train_inner | epoch 001: 1835 / 3002 loss=2.858, ppl=7.25, wps=5839.8, ups=0.09, wpb=64789, bsz=128, num_updates=1816, lr=9.99935e-05, gnorm=2.46, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20331
2021-06-19 00:17:48 | INFO | train_inner | epoch 001: 1836 / 3002 loss=3.003, ppl=8.02, wps=5905.8, ups=0.09, wpb=64812, bsz=128, num_updates=1817, lr=9.99935e-05, gnorm=2.529, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20342
2021-06-19 00:17:59 | INFO | train_inner | epoch 001: 1837 / 3002 loss=3.038, ppl=8.21, wps=5721.9, ups=0.09, wpb=64855, bsz=128, num_updates=1818, lr=9.99935e-05, gnorm=2.579, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20353
2021-06-19 00:18:10 | INFO | train_inner | epoch 001: 1838 / 3002 loss=3.137, ppl=8.79, wps=5919.3, ups=0.09, wpb=64899, bsz=128, num_updates=1819, lr=9.99934e-05, gnorm=2.654, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20364
2021-06-19 00:18:21 | INFO | train_inner | epoch 001: 1839 / 3002 loss=3.007, ppl=8.04, wps=5854.9, ups=0.09, wpb=64899, bsz=128, num_updates=1820, lr=9.99934e-05, gnorm=2.895, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20375
2021-06-19 00:18:32 | INFO | train_inner | epoch 001: 1840 / 3002 loss=3.022, ppl=8.12, wps=5802.2, ups=0.09, wpb=64842, bsz=128, num_updates=1821, lr=9.99934e-05, gnorm=2.566, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20387
2021-06-19 00:18:43 | INFO | train_inner | epoch 001: 1841 / 3002 loss=2.979, ppl=7.88, wps=5926.7, ups=0.09, wpb=64842, bsz=128, num_updates=1822, lr=9.99934e-05, gnorm=2.517, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20397
2021-06-19 00:18:54 | INFO | train_inner | epoch 001: 1842 / 3002 loss=2.971, ppl=7.84, wps=5822.3, ups=0.09, wpb=64870, bsz=128, num_updates=1823, lr=9.99934e-05, gnorm=2.538, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20409
2021-06-19 00:19:06 | INFO | train_inner | epoch 001: 1843 / 3002 loss=3.07, ppl=8.4, wps=5753.5, ups=0.09, wpb=64828, bsz=128, num_updates=1824, lr=9.99934e-05, gnorm=2.682, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20420
2021-06-19 00:19:17 | INFO | train_inner | epoch 001: 1844 / 3002 loss=2.921, ppl=7.58, wps=5747.6, ups=0.09, wpb=64816, bsz=128, num_updates=1825, lr=9.99934e-05, gnorm=2.559, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20431
2021-06-19 00:19:28 | INFO | train_inner | epoch 001: 1845 / 3002 loss=2.992, ppl=7.95, wps=5839.3, ups=0.09, wpb=64781, bsz=128, num_updates=1826, lr=9.99934e-05, gnorm=2.526, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20442
2021-06-19 00:19:39 | INFO | train_inner | epoch 001: 1846 / 3002 loss=2.873, ppl=7.33, wps=5886.6, ups=0.09, wpb=64815, bsz=128, num_updates=1827, lr=9.99934e-05, gnorm=2.554, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20453
2021-06-19 00:19:50 | INFO | train_inner | epoch 001: 1847 / 3002 loss=3.013, ppl=8.07, wps=5837, ups=0.09, wpb=64828, bsz=128, num_updates=1828, lr=9.99934e-05, gnorm=2.505, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20464
2021-06-19 00:20:01 | INFO | train_inner | epoch 001: 1848 / 3002 loss=2.914, ppl=7.54, wps=5810.3, ups=0.09, wpb=64805, bsz=128, num_updates=1829, lr=9.99934e-05, gnorm=2.53, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20476
2021-06-19 00:20:12 | INFO | train_inner | epoch 001: 1849 / 3002 loss=3.057, ppl=8.32, wps=5733.8, ups=0.09, wpb=64721, bsz=128, num_updates=1830, lr=9.99934e-05, gnorm=3.218, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20487
2021-06-19 00:20:24 | INFO | train_inner | epoch 001: 1850 / 3002 loss=3.017, ppl=8.1, wps=5861.6, ups=0.09, wpb=64801, bsz=128, num_updates=1831, lr=9.99934e-05, gnorm=2.489, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20498
2021-06-19 00:20:35 | INFO | train_inner | epoch 001: 1851 / 3002 loss=2.843, ppl=7.18, wps=5889.5, ups=0.09, wpb=64866, bsz=128, num_updates=1832, lr=9.99933e-05, gnorm=2.475, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20509
2021-06-19 00:20:46 | INFO | train_inner | epoch 001: 1852 / 3002 loss=2.895, ppl=7.44, wps=5844.4, ups=0.09, wpb=64789, bsz=128, num_updates=1833, lr=9.99933e-05, gnorm=2.492, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20520
2021-06-19 00:20:57 | INFO | train_inner | epoch 001: 1853 / 3002 loss=3.104, ppl=8.6, wps=5888.7, ups=0.09, wpb=64826, bsz=128, num_updates=1834, lr=9.99933e-05, gnorm=22.599, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20531
2021-06-19 00:21:08 | INFO | train_inner | epoch 001: 1854 / 3002 loss=2.952, ppl=7.74, wps=5923.2, ups=0.09, wpb=64834, bsz=128, num_updates=1835, lr=9.99933e-05, gnorm=3.309, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20542
2021-06-19 00:21:18 | INFO | train_inner | epoch 001: 1855 / 3002 loss=3.145, ppl=8.85, wps=5941.8, ups=0.09, wpb=64767, bsz=128, num_updates=1836, lr=9.99933e-05, gnorm=5.689, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20553
2021-06-19 00:21:30 | INFO | train_inner | epoch 001: 1856 / 3002 loss=3.062, ppl=8.35, wps=5868.2, ups=0.09, wpb=64770, bsz=128, num_updates=1837, lr=9.99933e-05, gnorm=2.53, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20564
2021-06-19 00:21:40 | INFO | train_inner | epoch 001: 1857 / 3002 loss=3.032, ppl=8.18, wps=5934.4, ups=0.09, wpb=64898, bsz=128, num_updates=1838, lr=9.99933e-05, gnorm=2.603, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20575
2021-06-19 00:21:52 | INFO | train_inner | epoch 001: 1858 / 3002 loss=2.972, ppl=7.85, wps=5821.2, ups=0.09, wpb=64890, bsz=128, num_updates=1839, lr=9.99933e-05, gnorm=2.593, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20586
2021-06-19 00:22:03 | INFO | train_inner | epoch 001: 1859 / 3002 loss=3.015, ppl=8.08, wps=5898.9, ups=0.09, wpb=64868, bsz=128, num_updates=1840, lr=9.99933e-05, gnorm=2.667, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20597
2021-06-19 00:22:14 | INFO | train_inner | epoch 001: 1860 / 3002 loss=3.077, ppl=8.44, wps=5780, ups=0.09, wpb=64793, bsz=128, num_updates=1841, lr=9.99933e-05, gnorm=2.558, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20608
2021-06-19 00:22:25 | INFO | train_inner | epoch 001: 1861 / 3002 loss=2.864, ppl=7.28, wps=6006.3, ups=0.09, wpb=64837, bsz=128, num_updates=1842, lr=9.99933e-05, gnorm=2.501, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20619
2021-06-19 00:22:36 | INFO | train_inner | epoch 001: 1862 / 3002 loss=3.013, ppl=8.07, wps=5758.1, ups=0.09, wpb=64791, bsz=128, num_updates=1843, lr=9.99933e-05, gnorm=2.781, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20630
2021-06-19 00:22:47 | INFO | train_inner | epoch 001: 1863 / 3002 loss=2.97, ppl=7.83, wps=5901.5, ups=0.09, wpb=64802, bsz=128, num_updates=1844, lr=9.99932e-05, gnorm=2.558, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20641
2021-06-19 00:22:58 | INFO | train_inner | epoch 001: 1864 / 3002 loss=3.062, ppl=8.35, wps=5819.4, ups=0.09, wpb=64884, bsz=128, num_updates=1845, lr=9.99932e-05, gnorm=2.567, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20652
2021-06-19 00:23:09 | INFO | train_inner | epoch 001: 1865 / 3002 loss=2.902, ppl=7.48, wps=5857, ups=0.09, wpb=64854, bsz=128, num_updates=1846, lr=9.99932e-05, gnorm=2.439, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20663
2021-06-19 00:23:20 | INFO | train_inner | epoch 001: 1866 / 3002 loss=2.935, ppl=7.65, wps=5895.5, ups=0.09, wpb=64821, bsz=128, num_updates=1847, lr=9.99932e-05, gnorm=2.575, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20674
2021-06-19 00:23:31 | INFO | train_inner | epoch 001: 1867 / 3002 loss=3.006, ppl=8.04, wps=5958.4, ups=0.09, wpb=64771, bsz=128, num_updates=1848, lr=9.99932e-05, gnorm=2.529, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20685
2021-06-19 00:23:42 | INFO | train_inner | epoch 001: 1868 / 3002 loss=3.046, ppl=8.26, wps=5856.7, ups=0.09, wpb=64820, bsz=128, num_updates=1849, lr=9.99932e-05, gnorm=6.084, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20696
2021-06-19 00:23:53 | INFO | train_inner | epoch 001: 1869 / 3002 loss=3.053, ppl=8.3, wps=5802.3, ups=0.09, wpb=64830, bsz=128, num_updates=1850, lr=9.99932e-05, gnorm=2.682, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20708
2021-06-19 00:24:04 | INFO | train_inner | epoch 001: 1870 / 3002 loss=2.966, ppl=7.81, wps=5836.5, ups=0.09, wpb=64758, bsz=128, num_updates=1851, lr=9.99932e-05, gnorm=2.529, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20719
2021-06-19 00:24:15 | INFO | train_inner | epoch 001: 1871 / 3002 loss=3.034, ppl=8.19, wps=5929.8, ups=0.09, wpb=64844, bsz=128, num_updates=1852, lr=9.99932e-05, gnorm=3.745, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20730
2021-06-19 00:24:26 | INFO | train_inner | epoch 001: 1872 / 3002 loss=2.92, ppl=7.57, wps=5850.1, ups=0.09, wpb=64818, bsz=128, num_updates=1853, lr=9.99932e-05, gnorm=5.764, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20741
2021-06-19 00:24:37 | INFO | train_inner | epoch 001: 1873 / 3002 loss=3.039, ppl=8.22, wps=5925.1, ups=0.09, wpb=64820, bsz=128, num_updates=1854, lr=9.99932e-05, gnorm=2.78, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20752
2021-06-19 00:24:48 | INFO | train_inner | epoch 001: 1874 / 3002 loss=3.023, ppl=8.13, wps=5851.9, ups=0.09, wpb=64802, bsz=128, num_updates=1855, lr=9.99932e-05, gnorm=2.628, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20763
2021-06-19 00:25:00 | INFO | train_inner | epoch 001: 1875 / 3002 loss=3.161, ppl=8.94, wps=5729.3, ups=0.09, wpb=64862, bsz=128, num_updates=1856, lr=9.99932e-05, gnorm=3.533, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20774
2021-06-19 00:25:11 | INFO | train_inner | epoch 001: 1876 / 3002 loss=3.03, ppl=8.17, wps=5905.3, ups=0.09, wpb=64822, bsz=128, num_updates=1857, lr=9.99931e-05, gnorm=2.537, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20785
2021-06-19 00:25:22 | INFO | train_inner | epoch 001: 1877 / 3002 loss=2.895, ppl=7.44, wps=5891.5, ups=0.09, wpb=64885, bsz=128, num_updates=1858, lr=9.99931e-05, gnorm=2.482, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20796
2021-06-19 00:25:33 | INFO | train_inner | epoch 001: 1878 / 3002 loss=2.976, ppl=7.87, wps=5856.6, ups=0.09, wpb=64850, bsz=128, num_updates=1859, lr=9.99931e-05, gnorm=2.531, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20807
2021-06-19 00:25:44 | INFO | train_inner | epoch 001: 1879 / 3002 loss=2.899, ppl=7.46, wps=5840.7, ups=0.09, wpb=64797, bsz=128, num_updates=1860, lr=9.99931e-05, gnorm=2.515, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20818
2021-06-19 00:25:55 | INFO | train_inner | epoch 001: 1880 / 3002 loss=2.978, ppl=7.88, wps=5898.3, ups=0.09, wpb=64907, bsz=128, num_updates=1861, lr=9.99931e-05, gnorm=2.548, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20829
2021-06-19 00:26:06 | INFO | train_inner | epoch 001: 1881 / 3002 loss=2.832, ppl=7.12, wps=5910.2, ups=0.09, wpb=64766, bsz=128, num_updates=1862, lr=9.99931e-05, gnorm=2.728, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20840
2021-06-19 00:26:17 | INFO | train_inner | epoch 001: 1882 / 3002 loss=2.84, ppl=7.16, wps=5829.9, ups=0.09, wpb=64798, bsz=128, num_updates=1863, lr=9.99931e-05, gnorm=3.077, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20851
2021-06-19 00:26:28 | INFO | train_inner | epoch 001: 1883 / 3002 loss=3.024, ppl=8.13, wps=5755.3, ups=0.09, wpb=64817, bsz=128, num_updates=1864, lr=9.99931e-05, gnorm=2.444, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20862
2021-06-19 00:26:39 | INFO | train_inner | epoch 001: 1884 / 3002 loss=3.021, ppl=8.12, wps=5873.2, ups=0.09, wpb=64848, bsz=128, num_updates=1865, lr=9.99931e-05, gnorm=2.471, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20874
2021-06-19 00:26:50 | INFO | train_inner | epoch 001: 1885 / 3002 loss=2.885, ppl=7.39, wps=5799.9, ups=0.09, wpb=64821, bsz=128, num_updates=1866, lr=9.99931e-05, gnorm=2.449, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20885
2021-06-19 00:27:01 | INFO | train_inner | epoch 001: 1886 / 3002 loss=2.855, ppl=7.23, wps=5913.6, ups=0.09, wpb=64833, bsz=128, num_updates=1867, lr=9.99931e-05, gnorm=2.399, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20896
2021-06-19 00:27:12 | INFO | train_inner | epoch 001: 1887 / 3002 loss=2.92, ppl=7.57, wps=5808.8, ups=0.09, wpb=64825, bsz=128, num_updates=1868, lr=9.99931e-05, gnorm=2.558, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20907
2021-06-19 00:27:24 | INFO | train_inner | epoch 001: 1888 / 3002 loss=2.967, ppl=7.82, wps=5856.9, ups=0.09, wpb=64885, bsz=128, num_updates=1869, lr=9.9993e-05, gnorm=2.519, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20918
2021-06-19 00:27:35 | INFO | train_inner | epoch 001: 1889 / 3002 loss=2.933, ppl=7.64, wps=5872.8, ups=0.09, wpb=64872, bsz=128, num_updates=1870, lr=9.9993e-05, gnorm=2.483, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20929
2021-06-19 00:27:46 | INFO | train_inner | epoch 001: 1890 / 3002 loss=2.945, ppl=7.7, wps=5803.3, ups=0.09, wpb=64818, bsz=128, num_updates=1871, lr=9.9993e-05, gnorm=2.527, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20940
2021-06-19 00:27:57 | INFO | train_inner | epoch 001: 1891 / 3002 loss=2.965, ppl=7.81, wps=5801.4, ups=0.09, wpb=64855, bsz=128, num_updates=1872, lr=9.9993e-05, gnorm=5.326, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20951
2021-06-19 00:28:08 | INFO | train_inner | epoch 001: 1892 / 3002 loss=3.049, ppl=8.28, wps=5789.5, ups=0.09, wpb=64883, bsz=128, num_updates=1873, lr=9.9993e-05, gnorm=2.426, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20962
2021-06-19 00:28:19 | INFO | train_inner | epoch 001: 1893 / 3002 loss=3.065, ppl=8.37, wps=5820.9, ups=0.09, wpb=64845, bsz=128, num_updates=1874, lr=9.9993e-05, gnorm=2.549, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20974
2021-06-19 00:28:30 | INFO | train_inner | epoch 001: 1894 / 3002 loss=3.13, ppl=8.76, wps=5943.1, ups=0.09, wpb=64947, bsz=128, num_updates=1875, lr=9.9993e-05, gnorm=2.684, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=20985
2021-06-19 00:28:41 | INFO | train_inner | epoch 001: 1895 / 3002 loss=3.153, ppl=8.89, wps=5919.5, ups=0.09, wpb=64796, bsz=128, num_updates=1876, lr=9.9993e-05, gnorm=3.244, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=20995
2021-06-19 00:28:52 | INFO | train_inner | epoch 001: 1896 / 3002 loss=3.117, ppl=8.67, wps=5994.1, ups=0.09, wpb=64806, bsz=128, num_updates=1877, lr=9.9993e-05, gnorm=2.523, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21006
2021-06-19 00:29:03 | INFO | train_inner | epoch 001: 1897 / 3002 loss=3.06, ppl=8.34, wps=5826.9, ups=0.09, wpb=64856, bsz=128, num_updates=1878, lr=9.9993e-05, gnorm=2.556, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21017
2021-06-19 00:29:14 | INFO | train_inner | epoch 001: 1898 / 3002 loss=2.787, ppl=6.9, wps=5722.3, ups=0.09, wpb=64827, bsz=128, num_updates=1879, lr=9.9993e-05, gnorm=4.946, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21029
2021-06-19 00:29:26 | INFO | train_inner | epoch 001: 1899 / 3002 loss=3.147, ppl=8.86, wps=5814.9, ups=0.09, wpb=64827, bsz=128, num_updates=1880, lr=9.9993e-05, gnorm=2.513, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21040
2021-06-19 00:29:37 | INFO | train_inner | epoch 001: 1900 / 3002 loss=2.836, ppl=7.14, wps=5823.8, ups=0.09, wpb=64839, bsz=128, num_updates=1881, lr=9.9993e-05, gnorm=2.45, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21051
2021-06-19 00:29:48 | INFO | train_inner | epoch 001: 1901 / 3002 loss=2.994, ppl=7.97, wps=5963.1, ups=0.09, wpb=64827, bsz=128, num_updates=1882, lr=9.99929e-05, gnorm=2.63, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21062
2021-06-19 00:29:58 | INFO | train_inner | epoch 001: 1902 / 3002 loss=2.892, ppl=7.43, wps=6005.6, ups=0.09, wpb=64841, bsz=128, num_updates=1883, lr=9.99929e-05, gnorm=2.37, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21073
2021-06-19 00:30:10 | INFO | train_inner | epoch 001: 1903 / 3002 loss=3.093, ppl=8.53, wps=5811.1, ups=0.09, wpb=64779, bsz=128, num_updates=1884, lr=9.99929e-05, gnorm=2.556, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21084
2021-06-19 00:30:21 | INFO | train_inner | epoch 001: 1904 / 3002 loss=2.951, ppl=7.73, wps=5802.4, ups=0.09, wpb=64815, bsz=128, num_updates=1885, lr=9.99929e-05, gnorm=2.512, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21095
2021-06-19 00:30:32 | INFO | train_inner | epoch 001: 1905 / 3002 loss=2.912, ppl=7.53, wps=5875.7, ups=0.09, wpb=64821, bsz=128, num_updates=1886, lr=9.99929e-05, gnorm=57.194, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21106
2021-06-19 00:30:43 | INFO | train_inner | epoch 001: 1906 / 3002 loss=3.07, ppl=8.4, wps=5859.8, ups=0.09, wpb=64766, bsz=128, num_updates=1887, lr=9.99929e-05, gnorm=2.36, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21117
2021-06-19 00:30:54 | INFO | train_inner | epoch 001: 1907 / 3002 loss=3.104, ppl=8.6, wps=5872.9, ups=0.09, wpb=64875, bsz=128, num_updates=1888, lr=9.99929e-05, gnorm=2.473, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21128
2021-06-19 00:31:05 | INFO | train_inner | epoch 001: 1908 / 3002 loss=2.918, ppl=7.56, wps=5902.9, ups=0.09, wpb=64864, bsz=128, num_updates=1889, lr=9.99929e-05, gnorm=2.459, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21139
2021-06-19 00:31:16 | INFO | train_inner | epoch 001: 1909 / 3002 loss=2.969, ppl=7.83, wps=5888, ups=0.09, wpb=64860, bsz=128, num_updates=1890, lr=9.99929e-05, gnorm=2.732, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21150
2021-06-19 00:31:27 | INFO | train_inner | epoch 001: 1910 / 3002 loss=3.274, ppl=9.67, wps=5933.4, ups=0.09, wpb=64821, bsz=128, num_updates=1891, lr=9.99929e-05, gnorm=2.542, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21161
2021-06-19 00:31:38 | INFO | train_inner | epoch 001: 1911 / 3002 loss=3.015, ppl=8.08, wps=5831.9, ups=0.09, wpb=64869, bsz=128, num_updates=1892, lr=9.99929e-05, gnorm=2.511, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21172
2021-06-19 00:31:49 | INFO | train_inner | epoch 001: 1912 / 3002 loss=3.021, ppl=8.12, wps=5840.8, ups=0.09, wpb=64788, bsz=128, num_updates=1893, lr=9.99929e-05, gnorm=2.522, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21183
2021-06-19 00:32:00 | INFO | train_inner | epoch 001: 1913 / 3002 loss=2.983, ppl=7.9, wps=5815.8, ups=0.09, wpb=64781, bsz=128, num_updates=1894, lr=9.99928e-05, gnorm=2.551, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21194
2021-06-19 00:32:11 | INFO | train_inner | epoch 001: 1914 / 3002 loss=3.112, ppl=8.65, wps=5771.1, ups=0.09, wpb=64847, bsz=128, num_updates=1895, lr=9.99928e-05, gnorm=2.686, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21206
2021-06-19 00:32:23 | INFO | train_inner | epoch 001: 1915 / 3002 loss=2.948, ppl=7.72, wps=5821.9, ups=0.09, wpb=64880, bsz=128, num_updates=1896, lr=9.99928e-05, gnorm=2.634, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21217
2021-06-19 00:32:33 | INFO | train_inner | epoch 001: 1916 / 3002 loss=2.907, ppl=7.5, wps=5928.5, ups=0.09, wpb=64905, bsz=128, num_updates=1897, lr=9.99928e-05, gnorm=2.69, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21228
2021-06-19 00:32:45 | INFO | train_inner | epoch 001: 1917 / 3002 loss=2.961, ppl=7.79, wps=5733, ups=0.09, wpb=64821, bsz=128, num_updates=1898, lr=9.99928e-05, gnorm=2.654, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21239
2021-06-19 00:32:56 | INFO | train_inner | epoch 001: 1918 / 3002 loss=2.988, ppl=7.93, wps=5941.7, ups=0.09, wpb=64824, bsz=128, num_updates=1899, lr=9.99928e-05, gnorm=2.561, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21250
2021-06-19 00:33:07 | INFO | train_inner | epoch 001: 1919 / 3002 loss=3.03, ppl=8.17, wps=5958.5, ups=0.09, wpb=64835, bsz=128, num_updates=1900, lr=9.99928e-05, gnorm=2.553, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21261
2021-06-19 00:33:18 | INFO | train_inner | epoch 001: 1920 / 3002 loss=2.944, ppl=7.7, wps=5844.7, ups=0.09, wpb=64828, bsz=128, num_updates=1901, lr=9.99928e-05, gnorm=2.597, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21272
2021-06-19 00:33:28 | INFO | train_inner | epoch 001: 1921 / 3002 loss=2.993, ppl=7.96, wps=5976.4, ups=0.09, wpb=64883, bsz=128, num_updates=1902, lr=9.99928e-05, gnorm=2.435, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21283
2021-06-19 00:33:40 | INFO | train_inner | epoch 001: 1922 / 3002 loss=2.77, ppl=6.82, wps=5750.3, ups=0.09, wpb=64862, bsz=128, num_updates=1903, lr=9.99928e-05, gnorm=11.058, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21294
2021-06-19 00:33:51 | INFO | train_inner | epoch 001: 1923 / 3002 loss=3.096, ppl=8.55, wps=5805.1, ups=0.09, wpb=64782, bsz=128, num_updates=1904, lr=9.99928e-05, gnorm=2.444, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21305
2021-06-19 00:34:02 | INFO | train_inner | epoch 001: 1924 / 3002 loss=2.992, ppl=7.95, wps=5845.2, ups=0.09, wpb=64866, bsz=128, num_updates=1905, lr=9.99928e-05, gnorm=11.591, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21316
2021-06-19 00:34:13 | INFO | train_inner | epoch 001: 1925 / 3002 loss=3.083, ppl=8.48, wps=5816.8, ups=0.09, wpb=64766, bsz=128, num_updates=1906, lr=9.99928e-05, gnorm=3.462, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21328
2021-06-19 00:34:24 | INFO | train_inner | epoch 001: 1926 / 3002 loss=2.92, ppl=7.57, wps=5870.8, ups=0.09, wpb=64814, bsz=128, num_updates=1907, lr=9.99927e-05, gnorm=2.473, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21339
2021-06-19 00:34:35 | INFO | train_inner | epoch 001: 1927 / 3002 loss=2.937, ppl=7.66, wps=5784.3, ups=0.09, wpb=64816, bsz=128, num_updates=1908, lr=9.99927e-05, gnorm=2.553, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21350
2021-06-19 00:34:47 | INFO | train_inner | epoch 001: 1928 / 3002 loss=3.027, ppl=8.15, wps=5830.7, ups=0.09, wpb=64817, bsz=128, num_updates=1909, lr=9.99927e-05, gnorm=4.282, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21361
2021-06-19 00:34:58 | INFO | train_inner | epoch 001: 1929 / 3002 loss=3.16, ppl=8.94, wps=5823.3, ups=0.09, wpb=64735, bsz=128, num_updates=1910, lr=9.99927e-05, gnorm=2.515, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21372
2021-06-19 00:35:09 | INFO | train_inner | epoch 001: 1930 / 3002 loss=2.829, ppl=7.1, wps=5850.4, ups=0.09, wpb=64777, bsz=128, num_updates=1911, lr=9.99927e-05, gnorm=2.47, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21383
2021-06-19 00:35:20 | INFO | train_inner | epoch 001: 1931 / 3002 loss=2.877, ppl=7.34, wps=5776.9, ups=0.09, wpb=64755, bsz=128, num_updates=1912, lr=9.99927e-05, gnorm=2.642, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21394
2021-06-19 00:35:31 | INFO | train_inner | epoch 001: 1932 / 3002 loss=3.046, ppl=8.26, wps=5949.1, ups=0.09, wpb=64846, bsz=128, num_updates=1913, lr=9.99927e-05, gnorm=2.511, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21405
2021-06-19 00:35:42 | INFO | train_inner | epoch 001: 1933 / 3002 loss=3.018, ppl=8.1, wps=5725.1, ups=0.09, wpb=64874, bsz=128, num_updates=1914, lr=9.99927e-05, gnorm=2.447, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21416
2021-06-19 00:35:53 | INFO | train_inner | epoch 001: 1934 / 3002 loss=2.897, ppl=7.45, wps=5978.3, ups=0.09, wpb=64884, bsz=128, num_updates=1915, lr=9.99927e-05, gnorm=2.491, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21427
2021-06-19 00:36:04 | INFO | train_inner | epoch 001: 1935 / 3002 loss=2.944, ppl=7.69, wps=5874.6, ups=0.09, wpb=64816, bsz=128, num_updates=1916, lr=9.99927e-05, gnorm=2.532, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21438
2021-06-19 00:36:15 | INFO | train_inner | epoch 001: 1936 / 3002 loss=3.038, ppl=8.21, wps=5988.9, ups=0.09, wpb=64810, bsz=128, num_updates=1917, lr=9.99927e-05, gnorm=2.667, loss_scale=0.125, train_wall=10, gb_free=2.8, wall=21449
2021-06-19 00:36:26 | INFO | train_inner | epoch 001: 1937 / 3002 loss=3.082, ppl=8.47, wps=5879.2, ups=0.09, wpb=64803, bsz=128, num_updates=1918, lr=9.99927e-05, gnorm=2.56, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21460
2021-06-19 00:36:37 | INFO | train_inner | epoch 001: 1938 / 3002 loss=3.008, ppl=8.05, wps=5795.4, ups=0.09, wpb=64800, bsz=128, num_updates=1919, lr=9.99926e-05, gnorm=4.185, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21471
2021-06-19 00:36:48 | INFO | train_inner | epoch 001: 1939 / 3002 loss=3.007, ppl=8.04, wps=5915.3, ups=0.09, wpb=64821, bsz=128, num_updates=1920, lr=9.99926e-05, gnorm=2.893, loss_scale=0.125, train_wall=11, gb_free=2.8, wall=21482
2021-06-19 00:36:59 | INFO | train_inner | epoch 001: 1940 / 3002 loss=2.823, ppl=7.08, wps=5898.5, ups=0.09, wpb=64869, bsz=128, num_updates=1921, lr=9.99926e-05, gnorm=2.458, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21493
2021-06-19 00:37:10 | INFO | train_inner | epoch 001: 1941 / 3002 loss=2.963, ppl=7.8, wps=5802.4, ups=0.09, wpb=64888, bsz=128, num_updates=1922, lr=9.99926e-05, gnorm=2.416, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21505
2021-06-19 00:37:21 | INFO | train_inner | epoch 001: 1942 / 3002 loss=3.043, ppl=8.24, wps=5902.8, ups=0.09, wpb=64812, bsz=128, num_updates=1923, lr=9.99926e-05, gnorm=2.337, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21516
2021-06-19 00:37:32 | INFO | train_inner | epoch 001: 1943 / 3002 loss=2.961, ppl=7.78, wps=6029.6, ups=0.09, wpb=64855, bsz=128, num_updates=1924, lr=9.99926e-05, gnorm=2.431, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21526
2021-06-19 00:37:43 | INFO | train_inner | epoch 001: 1944 / 3002 loss=2.927, ppl=7.6, wps=5888.4, ups=0.09, wpb=64880, bsz=128, num_updates=1925, lr=9.99926e-05, gnorm=2.468, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21537
2021-06-19 00:37:54 | INFO | train_inner | epoch 001: 1945 / 3002 loss=2.868, ppl=7.3, wps=5810.8, ups=0.09, wpb=64837, bsz=128, num_updates=1926, lr=9.99926e-05, gnorm=2.565, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21548
2021-06-19 00:38:05 | INFO | train_inner | epoch 001: 1946 / 3002 loss=2.889, ppl=7.41, wps=5870, ups=0.09, wpb=64898, bsz=128, num_updates=1927, lr=9.99926e-05, gnorm=2.425, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21560
2021-06-19 00:38:16 | INFO | train_inner | epoch 001: 1947 / 3002 loss=2.867, ppl=7.29, wps=5784.7, ups=0.09, wpb=64825, bsz=128, num_updates=1928, lr=9.99926e-05, gnorm=2.474, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21571
2021-06-19 00:38:27 | INFO | train_inner | epoch 001: 1948 / 3002 loss=2.964, ppl=7.81, wps=5965.3, ups=0.09, wpb=64893, bsz=128, num_updates=1929, lr=9.99926e-05, gnorm=2.535, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21582
2021-06-19 00:38:38 | INFO | train_inner | epoch 001: 1949 / 3002 loss=3.022, ppl=8.13, wps=5915.8, ups=0.09, wpb=64781, bsz=128, num_updates=1930, lr=9.99926e-05, gnorm=3.234, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21593
2021-06-19 00:38:49 | INFO | train_inner | epoch 001: 1950 / 3002 loss=3.006, ppl=8.04, wps=5836.8, ups=0.09, wpb=64796, bsz=128, num_updates=1931, lr=9.99926e-05, gnorm=2.448, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21604
2021-06-19 00:39:00 | INFO | train_inner | epoch 001: 1951 / 3002 loss=2.919, ppl=7.56, wps=5894.5, ups=0.09, wpb=64844, bsz=128, num_updates=1932, lr=9.99925e-05, gnorm=2.38, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21615
2021-06-19 00:39:11 | INFO | train_inner | epoch 001: 1952 / 3002 loss=3.129, ppl=8.75, wps=5940.3, ups=0.09, wpb=64809, bsz=128, num_updates=1933, lr=9.99925e-05, gnorm=2.455, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21626
2021-06-19 00:39:22 | INFO | train_inner | epoch 001: 1953 / 3002 loss=2.998, ppl=7.99, wps=5902.7, ups=0.09, wpb=64819, bsz=128, num_updates=1934, lr=9.99925e-05, gnorm=2.452, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21637
2021-06-19 00:39:33 | INFO | train_inner | epoch 001: 1954 / 3002 loss=3.025, ppl=8.14, wps=5919.4, ups=0.09, wpb=64740, bsz=128, num_updates=1935, lr=9.99925e-05, gnorm=2.402, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21647
2021-06-19 00:39:44 | INFO | train_inner | epoch 001: 1955 / 3002 loss=2.923, ppl=7.59, wps=5963.2, ups=0.09, wpb=64718, bsz=128, num_updates=1936, lr=9.99925e-05, gnorm=2.631, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21658
2021-06-19 00:39:55 | INFO | train_inner | epoch 001: 1956 / 3002 loss=3.124, ppl=8.72, wps=5823.9, ups=0.09, wpb=64880, bsz=128, num_updates=1937, lr=9.99925e-05, gnorm=2.543, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21669
2021-06-19 00:40:06 | INFO | train_inner | epoch 001: 1957 / 3002 loss=3, ppl=8, wps=5989.9, ups=0.09, wpb=64816, bsz=128, num_updates=1938, lr=9.99925e-05, gnorm=2.477, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21680
2021-06-19 00:40:17 | INFO | train_inner | epoch 001: 1958 / 3002 loss=2.962, ppl=7.79, wps=5824.6, ups=0.09, wpb=64900, bsz=128, num_updates=1939, lr=9.99925e-05, gnorm=2.745, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21691
2021-06-19 00:40:28 | INFO | train_inner | epoch 001: 1959 / 3002 loss=2.964, ppl=7.8, wps=5924.3, ups=0.09, wpb=64846, bsz=128, num_updates=1940, lr=9.99925e-05, gnorm=2.454, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21702
2021-06-19 00:40:39 | INFO | train_inner | epoch 001: 1960 / 3002 loss=2.912, ppl=7.53, wps=5928.2, ups=0.09, wpb=64873, bsz=128, num_updates=1941, lr=9.99925e-05, gnorm=2.544, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21713
2021-06-19 00:40:50 | INFO | train_inner | epoch 001: 1961 / 3002 loss=2.973, ppl=7.85, wps=5844.8, ups=0.09, wpb=64791, bsz=128, num_updates=1942, lr=9.99925e-05, gnorm=2.394, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21724
2021-06-19 00:41:01 | INFO | train_inner | epoch 001: 1962 / 3002 loss=2.971, ppl=7.84, wps=5875.8, ups=0.09, wpb=64798, bsz=128, num_updates=1943, lr=9.99925e-05, gnorm=2.504, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21735
2021-06-19 00:41:12 | INFO | train_inner | epoch 001: 1963 / 3002 loss=2.985, ppl=7.92, wps=5776.2, ups=0.09, wpb=64885, bsz=128, num_updates=1944, lr=9.99924e-05, gnorm=3.041, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21747
2021-06-19 00:41:24 | INFO | train_inner | epoch 001: 1964 / 3002 loss=2.829, ppl=7.1, wps=5808.7, ups=0.09, wpb=64820, bsz=128, num_updates=1945, lr=9.99924e-05, gnorm=2.353, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21758
2021-06-19 00:41:35 | INFO | train_inner | epoch 001: 1965 / 3002 loss=3.085, ppl=8.48, wps=5782.4, ups=0.09, wpb=64822, bsz=128, num_updates=1946, lr=9.99924e-05, gnorm=2.503, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21769
2021-06-19 00:41:46 | INFO | train_inner | epoch 001: 1966 / 3002 loss=2.937, ppl=7.66, wps=5820.8, ups=0.09, wpb=64851, bsz=128, num_updates=1947, lr=9.99924e-05, gnorm=2.426, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21780
2021-06-19 00:41:56 | INFO | train_inner | epoch 001: 1967 / 3002 loss=3.036, ppl=8.2, wps=6139.8, ups=0.09, wpb=64876, bsz=128, num_updates=1948, lr=9.99924e-05, gnorm=2.466, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21791
2021-06-19 00:42:08 | INFO | train_inner | epoch 001: 1968 / 3002 loss=2.865, ppl=7.28, wps=5741.7, ups=0.09, wpb=64802, bsz=128, num_updates=1949, lr=9.99924e-05, gnorm=2.654, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21802
2021-06-19 00:42:19 | INFO | train_inner | epoch 001: 1969 / 3002 loss=3.01, ppl=8.06, wps=5946.7, ups=0.09, wpb=64826, bsz=128, num_updates=1950, lr=9.99924e-05, gnorm=2.4, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21813
2021-06-19 00:42:30 | INFO | train_inner | epoch 001: 1970 / 3002 loss=3.101, ppl=8.58, wps=5796.7, ups=0.09, wpb=64776, bsz=128, num_updates=1951, lr=9.99924e-05, gnorm=2.48, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21824
2021-06-19 00:42:41 | INFO | train_inner | epoch 001: 1971 / 3002 loss=2.943, ppl=7.69, wps=5800.9, ups=0.09, wpb=64837, bsz=128, num_updates=1952, lr=9.99924e-05, gnorm=3.076, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21835
2021-06-19 00:42:52 | INFO | train_inner | epoch 001: 1972 / 3002 loss=3.077, ppl=8.44, wps=5934.7, ups=0.09, wpb=64826, bsz=128, num_updates=1953, lr=9.99924e-05, gnorm=2.469, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21846
2021-06-19 00:43:03 | INFO | train_inner | epoch 001: 1973 / 3002 loss=3.085, ppl=8.49, wps=5861, ups=0.09, wpb=64860, bsz=128, num_updates=1954, lr=9.99924e-05, gnorm=2.424, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21857
2021-06-19 00:43:14 | INFO | train_inner | epoch 001: 1974 / 3002 loss=3.08, ppl=8.46, wps=5767.3, ups=0.09, wpb=64906, bsz=128, num_updates=1955, lr=9.99924e-05, gnorm=2.527, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21869
2021-06-19 00:43:25 | INFO | train_inner | epoch 001: 1975 / 3002 loss=3.099, ppl=8.57, wps=5741.5, ups=0.09, wpb=64737, bsz=128, num_updates=1956, lr=9.99924e-05, gnorm=2.462, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21880
2021-06-19 00:43:37 | INFO | train_inner | epoch 001: 1976 / 3002 loss=2.971, ppl=7.84, wps=5803.1, ups=0.09, wpb=64836, bsz=128, num_updates=1957, lr=9.99923e-05, gnorm=2.616, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21891
2021-06-19 00:43:48 | INFO | train_inner | epoch 001: 1977 / 3002 loss=2.963, ppl=7.8, wps=5826.1, ups=0.09, wpb=64856, bsz=128, num_updates=1958, lr=9.99923e-05, gnorm=2.42, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21902
2021-06-19 00:43:59 | INFO | train_inner | epoch 001: 1978 / 3002 loss=2.716, ppl=6.57, wps=5950.7, ups=0.09, wpb=64876, bsz=128, num_updates=1959, lr=9.99923e-05, gnorm=2.405, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21913
2021-06-19 00:44:10 | INFO | train_inner | epoch 001: 1979 / 3002 loss=3.052, ppl=8.3, wps=5925.3, ups=0.09, wpb=64833, bsz=128, num_updates=1960, lr=9.99923e-05, gnorm=2.515, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21924
2021-06-19 00:44:21 | INFO | train_inner | epoch 001: 1980 / 3002 loss=2.98, ppl=7.89, wps=5944.2, ups=0.09, wpb=64804, bsz=128, num_updates=1961, lr=9.99923e-05, gnorm=8.12, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21935
2021-06-19 00:44:31 | INFO | train_inner | epoch 001: 1981 / 3002 loss=2.849, ppl=7.2, wps=5972.2, ups=0.09, wpb=64886, bsz=128, num_updates=1962, lr=9.99923e-05, gnorm=2.407, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21946
2021-06-19 00:44:42 | INFO | train_inner | epoch 001: 1982 / 3002 loss=2.913, ppl=7.53, wps=5911.8, ups=0.09, wpb=64904, bsz=128, num_updates=1963, lr=9.99923e-05, gnorm=2.407, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=21957
2021-06-19 00:44:54 | INFO | train_inner | epoch 001: 1983 / 3002 loss=2.96, ppl=7.78, wps=5750, ups=0.09, wpb=64802, bsz=128, num_updates=1964, lr=9.99923e-05, gnorm=2.405, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21968
2021-06-19 00:45:05 | INFO | train_inner | epoch 001: 1984 / 3002 loss=3.053, ppl=8.3, wps=5813.1, ups=0.09, wpb=64880, bsz=128, num_updates=1965, lr=9.99923e-05, gnorm=2.495, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21979
2021-06-19 00:45:16 | INFO | train_inner | epoch 001: 1985 / 3002 loss=2.979, ppl=7.88, wps=5729.6, ups=0.09, wpb=64733, bsz=128, num_updates=1966, lr=9.99923e-05, gnorm=2.317, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=21990
2021-06-19 00:45:27 | INFO | train_inner | epoch 001: 1986 / 3002 loss=2.789, ppl=6.91, wps=5973.9, ups=0.09, wpb=64865, bsz=128, num_updates=1967, lr=9.99923e-05, gnorm=2.372, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22001
2021-06-19 00:45:38 | INFO | train_inner | epoch 001: 1987 / 3002 loss=3.035, ppl=8.19, wps=5858.5, ups=0.09, wpb=64733, bsz=128, num_updates=1968, lr=9.99923e-05, gnorm=2.484, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22012
2021-06-19 00:45:49 | INFO | train_inner | epoch 001: 1988 / 3002 loss=2.972, ppl=7.85, wps=5939, ups=0.09, wpb=64862, bsz=128, num_updates=1969, lr=9.99922e-05, gnorm=2.495, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22023
2021-06-19 00:46:00 | INFO | train_inner | epoch 001: 1989 / 3002 loss=2.977, ppl=7.88, wps=5839.4, ups=0.09, wpb=64802, bsz=128, num_updates=1970, lr=9.99922e-05, gnorm=2.609, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22034
2021-06-19 00:46:11 | INFO | train_inner | epoch 001: 1990 / 3002 loss=2.839, ppl=7.15, wps=5774.7, ups=0.09, wpb=64878, bsz=128, num_updates=1971, lr=9.99922e-05, gnorm=2.446, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22046
2021-06-19 00:46:23 | INFO | train_inner | epoch 001: 1991 / 3002 loss=2.979, ppl=7.88, wps=5761.7, ups=0.09, wpb=64776, bsz=128, num_updates=1972, lr=9.99922e-05, gnorm=2.461, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22057
2021-06-19 00:46:34 | INFO | train_inner | epoch 001: 1992 / 3002 loss=2.991, ppl=7.95, wps=5805.8, ups=0.09, wpb=64851, bsz=128, num_updates=1973, lr=9.99922e-05, gnorm=2.305, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22068
2021-06-19 00:46:45 | INFO | train_inner | epoch 001: 1993 / 3002 loss=3.077, ppl=8.44, wps=5926.3, ups=0.09, wpb=64758, bsz=128, num_updates=1974, lr=9.99922e-05, gnorm=3.856, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22079
2021-06-19 00:46:56 | INFO | train_inner | epoch 001: 1994 / 3002 loss=3.055, ppl=8.31, wps=5859.3, ups=0.09, wpb=64832, bsz=128, num_updates=1975, lr=9.99922e-05, gnorm=2.508, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22090
2021-06-19 00:47:07 | INFO | train_inner | epoch 001: 1995 / 3002 loss=2.921, ppl=7.58, wps=5884.9, ups=0.09, wpb=64802, bsz=128, num_updates=1976, lr=9.99922e-05, gnorm=2.569, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22101
2021-06-19 00:47:18 | INFO | train_inner | epoch 001: 1996 / 3002 loss=2.929, ppl=7.62, wps=5765.5, ups=0.09, wpb=64845, bsz=128, num_updates=1977, lr=9.99922e-05, gnorm=2.449, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22112
2021-06-19 00:47:29 | INFO | train_inner | epoch 001: 1997 / 3002 loss=2.905, ppl=7.49, wps=5981, ups=0.09, wpb=64893, bsz=128, num_updates=1978, lr=9.99922e-05, gnorm=2.474, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22123
2021-06-19 00:47:40 | INFO | train_inner | epoch 001: 1998 / 3002 loss=3.117, ppl=8.67, wps=5814.5, ups=0.09, wpb=64846, bsz=128, num_updates=1979, lr=9.99922e-05, gnorm=2.759, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22134
2021-06-19 00:47:51 | INFO | train_inner | epoch 001: 1999 / 3002 loss=2.961, ppl=7.79, wps=5892.1, ups=0.09, wpb=64837, bsz=128, num_updates=1980, lr=9.99922e-05, gnorm=2.87, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22145
2021-06-19 00:48:02 | INFO | train_inner | epoch 001: 2000 / 3002 loss=2.818, ppl=7.05, wps=5824.7, ups=0.09, wpb=64807, bsz=128, num_updates=1981, lr=9.99922e-05, gnorm=2.394, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22156
2021-06-19 00:48:13 | INFO | train_inner | epoch 001: 2001 / 3002 loss=3.102, ppl=8.59, wps=6036.7, ups=0.09, wpb=64923, bsz=128, num_updates=1982, lr=9.99921e-05, gnorm=2.414, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22167
2021-06-19 00:48:24 | INFO | train_inner | epoch 001: 2002 / 3002 loss=2.958, ppl=7.77, wps=5893.5, ups=0.09, wpb=64787, bsz=128, num_updates=1983, lr=9.99921e-05, gnorm=2.389, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22178
2021-06-19 00:48:35 | INFO | train_inner | epoch 001: 2003 / 3002 loss=3.054, ppl=8.3, wps=5922, ups=0.09, wpb=64841, bsz=128, num_updates=1984, lr=9.99921e-05, gnorm=2.528, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22189
2021-06-19 00:48:46 | INFO | train_inner | epoch 001: 2004 / 3002 loss=2.891, ppl=7.42, wps=5852.1, ups=0.09, wpb=64863, bsz=128, num_updates=1985, lr=9.99921e-05, gnorm=2.364, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22200
2021-06-19 00:48:57 | INFO | train_inner | epoch 001: 2005 / 3002 loss=2.881, ppl=7.37, wps=5939.3, ups=0.09, wpb=64901, bsz=128, num_updates=1986, lr=9.99921e-05, gnorm=2.401, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22211
2021-06-19 00:49:08 | INFO | train_inner | epoch 001: 2006 / 3002 loss=2.885, ppl=7.39, wps=6043.8, ups=0.09, wpb=64906, bsz=128, num_updates=1987, lr=9.99921e-05, gnorm=2.395, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22222
2021-06-19 00:49:19 | INFO | train_inner | epoch 001: 2007 / 3002 loss=2.996, ppl=7.98, wps=5803.1, ups=0.09, wpb=64850, bsz=128, num_updates=1988, lr=9.99921e-05, gnorm=2.462, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22233
2021-06-19 00:49:30 | INFO | train_inner | epoch 001: 2008 / 3002 loss=2.929, ppl=7.62, wps=5908, ups=0.09, wpb=64776, bsz=128, num_updates=1989, lr=9.99921e-05, gnorm=2.466, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22244
2021-06-19 00:49:41 | INFO | train_inner | epoch 001: 2009 / 3002 loss=3.007, ppl=8.04, wps=5759.1, ups=0.09, wpb=64851, bsz=128, num_updates=1990, lr=9.99921e-05, gnorm=3.37, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22255
2021-06-19 00:49:52 | INFO | train_inner | epoch 001: 2010 / 3002 loss=2.874, ppl=7.33, wps=5790.5, ups=0.09, wpb=64780, bsz=128, num_updates=1991, lr=9.99921e-05, gnorm=2.469, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22266
2021-06-19 00:50:03 | INFO | train_inner | epoch 001: 2011 / 3002 loss=2.946, ppl=7.7, wps=5809, ups=0.09, wpb=64699, bsz=128, num_updates=1992, lr=9.99921e-05, gnorm=2.539, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22278
2021-06-19 00:50:14 | INFO | train_inner | epoch 001: 2012 / 3002 loss=3.156, ppl=8.92, wps=5872.7, ups=0.09, wpb=64773, bsz=128, num_updates=1993, lr=9.99921e-05, gnorm=2.467, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22289
2021-06-19 00:50:25 | INFO | train_inner | epoch 001: 2013 / 3002 loss=2.961, ppl=7.79, wps=6015.6, ups=0.09, wpb=64725, bsz=128, num_updates=1994, lr=9.9992e-05, gnorm=2.334, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22299
2021-06-19 00:50:36 | INFO | train_inner | epoch 001: 2014 / 3002 loss=2.977, ppl=7.87, wps=5939.9, ups=0.09, wpb=64786, bsz=128, num_updates=1995, lr=9.9992e-05, gnorm=2.446, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22310
2021-06-19 00:50:47 | INFO | train_inner | epoch 001: 2015 / 3002 loss=2.909, ppl=7.51, wps=5799.9, ups=0.09, wpb=64820, bsz=128, num_updates=1996, lr=9.9992e-05, gnorm=2.329, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22321
2021-06-19 00:50:58 | INFO | train_inner | epoch 001: 2016 / 3002 loss=2.954, ppl=7.75, wps=5845.7, ups=0.09, wpb=64731, bsz=128, num_updates=1997, lr=9.9992e-05, gnorm=2.316, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22333
2021-06-19 00:51:09 | INFO | train_inner | epoch 001: 2017 / 3002 loss=2.865, ppl=7.28, wps=5774, ups=0.09, wpb=64825, bsz=128, num_updates=1998, lr=9.9992e-05, gnorm=2.479, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22344
2021-06-19 00:51:20 | INFO | train_inner | epoch 001: 2018 / 3002 loss=3.19, ppl=9.13, wps=5865.7, ups=0.09, wpb=64731, bsz=128, num_updates=1999, lr=9.9992e-05, gnorm=2.542, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22355
2021-06-19 00:51:32 | INFO | train_inner | epoch 001: 2019 / 3002 loss=3.006, ppl=8.04, wps=5796.8, ups=0.09, wpb=64804, bsz=128, num_updates=2000, lr=9.9992e-05, gnorm=2.461, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22366
2021-06-19 00:51:43 | INFO | train_inner | epoch 001: 2020 / 3002 loss=2.982, ppl=7.9, wps=5886.8, ups=0.09, wpb=64875, bsz=128, num_updates=2001, lr=9.9992e-05, gnorm=2.544, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22377
2021-06-19 00:51:54 | INFO | train_inner | epoch 001: 2021 / 3002 loss=2.964, ppl=7.8, wps=5809.6, ups=0.09, wpb=64752, bsz=128, num_updates=2002, lr=9.9992e-05, gnorm=3.551, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22388
2021-06-19 00:52:05 | INFO | train_inner | epoch 001: 2022 / 3002 loss=2.919, ppl=7.56, wps=5841.2, ups=0.09, wpb=64846, bsz=128, num_updates=2003, lr=9.9992e-05, gnorm=2.459, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22399
2021-06-19 00:52:16 | INFO | train_inner | epoch 001: 2023 / 3002 loss=3.082, ppl=8.47, wps=5858.2, ups=0.09, wpb=64780, bsz=128, num_updates=2004, lr=9.9992e-05, gnorm=2.577, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22410
2021-06-19 00:52:27 | INFO | train_inner | epoch 001: 2024 / 3002 loss=2.943, ppl=7.69, wps=5879.6, ups=0.09, wpb=64881, bsz=128, num_updates=2005, lr=9.9992e-05, gnorm=2.494, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22421
2021-06-19 00:52:38 | INFO | train_inner | epoch 001: 2025 / 3002 loss=2.907, ppl=7.5, wps=5918.7, ups=0.09, wpb=64868, bsz=128, num_updates=2006, lr=9.9992e-05, gnorm=2.386, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22432
2021-06-19 00:52:49 | INFO | train_inner | epoch 001: 2026 / 3002 loss=2.851, ppl=7.21, wps=5931.2, ups=0.09, wpb=64884, bsz=128, num_updates=2007, lr=9.99919e-05, gnorm=2.61, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22443
2021-06-19 00:53:00 | INFO | train_inner | epoch 001: 2027 / 3002 loss=3.01, ppl=8.06, wps=5838.1, ups=0.09, wpb=64850, bsz=128, num_updates=2008, lr=9.99919e-05, gnorm=2.554, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22454
2021-06-19 00:53:11 | INFO | train_inner | epoch 001: 2028 / 3002 loss=2.845, ppl=7.18, wps=5831.8, ups=0.09, wpb=64806, bsz=128, num_updates=2009, lr=9.99919e-05, gnorm=2.492, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22465
2021-06-19 00:53:22 | INFO | train_inner | epoch 001: 2029 / 3002 loss=2.869, ppl=7.31, wps=5874.1, ups=0.09, wpb=64883, bsz=128, num_updates=2010, lr=9.99919e-05, gnorm=2.351, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22477
2021-06-19 00:53:33 | INFO | train_inner | epoch 001: 2030 / 3002 loss=2.889, ppl=7.41, wps=5887.7, ups=0.09, wpb=64782, bsz=128, num_updates=2011, lr=9.99919e-05, gnorm=2.504, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22488
2021-06-19 00:53:44 | INFO | train_inner | epoch 001: 2031 / 3002 loss=2.979, ppl=7.88, wps=5846.1, ups=0.09, wpb=64799, bsz=128, num_updates=2012, lr=9.99919e-05, gnorm=2.377, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22499
2021-06-19 00:53:56 | INFO | train_inner | epoch 001: 2032 / 3002 loss=2.815, ppl=7.04, wps=5752.2, ups=0.09, wpb=64790, bsz=128, num_updates=2013, lr=9.99919e-05, gnorm=2.422, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22510
2021-06-19 00:54:07 | INFO | train_inner | epoch 001: 2033 / 3002 loss=2.907, ppl=7.5, wps=5746.2, ups=0.09, wpb=64865, bsz=128, num_updates=2014, lr=9.99919e-05, gnorm=2.485, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22521
2021-06-19 00:54:18 | INFO | train_inner | epoch 001: 2034 / 3002 loss=2.859, ppl=7.26, wps=5901.4, ups=0.09, wpb=64819, bsz=128, num_updates=2015, lr=9.99919e-05, gnorm=3.47, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22532
2021-06-19 00:54:29 | INFO | train_inner | epoch 001: 2035 / 3002 loss=2.858, ppl=7.25, wps=5988.4, ups=0.09, wpb=64861, bsz=128, num_updates=2016, lr=9.99919e-05, gnorm=2.375, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22543
2021-06-19 00:54:40 | INFO | train_inner | epoch 001: 2036 / 3002 loss=2.902, ppl=7.47, wps=5808.9, ups=0.09, wpb=64763, bsz=128, num_updates=2017, lr=9.99919e-05, gnorm=2.474, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22554
2021-06-19 00:54:51 | INFO | train_inner | epoch 001: 2037 / 3002 loss=2.798, ppl=6.96, wps=5915.1, ups=0.09, wpb=64880, bsz=128, num_updates=2018, lr=9.99919e-05, gnorm=2.37, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22565
2021-06-19 00:55:02 | INFO | train_inner | epoch 001: 2038 / 3002 loss=2.956, ppl=7.76, wps=5813.1, ups=0.09, wpb=64705, bsz=128, num_updates=2019, lr=9.99918e-05, gnorm=2.376, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22576
2021-06-19 00:55:13 | INFO | train_inner | epoch 001: 2039 / 3002 loss=2.862, ppl=7.27, wps=5874.3, ups=0.09, wpb=64807, bsz=128, num_updates=2020, lr=9.99918e-05, gnorm=2.928, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22587
2021-06-19 00:55:24 | INFO | train_inner | epoch 001: 2040 / 3002 loss=2.946, ppl=7.71, wps=5901.4, ups=0.09, wpb=64843, bsz=128, num_updates=2021, lr=9.99918e-05, gnorm=2.405, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22598
2021-06-19 00:55:35 | INFO | train_inner | epoch 001: 2041 / 3002 loss=2.969, ppl=7.83, wps=5975.4, ups=0.09, wpb=64793, bsz=128, num_updates=2022, lr=9.99918e-05, gnorm=2.688, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22609
2021-06-19 00:55:46 | INFO | train_inner | epoch 001: 2042 / 3002 loss=2.933, ppl=7.63, wps=5817, ups=0.09, wpb=64804, bsz=128, num_updates=2023, lr=9.99918e-05, gnorm=2.466, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22620
2021-06-19 00:55:57 | INFO | train_inner | epoch 001: 2043 / 3002 loss=2.911, ppl=7.52, wps=5840, ups=0.09, wpb=64885, bsz=128, num_updates=2024, lr=9.99918e-05, gnorm=2.554, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22631
2021-06-19 00:56:08 | INFO | train_inner | epoch 001: 2044 / 3002 loss=2.856, ppl=7.24, wps=5876.8, ups=0.09, wpb=64839, bsz=128, num_updates=2025, lr=9.99918e-05, gnorm=2.408, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22642
2021-06-19 00:56:19 | INFO | train_inner | epoch 001: 2045 / 3002 loss=2.87, ppl=7.31, wps=5892.7, ups=0.09, wpb=64867, bsz=128, num_updates=2026, lr=9.99918e-05, gnorm=2.394, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22653
2021-06-19 00:56:30 | INFO | train_inner | epoch 001: 2046 / 3002 loss=3.015, ppl=8.08, wps=5943.3, ups=0.09, wpb=64913, bsz=128, num_updates=2027, lr=9.99918e-05, gnorm=2.603, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22664
2021-06-19 00:56:41 | INFO | train_inner | epoch 001: 2047 / 3002 loss=2.954, ppl=7.75, wps=5820.6, ups=0.09, wpb=64866, bsz=128, num_updates=2028, lr=9.99918e-05, gnorm=2.359, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22675
2021-06-19 00:56:52 | INFO | train_inner | epoch 001: 2048 / 3002 loss=2.92, ppl=7.57, wps=5985, ups=0.09, wpb=64849, bsz=128, num_updates=2029, lr=9.99918e-05, gnorm=2.555, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22686
2021-06-19 00:57:03 | INFO | train_inner | epoch 001: 2049 / 3002 loss=3.071, ppl=8.4, wps=5704.3, ups=0.09, wpb=64839, bsz=128, num_updates=2030, lr=9.99918e-05, gnorm=2.47, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22698
2021-06-19 00:57:14 | INFO | train_inner | epoch 001: 2050 / 3002 loss=2.894, ppl=7.43, wps=5818.3, ups=0.09, wpb=64864, bsz=128, num_updates=2031, lr=9.99918e-05, gnorm=2.423, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22709
2021-06-19 00:57:26 | INFO | train_inner | epoch 001: 2051 / 3002 loss=3.001, ppl=8, wps=5804.9, ups=0.09, wpb=64858, bsz=128, num_updates=2032, lr=9.99917e-05, gnorm=2.423, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22720
2021-06-19 00:57:36 | INFO | train_inner | epoch 001: 2052 / 3002 loss=2.945, ppl=7.7, wps=6015.7, ups=0.09, wpb=64889, bsz=128, num_updates=2033, lr=9.99917e-05, gnorm=2.509, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22731
2021-06-19 00:57:47 | INFO | train_inner | epoch 001: 2053 / 3002 loss=2.891, ppl=7.42, wps=5902.3, ups=0.09, wpb=64829, bsz=128, num_updates=2034, lr=9.99917e-05, gnorm=6.112, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22742
2021-06-19 00:57:59 | INFO | train_inner | epoch 001: 2054 / 3002 loss=2.991, ppl=7.95, wps=5775.5, ups=0.09, wpb=64757, bsz=128, num_updates=2035, lr=9.99917e-05, gnorm=2.383, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22753
2021-06-19 00:58:10 | INFO | train_inner | epoch 001: 2055 / 3002 loss=2.806, ppl=6.99, wps=5941.3, ups=0.09, wpb=64804, bsz=128, num_updates=2036, lr=9.99917e-05, gnorm=2.337, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22764
2021-06-19 00:58:20 | INFO | train_inner | epoch 001: 2056 / 3002 loss=3.061, ppl=8.35, wps=5955.7, ups=0.09, wpb=64833, bsz=128, num_updates=2037, lr=9.99917e-05, gnorm=2.444, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22775
2021-06-19 00:58:32 | INFO | train_inner | epoch 001: 2057 / 3002 loss=3.032, ppl=8.18, wps=5805.4, ups=0.09, wpb=64830, bsz=128, num_updates=2038, lr=9.99917e-05, gnorm=2.547, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22786
2021-06-19 00:58:43 | INFO | train_inner | epoch 001: 2058 / 3002 loss=2.773, ppl=6.84, wps=5924.2, ups=0.09, wpb=64806, bsz=128, num_updates=2039, lr=9.99917e-05, gnorm=2.394, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=22797
2021-06-19 00:58:54 | INFO | train_inner | epoch 001: 2059 / 3002 loss=2.974, ppl=7.86, wps=5831.4, ups=0.09, wpb=64871, bsz=128, num_updates=2040, lr=9.99917e-05, gnorm=2.425, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22808
2021-06-19 00:59:05 | INFO | train_inner | epoch 001: 2060 / 3002 loss=2.851, ppl=7.22, wps=5880.4, ups=0.09, wpb=64851, bsz=128, num_updates=2041, lr=9.99917e-05, gnorm=2.465, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22819
2021-06-19 00:59:16 | INFO | train_inner | epoch 001: 2061 / 3002 loss=3.044, ppl=8.25, wps=5879, ups=0.09, wpb=64814, bsz=128, num_updates=2042, lr=9.99917e-05, gnorm=2.418, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22830
2021-06-19 00:59:27 | INFO | train_inner | epoch 001: 2062 / 3002 loss=3.028, ppl=8.16, wps=5859.8, ups=0.09, wpb=64789, bsz=128, num_updates=2043, lr=9.99917e-05, gnorm=2.457, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22841
2021-06-19 00:59:38 | INFO | train_inner | epoch 001: 2063 / 3002 loss=2.781, ppl=6.87, wps=5898.8, ups=0.09, wpb=64827, bsz=128, num_updates=2044, lr=9.99916e-05, gnorm=2.369, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22852
2021-06-19 00:59:49 | INFO | train_inner | epoch 001: 2064 / 3002 loss=2.93, ppl=7.62, wps=5896.5, ups=0.09, wpb=64782, bsz=128, num_updates=2045, lr=9.99916e-05, gnorm=2.453, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22863
2021-06-19 01:00:00 | INFO | train_inner | epoch 001: 2065 / 3002 loss=2.928, ppl=7.61, wps=5872.2, ups=0.09, wpb=64853, bsz=128, num_updates=2046, lr=9.99916e-05, gnorm=2.448, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22874
2021-06-19 01:00:11 | INFO | train_inner | epoch 001: 2066 / 3002 loss=2.97, ppl=7.83, wps=5789.7, ups=0.09, wpb=64842, bsz=128, num_updates=2047, lr=9.99916e-05, gnorm=2.715, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22885
2021-06-19 01:00:22 | INFO | train_inner | epoch 001: 2067 / 3002 loss=2.791, ppl=6.92, wps=5896, ups=0.09, wpb=64847, bsz=128, num_updates=2048, lr=9.99916e-05, gnorm=2.359, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=22896
2021-06-19 01:00:33 | INFO | train_inner | epoch 001: 2068 / 3002 loss=2.899, ppl=7.46, wps=5942.4, ups=0.09, wpb=64901, bsz=128, num_updates=2049, lr=9.99916e-05, gnorm=2.369, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=22907
2021-06-19 01:00:44 | INFO | train_inner | epoch 001: 2069 / 3002 loss=2.996, ppl=7.98, wps=5790.9, ups=0.09, wpb=64774, bsz=128, num_updates=2050, lr=9.99916e-05, gnorm=2.434, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=22918
2021-06-19 01:00:55 | INFO | train_inner | epoch 001: 2070 / 3002 loss=3.017, ppl=8.09, wps=6008.6, ups=0.09, wpb=64884, bsz=128, num_updates=2051, lr=9.99916e-05, gnorm=2.486, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=22929
2021-06-19 01:01:06 | INFO | train_inner | epoch 001: 2071 / 3002 loss=2.885, ppl=7.39, wps=6046.8, ups=0.09, wpb=64843, bsz=128, num_updates=2052, lr=9.99916e-05, gnorm=2.393, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=22940
2021-06-19 01:01:17 | INFO | train_inner | epoch 001: 2072 / 3002 loss=3.005, ppl=8.03, wps=5884.4, ups=0.09, wpb=64772, bsz=128, num_updates=2053, lr=9.99916e-05, gnorm=2.435, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=22951
2021-06-19 01:01:28 | INFO | train_inner | epoch 001: 2073 / 3002 loss=2.947, ppl=7.71, wps=5940.2, ups=0.09, wpb=64909, bsz=128, num_updates=2054, lr=9.99916e-05, gnorm=2.298, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=22962
2021-06-19 01:01:39 | INFO | train_inner | epoch 001: 2074 / 3002 loss=3.008, ppl=8.04, wps=5842.8, ups=0.09, wpb=64859, bsz=128, num_updates=2055, lr=9.99916e-05, gnorm=2.42, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=22973
2021-06-19 01:01:50 | INFO | train_inner | epoch 001: 2075 / 3002 loss=3.093, ppl=8.53, wps=5894.5, ups=0.09, wpb=64897, bsz=128, num_updates=2056, lr=9.99916e-05, gnorm=3.097, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=22984
2021-06-19 01:02:01 | INFO | train_inner | epoch 001: 2076 / 3002 loss=2.987, ppl=7.93, wps=5916.7, ups=0.09, wpb=64853, bsz=128, num_updates=2057, lr=9.99915e-05, gnorm=2.352, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=22995
2021-06-19 01:02:12 | INFO | train_inner | epoch 001: 2077 / 3002 loss=3.005, ppl=8.03, wps=5830.8, ups=0.09, wpb=64817, bsz=128, num_updates=2058, lr=9.99915e-05, gnorm=2.371, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23006
2021-06-19 01:02:23 | INFO | train_inner | epoch 001: 2078 / 3002 loss=2.733, ppl=6.65, wps=5860.5, ups=0.09, wpb=64880, bsz=128, num_updates=2059, lr=9.99915e-05, gnorm=2.614, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23017
2021-06-19 01:02:34 | INFO | train_inner | epoch 001: 2079 / 3002 loss=2.998, ppl=7.99, wps=5862.4, ups=0.09, wpb=64793, bsz=128, num_updates=2060, lr=9.99915e-05, gnorm=2.329, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23028
2021-06-19 01:02:45 | INFO | train_inner | epoch 001: 2080 / 3002 loss=2.968, ppl=7.83, wps=5903, ups=0.09, wpb=64846, bsz=128, num_updates=2061, lr=9.99915e-05, gnorm=2.392, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23039
2021-06-19 01:02:56 | INFO | train_inner | epoch 001: 2081 / 3002 loss=2.943, ppl=7.69, wps=5809.5, ups=0.09, wpb=64761, bsz=128, num_updates=2062, lr=9.99915e-05, gnorm=2.449, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23050
2021-06-19 01:03:07 | INFO | train_inner | epoch 001: 2082 / 3002 loss=3.008, ppl=8.04, wps=5976.1, ups=0.09, wpb=64883, bsz=128, num_updates=2063, lr=9.99915e-05, gnorm=2.424, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23061
2021-06-19 01:03:18 | INFO | train_inner | epoch 001: 2083 / 3002 loss=3.012, ppl=8.07, wps=5935.3, ups=0.09, wpb=64892, bsz=128, num_updates=2064, lr=9.99915e-05, gnorm=2.601, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23072
2021-06-19 01:03:29 | INFO | train_inner | epoch 001: 2084 / 3002 loss=2.88, ppl=7.36, wps=5867.3, ups=0.09, wpb=64864, bsz=128, num_updates=2065, lr=9.99915e-05, gnorm=2.382, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23083
2021-06-19 01:03:40 | INFO | train_inner | epoch 001: 2085 / 3002 loss=2.901, ppl=7.47, wps=5777.9, ups=0.09, wpb=64858, bsz=128, num_updates=2066, lr=9.99915e-05, gnorm=2.448, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23094
2021-06-19 01:03:51 | INFO | train_inner | epoch 001: 2086 / 3002 loss=2.99, ppl=7.94, wps=5873.3, ups=0.09, wpb=64874, bsz=128, num_updates=2067, lr=9.99915e-05, gnorm=2.42, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23105
2021-06-19 01:04:02 | INFO | train_inner | epoch 001: 2087 / 3002 loss=3.002, ppl=8.01, wps=5869.1, ups=0.09, wpb=64905, bsz=128, num_updates=2068, lr=9.99915e-05, gnorm=2.569, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23116
2021-06-19 01:04:13 | INFO | train_inner | epoch 001: 2088 / 3002 loss=3.05, ppl=8.28, wps=5922.6, ups=0.09, wpb=64769, bsz=128, num_updates=2069, lr=9.99914e-05, gnorm=2.38, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23127
2021-06-19 01:04:24 | INFO | train_inner | epoch 001: 2089 / 3002 loss=2.904, ppl=7.49, wps=5871.2, ups=0.09, wpb=64929, bsz=128, num_updates=2070, lr=9.99914e-05, gnorm=2.358, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23138
2021-06-19 01:04:35 | INFO | train_inner | epoch 001: 2090 / 3002 loss=2.779, ppl=6.86, wps=5844.3, ups=0.09, wpb=64805, bsz=128, num_updates=2071, lr=9.99914e-05, gnorm=2.426, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23150
2021-06-19 01:04:46 | INFO | train_inner | epoch 001: 2091 / 3002 loss=2.824, ppl=7.08, wps=5779.2, ups=0.09, wpb=64796, bsz=128, num_updates=2072, lr=9.99914e-05, gnorm=2.644, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23161
2021-06-19 01:04:58 | INFO | train_inner | epoch 001: 2092 / 3002 loss=2.845, ppl=7.19, wps=5851.5, ups=0.09, wpb=64863, bsz=128, num_updates=2073, lr=9.99914e-05, gnorm=2.543, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23172
2021-06-19 01:05:09 | INFO | train_inner | epoch 001: 2093 / 3002 loss=2.867, ppl=7.29, wps=5738.8, ups=0.09, wpb=64813, bsz=128, num_updates=2074, lr=9.99914e-05, gnorm=2.545, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23183
2021-06-19 01:05:20 | INFO | train_inner | epoch 001: 2094 / 3002 loss=2.906, ppl=7.5, wps=5858.4, ups=0.09, wpb=64794, bsz=128, num_updates=2075, lr=9.99914e-05, gnorm=2.463, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23194
2021-06-19 01:05:31 | INFO | train_inner | epoch 001: 2095 / 3002 loss=2.962, ppl=7.79, wps=5860.5, ups=0.09, wpb=64769, bsz=128, num_updates=2076, lr=9.99914e-05, gnorm=2.443, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23205
2021-06-19 01:05:42 | INFO | train_inner | epoch 001: 2096 / 3002 loss=2.833, ppl=7.13, wps=5924.4, ups=0.09, wpb=64817, bsz=128, num_updates=2077, lr=9.99914e-05, gnorm=2.357, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23216
2021-06-19 01:05:53 | INFO | train_inner | epoch 001: 2097 / 3002 loss=2.793, ppl=6.93, wps=5862.7, ups=0.09, wpb=64815, bsz=128, num_updates=2078, lr=9.99914e-05, gnorm=2.48, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23227
2021-06-19 01:06:04 | INFO | train_inner | epoch 001: 2098 / 3002 loss=2.98, ppl=7.89, wps=5878.9, ups=0.09, wpb=64769, bsz=128, num_updates=2079, lr=9.99914e-05, gnorm=2.62, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23238
2021-06-19 01:06:15 | INFO | train_inner | epoch 001: 2099 / 3002 loss=2.981, ppl=7.9, wps=5818, ups=0.09, wpb=64892, bsz=128, num_updates=2080, lr=9.99914e-05, gnorm=2.853, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23249
2021-06-19 01:06:26 | INFO | train_inner | epoch 001: 2100 / 3002 loss=2.942, ppl=7.69, wps=5818.4, ups=0.09, wpb=64752, bsz=128, num_updates=2081, lr=9.99914e-05, gnorm=2.488, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23261
2021-06-19 01:06:37 | INFO | train_inner | epoch 001: 2101 / 3002 loss=3.029, ppl=8.16, wps=5850.2, ups=0.09, wpb=64780, bsz=128, num_updates=2082, lr=9.99913e-05, gnorm=2.439, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23272
2021-06-19 01:06:48 | INFO | train_inner | epoch 001: 2102 / 3002 loss=2.844, ppl=7.18, wps=5854.9, ups=0.09, wpb=64813, bsz=128, num_updates=2083, lr=9.99913e-05, gnorm=2.347, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23283
2021-06-19 01:06:59 | INFO | train_inner | epoch 001: 2103 / 3002 loss=3.134, ppl=8.78, wps=5875.5, ups=0.09, wpb=64874, bsz=128, num_updates=2084, lr=9.99913e-05, gnorm=2.983, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23294
2021-06-19 01:07:11 | INFO | train_inner | epoch 001: 2104 / 3002 loss=2.979, ppl=7.88, wps=5852.3, ups=0.09, wpb=64861, bsz=128, num_updates=2085, lr=9.99913e-05, gnorm=2.4, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23305
2021-06-19 01:07:22 | INFO | train_inner | epoch 001: 2105 / 3002 loss=2.996, ppl=7.98, wps=5845.2, ups=0.09, wpb=64815, bsz=128, num_updates=2086, lr=9.99913e-05, gnorm=2.612, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23316
2021-06-19 01:07:33 | INFO | train_inner | epoch 001: 2106 / 3002 loss=2.934, ppl=7.64, wps=5762.2, ups=0.09, wpb=64873, bsz=128, num_updates=2087, lr=9.99913e-05, gnorm=2.386, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23327
2021-06-19 01:07:44 | INFO | train_inner | epoch 001: 2107 / 3002 loss=2.853, ppl=7.22, wps=5790.9, ups=0.09, wpb=64879, bsz=128, num_updates=2088, lr=9.99913e-05, gnorm=2.364, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23338
2021-06-19 01:07:55 | INFO | train_inner | epoch 001: 2108 / 3002 loss=2.849, ppl=7.21, wps=5930.3, ups=0.09, wpb=64902, bsz=128, num_updates=2089, lr=9.99913e-05, gnorm=2.527, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23349
2021-06-19 01:08:06 | INFO | train_inner | epoch 001: 2109 / 3002 loss=2.769, ppl=6.82, wps=5897.6, ups=0.09, wpb=64804, bsz=128, num_updates=2090, lr=9.99913e-05, gnorm=2.432, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23360
2021-06-19 01:08:17 | INFO | train_inner | epoch 001: 2110 / 3002 loss=2.857, ppl=7.25, wps=5829, ups=0.09, wpb=64751, bsz=128, num_updates=2091, lr=9.99913e-05, gnorm=2.39, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23371
2021-06-19 01:08:28 | INFO | train_inner | epoch 001: 2111 / 3002 loss=2.749, ppl=6.72, wps=5842.7, ups=0.09, wpb=64849, bsz=128, num_updates=2092, lr=9.99913e-05, gnorm=2.526, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23383
2021-06-19 01:08:39 | INFO | train_inner | epoch 001: 2112 / 3002 loss=3.043, ppl=8.24, wps=5948.6, ups=0.09, wpb=64903, bsz=128, num_updates=2093, lr=9.99913e-05, gnorm=2.358, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23393
2021-06-19 01:08:50 | INFO | train_inner | epoch 001: 2113 / 3002 loss=2.885, ppl=7.39, wps=5819.8, ups=0.09, wpb=64798, bsz=128, num_updates=2094, lr=9.99912e-05, gnorm=2.43, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23405
2021-06-19 01:09:02 | INFO | train_inner | epoch 001: 2114 / 3002 loss=3.045, ppl=8.26, wps=5734.2, ups=0.09, wpb=64790, bsz=128, num_updates=2095, lr=9.99912e-05, gnorm=2.508, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23416
2021-06-19 01:09:13 | INFO | train_inner | epoch 001: 2115 / 3002 loss=2.87, ppl=7.31, wps=5761.3, ups=0.09, wpb=64854, bsz=128, num_updates=2096, lr=9.99912e-05, gnorm=2.366, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23427
2021-06-19 01:09:24 | INFO | train_inner | epoch 001: 2116 / 3002 loss=2.897, ppl=7.45, wps=5901.5, ups=0.09, wpb=64829, bsz=128, num_updates=2097, lr=9.99912e-05, gnorm=2.396, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23438
2021-06-19 01:09:35 | INFO | train_inner | epoch 001: 2117 / 3002 loss=2.963, ppl=7.8, wps=6012, ups=0.09, wpb=64871, bsz=128, num_updates=2098, lr=9.99912e-05, gnorm=2.487, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23449
2021-06-19 01:09:45 | INFO | train_inner | epoch 001: 2118 / 3002 loss=2.93, ppl=7.62, wps=5987.7, ups=0.09, wpb=64824, bsz=128, num_updates=2099, lr=9.99912e-05, gnorm=2.435, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23460
2021-06-19 01:09:56 | INFO | train_inner | epoch 001: 2119 / 3002 loss=2.778, ppl=6.86, wps=5891.3, ups=0.09, wpb=64928, bsz=128, num_updates=2100, lr=9.99912e-05, gnorm=2.36, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23471
2021-06-19 01:10:08 | INFO | train_inner | epoch 001: 2120 / 3002 loss=2.881, ppl=7.37, wps=5846.7, ups=0.09, wpb=64853, bsz=128, num_updates=2101, lr=9.99912e-05, gnorm=2.568, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23482
2021-06-19 01:10:18 | INFO | train_inner | epoch 001: 2121 / 3002 loss=3.1, ppl=8.57, wps=5932.5, ups=0.09, wpb=64842, bsz=128, num_updates=2102, lr=9.99912e-05, gnorm=2.432, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23493
2021-06-19 01:10:30 | INFO | train_inner | epoch 001: 2122 / 3002 loss=2.89, ppl=7.41, wps=5780.2, ups=0.09, wpb=64931, bsz=128, num_updates=2103, lr=9.99912e-05, gnorm=2.398, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23504
2021-06-19 01:10:41 | INFO | train_inner | epoch 001: 2123 / 3002 loss=2.953, ppl=7.74, wps=5861.3, ups=0.09, wpb=64871, bsz=128, num_updates=2104, lr=9.99912e-05, gnorm=2.368, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23515
2021-06-19 01:10:52 | INFO | train_inner | epoch 001: 2124 / 3002 loss=2.937, ppl=7.66, wps=5889.6, ups=0.09, wpb=64837, bsz=128, num_updates=2105, lr=9.99912e-05, gnorm=2.353, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23526
2021-06-19 01:11:03 | INFO | train_inner | epoch 001: 2125 / 3002 loss=3.031, ppl=8.17, wps=5946.8, ups=0.09, wpb=64766, bsz=128, num_updates=2106, lr=9.99912e-05, gnorm=2.327, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23537
2021-06-19 01:11:13 | INFO | train_inner | epoch 001: 2126 / 3002 loss=2.825, ppl=7.08, wps=6008, ups=0.09, wpb=64840, bsz=128, num_updates=2107, lr=9.99911e-05, gnorm=2.383, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23548
2021-06-19 01:11:25 | INFO | train_inner | epoch 001: 2127 / 3002 loss=2.874, ppl=7.33, wps=5854.8, ups=0.09, wpb=64770, bsz=128, num_updates=2108, lr=9.99911e-05, gnorm=2.442, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23559
2021-06-19 01:11:35 | INFO | train_inner | epoch 001: 2128 / 3002 loss=3.074, ppl=8.42, wps=5928.7, ups=0.09, wpb=64812, bsz=128, num_updates=2109, lr=9.99911e-05, gnorm=2.416, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23570
2021-06-19 01:11:46 | INFO | train_inner | epoch 001: 2129 / 3002 loss=3.103, ppl=8.59, wps=5900.5, ups=0.09, wpb=64912, bsz=128, num_updates=2110, lr=9.99911e-05, gnorm=2.396, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23581
2021-06-19 01:11:57 | INFO | train_inner | epoch 001: 2130 / 3002 loss=3.141, ppl=8.82, wps=5943.2, ups=0.09, wpb=64870, bsz=128, num_updates=2111, lr=9.99911e-05, gnorm=2.506, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23592
2021-06-19 01:12:08 | INFO | train_inner | epoch 001: 2131 / 3002 loss=2.863, ppl=7.28, wps=5892.4, ups=0.09, wpb=64905, bsz=128, num_updates=2112, lr=9.99911e-05, gnorm=2.982, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23603
2021-06-19 01:12:19 | INFO | train_inner | epoch 001: 2132 / 3002 loss=2.836, ppl=7.14, wps=5835.9, ups=0.09, wpb=64790, bsz=128, num_updates=2113, lr=9.99911e-05, gnorm=3.204, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23614
2021-06-19 01:12:30 | INFO | train_inner | epoch 001: 2133 / 3002 loss=2.818, ppl=7.05, wps=5906.6, ups=0.09, wpb=64815, bsz=128, num_updates=2114, lr=9.99911e-05, gnorm=2.351, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23625
2021-06-19 01:12:41 | INFO | train_inner | epoch 001: 2134 / 3002 loss=2.936, ppl=7.65, wps=5906.1, ups=0.09, wpb=64844, bsz=128, num_updates=2115, lr=9.99911e-05, gnorm=2.425, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23636
2021-06-19 01:12:52 | INFO | train_inner | epoch 001: 2135 / 3002 loss=2.906, ppl=7.5, wps=5961.2, ups=0.09, wpb=64872, bsz=128, num_updates=2116, lr=9.99911e-05, gnorm=2.317, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23647
2021-06-19 01:13:03 | INFO | train_inner | epoch 001: 2136 / 3002 loss=2.798, ppl=6.95, wps=5846.9, ups=0.09, wpb=64827, bsz=128, num_updates=2117, lr=9.99911e-05, gnorm=2.365, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23658
2021-06-19 01:13:15 | INFO | train_inner | epoch 001: 2137 / 3002 loss=3.009, ppl=8.05, wps=5750.5, ups=0.09, wpb=64777, bsz=128, num_updates=2118, lr=9.99911e-05, gnorm=2.481, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23669
2021-06-19 01:13:26 | INFO | train_inner | epoch 001: 2138 / 3002 loss=2.769, ppl=6.81, wps=5838, ups=0.09, wpb=64880, bsz=128, num_updates=2119, lr=9.9991e-05, gnorm=2.303, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23680
2021-06-19 01:13:37 | INFO | train_inner | epoch 001: 2139 / 3002 loss=2.886, ppl=7.39, wps=5827.9, ups=0.09, wpb=64859, bsz=128, num_updates=2120, lr=9.9991e-05, gnorm=3.227, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23691
2021-06-19 01:13:48 | INFO | train_inner | epoch 001: 2140 / 3002 loss=3.054, ppl=8.31, wps=5834.9, ups=0.09, wpb=64828, bsz=128, num_updates=2121, lr=9.9991e-05, gnorm=2.398, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23702
2021-06-19 01:13:59 | INFO | train_inner | epoch 001: 2141 / 3002 loss=3.073, ppl=8.41, wps=5912.1, ups=0.09, wpb=64823, bsz=128, num_updates=2122, lr=9.9991e-05, gnorm=2.555, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23713
2021-06-19 01:14:10 | INFO | train_inner | epoch 001: 2142 / 3002 loss=2.846, ppl=7.19, wps=5714.5, ups=0.09, wpb=64815, bsz=128, num_updates=2123, lr=9.9991e-05, gnorm=2.381, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23725
2021-06-19 01:14:21 | INFO | train_inner | epoch 001: 2143 / 3002 loss=2.858, ppl=7.25, wps=5854, ups=0.09, wpb=64805, bsz=128, num_updates=2124, lr=9.9991e-05, gnorm=2.516, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23736
2021-06-19 01:14:32 | INFO | train_inner | epoch 001: 2144 / 3002 loss=2.883, ppl=7.38, wps=5938.6, ups=0.09, wpb=64867, bsz=128, num_updates=2125, lr=9.9991e-05, gnorm=2.402, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23747
2021-06-19 01:14:43 | INFO | train_inner | epoch 001: 2145 / 3002 loss=2.997, ppl=7.98, wps=5937.2, ups=0.09, wpb=64800, bsz=128, num_updates=2126, lr=9.9991e-05, gnorm=2.367, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23758
2021-06-19 01:14:54 | INFO | train_inner | epoch 001: 2146 / 3002 loss=2.944, ppl=7.69, wps=5877.7, ups=0.09, wpb=64863, bsz=128, num_updates=2127, lr=9.9991e-05, gnorm=2.413, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23769
2021-06-19 01:15:05 | INFO | train_inner | epoch 001: 2147 / 3002 loss=3.076, ppl=8.43, wps=5886.8, ups=0.09, wpb=64805, bsz=128, num_updates=2128, lr=9.9991e-05, gnorm=2.587, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23780
2021-06-19 01:15:16 | INFO | train_inner | epoch 001: 2148 / 3002 loss=3.022, ppl=8.12, wps=5825.6, ups=0.09, wpb=64877, bsz=128, num_updates=2129, lr=9.9991e-05, gnorm=3.042, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23791
2021-06-19 01:15:27 | INFO | train_inner | epoch 001: 2149 / 3002 loss=2.779, ppl=6.86, wps=5892.7, ups=0.09, wpb=64796, bsz=128, num_updates=2130, lr=9.9991e-05, gnorm=2.376, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23802
2021-06-19 01:15:39 | INFO | train_inner | epoch 001: 2150 / 3002 loss=2.843, ppl=7.18, wps=5812, ups=0.09, wpb=64824, bsz=128, num_updates=2131, lr=9.9991e-05, gnorm=2.347, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23813
2021-06-19 01:15:50 | INFO | train_inner | epoch 001: 2151 / 3002 loss=2.928, ppl=7.61, wps=5862.5, ups=0.09, wpb=64803, bsz=128, num_updates=2132, lr=9.99909e-05, gnorm=2.627, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23824
2021-06-19 01:16:01 | INFO | train_inner | epoch 001: 2152 / 3002 loss=2.902, ppl=7.47, wps=5913.4, ups=0.09, wpb=64811, bsz=128, num_updates=2133, lr=9.99909e-05, gnorm=2.463, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23835
2021-06-19 01:16:12 | INFO | train_inner | epoch 001: 2153 / 3002 loss=2.905, ppl=7.49, wps=5802.9, ups=0.09, wpb=64797, bsz=128, num_updates=2134, lr=9.99909e-05, gnorm=2.498, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23846
2021-06-19 01:16:23 | INFO | train_inner | epoch 001: 2154 / 3002 loss=2.874, ppl=7.33, wps=5913.4, ups=0.09, wpb=64755, bsz=128, num_updates=2135, lr=9.99909e-05, gnorm=2.479, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23857
2021-06-19 01:16:34 | INFO | train_inner | epoch 001: 2155 / 3002 loss=3.024, ppl=8.13, wps=5877.2, ups=0.09, wpb=64853, bsz=128, num_updates=2136, lr=9.99909e-05, gnorm=2.717, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23868
2021-06-19 01:16:45 | INFO | train_inner | epoch 001: 2156 / 3002 loss=2.897, ppl=7.45, wps=5894.5, ups=0.09, wpb=64819, bsz=128, num_updates=2137, lr=9.99909e-05, gnorm=19.662, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23879
2021-06-19 01:16:56 | INFO | train_inner | epoch 001: 2157 / 3002 loss=2.981, ppl=7.89, wps=5976.1, ups=0.09, wpb=64782, bsz=128, num_updates=2138, lr=9.99909e-05, gnorm=2.358, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23890
2021-06-19 01:17:07 | INFO | train_inner | epoch 001: 2158 / 3002 loss=2.973, ppl=7.85, wps=5797.3, ups=0.09, wpb=64800, bsz=128, num_updates=2139, lr=9.99909e-05, gnorm=2.418, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23901
2021-06-19 01:17:18 | INFO | train_inner | epoch 001: 2159 / 3002 loss=2.832, ppl=7.12, wps=5955.8, ups=0.09, wpb=64868, bsz=128, num_updates=2140, lr=9.99909e-05, gnorm=2.438, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23912
2021-06-19 01:17:29 | INFO | train_inner | epoch 001: 2160 / 3002 loss=2.906, ppl=7.49, wps=5958.7, ups=0.09, wpb=64793, bsz=128, num_updates=2141, lr=9.99909e-05, gnorm=2.6, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23923
2021-06-19 01:17:39 | INFO | train_inner | epoch 001: 2161 / 3002 loss=2.937, ppl=7.66, wps=5919.7, ups=0.09, wpb=64846, bsz=128, num_updates=2142, lr=9.99909e-05, gnorm=2.433, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=23934
2021-06-19 01:17:51 | INFO | train_inner | epoch 001: 2162 / 3002 loss=2.846, ppl=7.19, wps=5852.8, ups=0.09, wpb=64882, bsz=128, num_updates=2143, lr=9.99909e-05, gnorm=2.458, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23945
2021-06-19 01:18:02 | INFO | train_inner | epoch 001: 2163 / 3002 loss=2.848, ppl=7.2, wps=5796.1, ups=0.09, wpb=64865, bsz=128, num_updates=2144, lr=9.99908e-05, gnorm=2.536, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23956
2021-06-19 01:18:13 | INFO | train_inner | epoch 001: 2164 / 3002 loss=3.073, ppl=8.42, wps=5895.7, ups=0.09, wpb=64846, bsz=128, num_updates=2145, lr=9.99908e-05, gnorm=2.603, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23967
2021-06-19 01:18:24 | INFO | train_inner | epoch 001: 2165 / 3002 loss=2.967, ppl=7.82, wps=5822.4, ups=0.09, wpb=64746, bsz=128, num_updates=2146, lr=9.99908e-05, gnorm=2.457, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23978
2021-06-19 01:18:35 | INFO | train_inner | epoch 001: 2166 / 3002 loss=2.887, ppl=7.4, wps=5843.2, ups=0.09, wpb=64864, bsz=128, num_updates=2147, lr=9.99908e-05, gnorm=2.398, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=23989
2021-06-19 01:18:46 | INFO | train_inner | epoch 001: 2167 / 3002 loss=2.745, ppl=6.7, wps=5886.2, ups=0.09, wpb=64746, bsz=128, num_updates=2148, lr=9.99908e-05, gnorm=2.419, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24000
2021-06-19 01:18:57 | INFO | train_inner | epoch 001: 2168 / 3002 loss=3.064, ppl=8.36, wps=5799.8, ups=0.09, wpb=64794, bsz=128, num_updates=2149, lr=9.99908e-05, gnorm=2.615, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24011
2021-06-19 01:19:08 | INFO | train_inner | epoch 001: 2169 / 3002 loss=3.001, ppl=8.01, wps=5853.2, ups=0.09, wpb=64877, bsz=128, num_updates=2150, lr=9.99908e-05, gnorm=2.748, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24023
2021-06-19 01:19:19 | INFO | train_inner | epoch 001: 2170 / 3002 loss=2.993, ppl=7.96, wps=5911.2, ups=0.09, wpb=64854, bsz=128, num_updates=2151, lr=9.99908e-05, gnorm=2.456, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=24034
2021-06-19 01:19:30 | INFO | train_inner | epoch 001: 2171 / 3002 loss=2.892, ppl=7.42, wps=5823.4, ups=0.09, wpb=64935, bsz=128, num_updates=2152, lr=9.99908e-05, gnorm=2.458, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24045
2021-06-19 01:19:41 | INFO | train_inner | epoch 001: 2172 / 3002 loss=2.939, ppl=7.67, wps=5900.9, ups=0.09, wpb=64873, bsz=128, num_updates=2153, lr=9.99908e-05, gnorm=2.52, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24056
2021-06-19 01:19:52 | INFO | train_inner | epoch 001: 2173 / 3002 loss=2.971, ppl=7.84, wps=5848, ups=0.09, wpb=64745, bsz=128, num_updates=2154, lr=9.99908e-05, gnorm=2.469, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24067
2021-06-19 01:20:04 | INFO | train_inner | epoch 001: 2174 / 3002 loss=2.765, ppl=6.8, wps=5725.4, ups=0.09, wpb=64915, bsz=128, num_updates=2155, lr=9.99908e-05, gnorm=2.407, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24078
2021-06-19 01:20:15 | INFO | train_inner | epoch 001: 2175 / 3002 loss=2.825, ppl=7.09, wps=5905.2, ups=0.09, wpb=64799, bsz=128, num_updates=2156, lr=9.99908e-05, gnorm=2.544, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24089
2021-06-19 01:20:26 | INFO | train_inner | epoch 001: 2176 / 3002 loss=2.805, ppl=6.99, wps=5775.6, ups=0.09, wpb=64810, bsz=128, num_updates=2157, lr=9.99907e-05, gnorm=2.454, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24100
2021-06-19 01:20:37 | INFO | train_inner | epoch 001: 2177 / 3002 loss=2.902, ppl=7.47, wps=5884.4, ups=0.09, wpb=64879, bsz=128, num_updates=2158, lr=9.99907e-05, gnorm=2.5, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24111
2021-06-19 01:20:48 | INFO | train_inner | epoch 001: 2178 / 3002 loss=3.016, ppl=8.09, wps=5836.4, ups=0.09, wpb=64884, bsz=128, num_updates=2159, lr=9.99907e-05, gnorm=2.589, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24122
2021-06-19 01:20:59 | INFO | train_inner | epoch 001: 2179 / 3002 loss=3.031, ppl=8.18, wps=5909.1, ups=0.09, wpb=64776, bsz=128, num_updates=2160, lr=9.99907e-05, gnorm=2.555, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=24133
2021-06-19 01:21:10 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25
2021-06-19 01:21:21 | INFO | train_inner | epoch 001: 2181 / 3002 loss=2.866, ppl=7.29, wps=2945.7, ups=0.05, wpb=64827, bsz=128, num_updates=2161, lr=9.99907e-05, gnorm=2.615, loss_scale=0.25, train_wall=21, gb_free=2.8, wall=24155
2021-06-19 01:21:32 | INFO | train_inner | epoch 001: 2182 / 3002 loss=2.847, ppl=7.19, wps=5850.8, ups=0.09, wpb=64781, bsz=128, num_updates=2162, lr=9.99907e-05, gnorm=2.411, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24166
2021-06-19 01:21:43 | INFO | train_inner | epoch 001: 2183 / 3002 loss=3.087, ppl=8.5, wps=6065.9, ups=0.09, wpb=64973, bsz=128, num_updates=2163, lr=9.99907e-05, gnorm=2.401, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24177
2021-06-19 01:21:54 | INFO | train_inner | epoch 001: 2184 / 3002 loss=3.011, ppl=8.06, wps=5875.1, ups=0.09, wpb=64762, bsz=128, num_updates=2164, lr=9.99907e-05, gnorm=2.464, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24188
2021-06-19 01:22:05 | INFO | train_inner | epoch 001: 2185 / 3002 loss=2.878, ppl=7.35, wps=5872.6, ups=0.09, wpb=64781, bsz=128, num_updates=2165, lr=9.99907e-05, gnorm=2.479, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24199
2021-06-19 01:22:16 | INFO | train_inner | epoch 001: 2186 / 3002 loss=3.03, ppl=8.17, wps=5937.7, ups=0.09, wpb=64865, bsz=128, num_updates=2166, lr=9.99907e-05, gnorm=2.577, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24210
2021-06-19 01:22:27 | INFO | train_inner | epoch 001: 2187 / 3002 loss=2.914, ppl=7.54, wps=5941.8, ups=0.09, wpb=64817, bsz=128, num_updates=2167, lr=9.99907e-05, gnorm=2.689, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24221
2021-06-19 01:22:38 | INFO | train_inner | epoch 001: 2188 / 3002 loss=2.904, ppl=7.49, wps=5904.8, ups=0.09, wpb=64874, bsz=128, num_updates=2168, lr=9.99907e-05, gnorm=2.494, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24232
2021-06-19 01:22:49 | INFO | train_inner | epoch 001: 2189 / 3002 loss=2.745, ppl=6.71, wps=5749.9, ups=0.09, wpb=64768, bsz=128, num_updates=2169, lr=9.99906e-05, gnorm=2.338, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24243
2021-06-19 01:23:00 | INFO | train_inner | epoch 001: 2190 / 3002 loss=3.015, ppl=8.09, wps=5899.6, ups=0.09, wpb=64829, bsz=128, num_updates=2170, lr=9.99906e-05, gnorm=2.382, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24254
2021-06-19 01:23:11 | INFO | train_inner | epoch 001: 2191 / 3002 loss=2.914, ppl=7.53, wps=5860.5, ups=0.09, wpb=64867, bsz=128, num_updates=2171, lr=9.99906e-05, gnorm=2.47, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24265
2021-06-19 01:23:22 | INFO | train_inner | epoch 001: 2192 / 3002 loss=3.066, ppl=8.37, wps=5981.4, ups=0.09, wpb=64778, bsz=128, num_updates=2172, lr=9.99906e-05, gnorm=2.637, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24276
2021-06-19 01:23:33 | INFO | train_inner | epoch 001: 2193 / 3002 loss=2.925, ppl=7.59, wps=5839.7, ups=0.09, wpb=64890, bsz=128, num_updates=2173, lr=9.99906e-05, gnorm=2.55, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24287
2021-06-19 01:23:44 | INFO | train_inner | epoch 001: 2194 / 3002 loss=2.944, ppl=7.7, wps=5878.8, ups=0.09, wpb=64812, bsz=128, num_updates=2174, lr=9.99906e-05, gnorm=2.464, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24298
2021-06-19 01:23:55 | INFO | train_inner | epoch 001: 2195 / 3002 loss=2.884, ppl=7.38, wps=5967.6, ups=0.09, wpb=64861, bsz=128, num_updates=2175, lr=9.99906e-05, gnorm=2.42, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24309
2021-06-19 01:24:06 | INFO | train_inner | epoch 001: 2196 / 3002 loss=3.026, ppl=8.14, wps=5758, ups=0.09, wpb=64809, bsz=128, num_updates=2176, lr=9.99906e-05, gnorm=2.292, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24320
2021-06-19 01:24:17 | INFO | train_inner | epoch 001: 2197 / 3002 loss=2.873, ppl=7.33, wps=5940.4, ups=0.09, wpb=64907, bsz=128, num_updates=2177, lr=9.99906e-05, gnorm=2.409, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24331
2021-06-19 01:24:28 | INFO | train_inner | epoch 001: 2198 / 3002 loss=2.815, ppl=7.04, wps=5768.9, ups=0.09, wpb=64810, bsz=128, num_updates=2178, lr=9.99906e-05, gnorm=7.06, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24343
2021-06-19 01:24:40 | INFO | train_inner | epoch 001: 2199 / 3002 loss=2.896, ppl=7.45, wps=5761.8, ups=0.09, wpb=64809, bsz=128, num_updates=2179, lr=9.99906e-05, gnorm=2.405, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24354
2021-06-19 01:24:51 | INFO | train_inner | epoch 001: 2200 / 3002 loss=2.937, ppl=7.66, wps=5870.9, ups=0.09, wpb=64880, bsz=128, num_updates=2180, lr=9.99906e-05, gnorm=2.614, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24365
2021-06-19 01:25:02 | INFO | train_inner | epoch 001: 2201 / 3002 loss=2.862, ppl=7.27, wps=5797.9, ups=0.09, wpb=64821, bsz=128, num_updates=2181, lr=9.99906e-05, gnorm=2.376, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24376
2021-06-19 01:25:13 | INFO | train_inner | epoch 001: 2202 / 3002 loss=2.965, ppl=7.81, wps=5916.2, ups=0.09, wpb=64855, bsz=128, num_updates=2182, lr=9.99905e-05, gnorm=2.487, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24387
2021-06-19 01:25:24 | INFO | train_inner | epoch 001: 2203 / 3002 loss=2.931, ppl=7.63, wps=5978.3, ups=0.09, wpb=64907, bsz=128, num_updates=2183, lr=9.99905e-05, gnorm=2.486, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24398
2021-06-19 01:25:35 | INFO | train_inner | epoch 001: 2204 / 3002 loss=2.945, ppl=7.7, wps=5785.3, ups=0.09, wpb=64787, bsz=128, num_updates=2184, lr=9.99905e-05, gnorm=2.322, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24409
2021-06-19 01:25:46 | INFO | train_inner | epoch 001: 2205 / 3002 loss=3.013, ppl=8.07, wps=5848.5, ups=0.09, wpb=64819, bsz=128, num_updates=2185, lr=9.99905e-05, gnorm=2.424, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24420
2021-06-19 01:25:57 | INFO | train_inner | epoch 001: 2206 / 3002 loss=3.048, ppl=8.27, wps=5775.6, ups=0.09, wpb=64776, bsz=128, num_updates=2186, lr=9.99905e-05, gnorm=2.402, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24431
2021-06-19 01:26:08 | INFO | train_inner | epoch 001: 2207 / 3002 loss=3.161, ppl=8.94, wps=5873, ups=0.09, wpb=64839, bsz=128, num_updates=2187, lr=9.99905e-05, gnorm=2.611, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24442
2021-06-19 01:26:19 | INFO | train_inner | epoch 001: 2208 / 3002 loss=2.916, ppl=7.55, wps=5997, ups=0.09, wpb=64830, bsz=128, num_updates=2188, lr=9.99905e-05, gnorm=2.534, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24453
2021-06-19 01:26:30 | INFO | train_inner | epoch 001: 2209 / 3002 loss=2.882, ppl=7.37, wps=5852.7, ups=0.09, wpb=64871, bsz=128, num_updates=2189, lr=9.99905e-05, gnorm=2.562, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24464
2021-06-19 01:26:41 | INFO | train_inner | epoch 001: 2210 / 3002 loss=2.93, ppl=7.62, wps=5799.8, ups=0.09, wpb=64808, bsz=128, num_updates=2190, lr=9.99905e-05, gnorm=2.478, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24476
2021-06-19 01:26:52 | INFO | train_inner | epoch 001: 2211 / 3002 loss=2.847, ppl=7.2, wps=5957.2, ups=0.09, wpb=64809, bsz=128, num_updates=2191, lr=9.99905e-05, gnorm=4.216, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24486
2021-06-19 01:27:03 | INFO | train_inner | epoch 001: 2212 / 3002 loss=2.84, ppl=7.16, wps=5774.1, ups=0.09, wpb=64821, bsz=128, num_updates=2192, lr=9.99905e-05, gnorm=2.8, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24498
2021-06-19 01:27:14 | INFO | train_inner | epoch 001: 2213 / 3002 loss=3.032, ppl=8.18, wps=5941.8, ups=0.09, wpb=64807, bsz=128, num_updates=2193, lr=9.99905e-05, gnorm=2.465, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24509
2021-06-19 01:27:25 | INFO | train_inner | epoch 001: 2214 / 3002 loss=2.928, ppl=7.61, wps=5823.5, ups=0.09, wpb=64818, bsz=128, num_updates=2194, lr=9.99904e-05, gnorm=2.611, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24520
2021-06-19 01:27:36 | INFO | train_inner | epoch 001: 2215 / 3002 loss=2.885, ppl=7.39, wps=5879.3, ups=0.09, wpb=64831, bsz=128, num_updates=2195, lr=9.99904e-05, gnorm=2.906, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24531
2021-06-19 01:27:47 | INFO | train_inner | epoch 001: 2216 / 3002 loss=2.92, ppl=7.57, wps=5876.6, ups=0.09, wpb=64791, bsz=128, num_updates=2196, lr=9.99904e-05, gnorm=2.514, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24542
2021-06-19 01:27:59 | INFO | train_inner | epoch 001: 2217 / 3002 loss=2.708, ppl=6.53, wps=5799.7, ups=0.09, wpb=64808, bsz=128, num_updates=2197, lr=9.99904e-05, gnorm=2.372, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24553
2021-06-19 01:28:09 | INFO | train_inner | epoch 001: 2218 / 3002 loss=3.015, ppl=8.08, wps=5965.5, ups=0.09, wpb=64802, bsz=128, num_updates=2198, lr=9.99904e-05, gnorm=2.458, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24564
2021-06-19 01:28:21 | INFO | train_inner | epoch 001: 2219 / 3002 loss=3.047, ppl=8.26, wps=5776, ups=0.09, wpb=64797, bsz=128, num_updates=2199, lr=9.99904e-05, gnorm=2.952, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24575
2021-06-19 01:28:32 | INFO | train_inner | epoch 001: 2220 / 3002 loss=2.799, ppl=6.96, wps=5879.4, ups=0.09, wpb=64922, bsz=128, num_updates=2200, lr=9.99904e-05, gnorm=2.428, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24586
2021-06-19 01:28:43 | INFO | train_inner | epoch 001: 2221 / 3002 loss=2.911, ppl=7.52, wps=5985.3, ups=0.09, wpb=64881, bsz=128, num_updates=2201, lr=9.99904e-05, gnorm=2.544, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24597
2021-06-19 01:28:53 | INFO | train_inner | epoch 001: 2222 / 3002 loss=2.856, ppl=7.24, wps=5978.9, ups=0.09, wpb=64842, bsz=128, num_updates=2202, lr=9.99904e-05, gnorm=2.526, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24608
2021-06-19 01:29:04 | INFO | train_inner | epoch 001: 2223 / 3002 loss=2.975, ppl=7.86, wps=5874.4, ups=0.09, wpb=64863, bsz=128, num_updates=2203, lr=9.99904e-05, gnorm=2.441, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24619
2021-06-19 01:29:15 | INFO | train_inner | epoch 001: 2224 / 3002 loss=2.791, ppl=6.92, wps=5854.7, ups=0.09, wpb=64836, bsz=128, num_updates=2204, lr=9.99904e-05, gnorm=2.395, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24630
2021-06-19 01:29:26 | INFO | train_inner | epoch 001: 2225 / 3002 loss=2.958, ppl=7.77, wps=5939.5, ups=0.09, wpb=64861, bsz=128, num_updates=2205, lr=9.99904e-05, gnorm=2.513, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24641
2021-06-19 01:29:38 | INFO | train_inner | epoch 001: 2226 / 3002 loss=3.103, ppl=8.59, wps=5801.2, ups=0.09, wpb=64785, bsz=128, num_updates=2206, lr=9.99904e-05, gnorm=2.693, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24652
2021-06-19 01:29:48 | INFO | train_inner | epoch 001: 2227 / 3002 loss=2.908, ppl=7.51, wps=5993.2, ups=0.09, wpb=64818, bsz=128, num_updates=2207, lr=9.99903e-05, gnorm=2.356, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24663
2021-06-19 01:30:00 | INFO | train_inner | epoch 001: 2228 / 3002 loss=2.727, ppl=6.62, wps=5830.6, ups=0.09, wpb=64855, bsz=128, num_updates=2208, lr=9.99903e-05, gnorm=2.412, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24674
2021-06-19 01:30:11 | INFO | train_inner | epoch 001: 2229 / 3002 loss=3.021, ppl=8.12, wps=5846.5, ups=0.09, wpb=64883, bsz=128, num_updates=2209, lr=9.99903e-05, gnorm=86.127, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24685
2021-06-19 01:30:22 | INFO | train_inner | epoch 001: 2230 / 3002 loss=2.969, ppl=7.83, wps=5889.3, ups=0.09, wpb=64814, bsz=128, num_updates=2210, lr=9.99903e-05, gnorm=2.364, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24696
2021-06-19 01:30:33 | INFO | train_inner | epoch 001: 2231 / 3002 loss=2.903, ppl=7.48, wps=5721.9, ups=0.09, wpb=64771, bsz=128, num_updates=2211, lr=9.99903e-05, gnorm=2.86, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24707
2021-06-19 01:30:44 | INFO | train_inner | epoch 001: 2232 / 3002 loss=2.928, ppl=7.61, wps=5917.8, ups=0.09, wpb=64860, bsz=128, num_updates=2212, lr=9.99903e-05, gnorm=2.883, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24718
2021-06-19 01:30:55 | INFO | train_inner | epoch 001: 2233 / 3002 loss=2.909, ppl=7.51, wps=5888.1, ups=0.09, wpb=64880, bsz=128, num_updates=2213, lr=9.99903e-05, gnorm=2.698, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24729
2021-06-19 01:31:06 | INFO | train_inner | epoch 001: 2234 / 3002 loss=2.827, ppl=7.09, wps=5766.2, ups=0.09, wpb=64845, bsz=128, num_updates=2214, lr=9.99903e-05, gnorm=2.953, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24741
2021-06-19 01:31:17 | INFO | train_inner | epoch 001: 2235 / 3002 loss=2.893, ppl=7.43, wps=5874.1, ups=0.09, wpb=64829, bsz=128, num_updates=2215, lr=9.99903e-05, gnorm=3.006, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24752
2021-06-19 01:31:28 | INFO | train_inner | epoch 001: 2236 / 3002 loss=2.862, ppl=7.27, wps=5956.2, ups=0.09, wpb=64845, bsz=128, num_updates=2216, lr=9.99903e-05, gnorm=4.227, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24762
2021-06-19 01:31:39 | INFO | train_inner | epoch 001: 2237 / 3002 loss=2.811, ppl=7.02, wps=5855.7, ups=0.09, wpb=64875, bsz=128, num_updates=2217, lr=9.99903e-05, gnorm=3.145, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24774
2021-06-19 01:31:50 | INFO | train_inner | epoch 001: 2238 / 3002 loss=3.029, ppl=8.16, wps=5756.4, ups=0.09, wpb=64801, bsz=128, num_updates=2218, lr=9.99903e-05, gnorm=6.11, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24785
2021-06-19 01:32:01 | INFO | train_inner | epoch 001: 2239 / 3002 loss=3.203, ppl=9.21, wps=5926.9, ups=0.09, wpb=64828, bsz=128, num_updates=2219, lr=9.99902e-05, gnorm=4.395, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24796
2021-06-19 01:32:12 | INFO | train_inner | epoch 001: 2240 / 3002 loss=3.091, ppl=8.52, wps=5888.8, ups=0.09, wpb=64826, bsz=128, num_updates=2220, lr=9.99902e-05, gnorm=9.295, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24807
2021-06-19 01:32:23 | INFO | train_inner | epoch 001: 2241 / 3002 loss=2.942, ppl=7.68, wps=5872.8, ups=0.09, wpb=64769, bsz=128, num_updates=2221, lr=9.99902e-05, gnorm=3.979, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24818
2021-06-19 01:32:35 | INFO | train_inner | epoch 001: 2242 / 3002 loss=2.845, ppl=7.18, wps=5752.3, ups=0.09, wpb=64885, bsz=128, num_updates=2222, lr=9.99902e-05, gnorm=5.865, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24829
2021-06-19 01:32:46 | INFO | train_inner | epoch 001: 2243 / 3002 loss=2.919, ppl=7.56, wps=5857.9, ups=0.09, wpb=64911, bsz=128, num_updates=2223, lr=9.99902e-05, gnorm=4.606, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24840
2021-06-19 01:32:57 | INFO | train_inner | epoch 001: 2244 / 3002 loss=2.94, ppl=7.67, wps=5836.3, ups=0.09, wpb=64757, bsz=128, num_updates=2224, lr=9.99902e-05, gnorm=3.518, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24851
2021-06-19 01:33:08 | INFO | train_inner | epoch 001: 2245 / 3002 loss=2.839, ppl=7.15, wps=5899.7, ups=0.09, wpb=64808, bsz=128, num_updates=2225, lr=9.99902e-05, gnorm=2.969, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24862
2021-06-19 01:33:19 | INFO | train_inner | epoch 001: 2246 / 3002 loss=2.933, ppl=7.64, wps=5861, ups=0.09, wpb=64768, bsz=128, num_updates=2226, lr=9.99902e-05, gnorm=2.684, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24873
2021-06-19 01:33:30 | INFO | train_inner | epoch 001: 2247 / 3002 loss=3.088, ppl=8.5, wps=5836.9, ups=0.09, wpb=64738, bsz=128, num_updates=2227, lr=9.99902e-05, gnorm=2.855, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24884
2021-06-19 01:33:41 | INFO | train_inner | epoch 001: 2248 / 3002 loss=3.02, ppl=8.11, wps=5867.1, ups=0.09, wpb=64783, bsz=128, num_updates=2228, lr=9.99902e-05, gnorm=2.612, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24895
2021-06-19 01:33:52 | INFO | train_inner | epoch 001: 2249 / 3002 loss=3.093, ppl=8.53, wps=5778.7, ups=0.09, wpb=64808, bsz=128, num_updates=2229, lr=9.99902e-05, gnorm=2.597, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24907
2021-06-19 01:34:03 | INFO | train_inner | epoch 001: 2250 / 3002 loss=3.074, ppl=8.42, wps=5866.3, ups=0.09, wpb=64766, bsz=128, num_updates=2230, lr=9.99902e-05, gnorm=5.796, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24918
2021-06-19 01:34:14 | INFO | train_inner | epoch 001: 2251 / 3002 loss=3.047, ppl=8.26, wps=5914.8, ups=0.09, wpb=64767, bsz=128, num_updates=2231, lr=9.99902e-05, gnorm=2.537, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24929
2021-06-19 01:34:25 | INFO | train_inner | epoch 001: 2252 / 3002 loss=3.045, ppl=8.25, wps=5782.4, ups=0.09, wpb=64756, bsz=128, num_updates=2232, lr=9.99901e-05, gnorm=2.551, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24940
2021-06-19 01:34:36 | INFO | train_inner | epoch 001: 2253 / 3002 loss=2.994, ppl=7.97, wps=5871.3, ups=0.09, wpb=64801, bsz=128, num_updates=2233, lr=9.99901e-05, gnorm=2.575, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24951
2021-06-19 01:34:48 | INFO | train_inner | epoch 001: 2254 / 3002 loss=3.034, ppl=8.19, wps=5836.2, ups=0.09, wpb=64829, bsz=128, num_updates=2234, lr=9.99901e-05, gnorm=2.518, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24962
2021-06-19 01:34:59 | INFO | train_inner | epoch 001: 2255 / 3002 loss=2.97, ppl=7.84, wps=5668.5, ups=0.09, wpb=64784, bsz=128, num_updates=2235, lr=9.99901e-05, gnorm=2.566, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24973
2021-06-19 01:35:10 | INFO | train_inner | epoch 001: 2256 / 3002 loss=3.046, ppl=8.26, wps=5941.1, ups=0.09, wpb=64875, bsz=128, num_updates=2236, lr=9.99901e-05, gnorm=3.583, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=24984
2021-06-19 01:35:21 | INFO | train_inner | epoch 001: 2257 / 3002 loss=2.905, ppl=7.49, wps=5849.2, ups=0.09, wpb=64810, bsz=128, num_updates=2237, lr=9.99901e-05, gnorm=2.444, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=24995
2021-06-19 01:35:32 | INFO | train_inner | epoch 001: 2258 / 3002 loss=3.18, ppl=9.06, wps=5710, ups=0.09, wpb=64815, bsz=128, num_updates=2238, lr=9.99901e-05, gnorm=2.572, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25007
2021-06-19 01:35:43 | INFO | train_inner | epoch 001: 2259 / 3002 loss=2.811, ppl=7.02, wps=5870.3, ups=0.09, wpb=64826, bsz=128, num_updates=2239, lr=9.99901e-05, gnorm=2.677, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25018
2021-06-19 01:35:55 | INFO | train_inner | epoch 001: 2260 / 3002 loss=2.916, ppl=7.55, wps=5788.2, ups=0.09, wpb=64742, bsz=128, num_updates=2240, lr=9.99901e-05, gnorm=2.479, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25029
2021-06-19 01:36:06 | INFO | train_inner | epoch 001: 2261 / 3002 loss=3.073, ppl=8.41, wps=5818, ups=0.09, wpb=64851, bsz=128, num_updates=2241, lr=9.99901e-05, gnorm=3.907, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25040
2021-06-19 01:36:17 | INFO | train_inner | epoch 001: 2262 / 3002 loss=3.038, ppl=8.21, wps=5998.4, ups=0.09, wpb=64808, bsz=128, num_updates=2242, lr=9.99901e-05, gnorm=2.542, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=25051
2021-06-19 01:36:28 | INFO | train_inner | epoch 001: 2263 / 3002 loss=2.96, ppl=7.78, wps=5832.3, ups=0.09, wpb=64800, bsz=128, num_updates=2243, lr=9.99901e-05, gnorm=2.792, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25062
2021-06-19 01:36:39 | INFO | train_inner | epoch 001: 2264 / 3002 loss=3.084, ppl=8.48, wps=5847.9, ups=0.09, wpb=64899, bsz=128, num_updates=2244, lr=9.999e-05, gnorm=2.531, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25073
2021-06-19 01:36:50 | INFO | train_inner | epoch 001: 2265 / 3002 loss=2.875, ppl=7.34, wps=5909.1, ups=0.09, wpb=64947, bsz=128, num_updates=2245, lr=9.999e-05, gnorm=2.436, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25084
2021-06-19 01:37:01 | INFO | train_inner | epoch 001: 2266 / 3002 loss=3.038, ppl=8.21, wps=5753.5, ups=0.09, wpb=64761, bsz=128, num_updates=2246, lr=9.999e-05, gnorm=2.579, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25095
2021-06-19 01:37:12 | INFO | train_inner | epoch 001: 2267 / 3002 loss=3.009, ppl=8.05, wps=5857.3, ups=0.09, wpb=64781, bsz=128, num_updates=2247, lr=9.999e-05, gnorm=2.448, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25106
2021-06-19 01:37:23 | INFO | train_inner | epoch 001: 2268 / 3002 loss=2.877, ppl=7.35, wps=5873.5, ups=0.09, wpb=64883, bsz=128, num_updates=2248, lr=9.999e-05, gnorm=2.467, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25117
2021-06-19 01:37:34 | INFO | train_inner | epoch 001: 2269 / 3002 loss=2.929, ppl=7.61, wps=5867.8, ups=0.09, wpb=64851, bsz=128, num_updates=2249, lr=9.999e-05, gnorm=2.439, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25128
2021-06-19 01:37:45 | INFO | train_inner | epoch 001: 2270 / 3002 loss=2.978, ppl=7.88, wps=5880.8, ups=0.09, wpb=64820, bsz=128, num_updates=2250, lr=9.999e-05, gnorm=2.414, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25140
2021-06-19 01:37:56 | INFO | train_inner | epoch 001: 2271 / 3002 loss=2.929, ppl=7.61, wps=5892, ups=0.09, wpb=64883, bsz=128, num_updates=2251, lr=9.999e-05, gnorm=2.604, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25151
2021-06-19 01:38:07 | INFO | train_inner | epoch 001: 2272 / 3002 loss=3.018, ppl=8.1, wps=5830.3, ups=0.09, wpb=64878, bsz=128, num_updates=2252, lr=9.999e-05, gnorm=2.543, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25162
2021-06-19 01:38:18 | INFO | train_inner | epoch 001: 2273 / 3002 loss=3.041, ppl=8.23, wps=5961.8, ups=0.09, wpb=64801, bsz=128, num_updates=2253, lr=9.999e-05, gnorm=2.635, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=25173
2021-06-19 01:38:29 | INFO | train_inner | epoch 001: 2274 / 3002 loss=2.893, ppl=7.43, wps=5900.3, ups=0.09, wpb=64851, bsz=128, num_updates=2254, lr=9.999e-05, gnorm=2.403, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25184
2021-06-19 01:38:40 | INFO | train_inner | epoch 001: 2275 / 3002 loss=3.152, ppl=8.89, wps=5838.3, ups=0.09, wpb=64804, bsz=128, num_updates=2255, lr=9.999e-05, gnorm=2.432, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25195
2021-06-19 01:38:51 | INFO | train_inner | epoch 001: 2276 / 3002 loss=2.899, ppl=7.46, wps=5833, ups=0.09, wpb=64885, bsz=128, num_updates=2256, lr=9.999e-05, gnorm=2.558, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25206
2021-06-19 01:39:03 | INFO | train_inner | epoch 001: 2277 / 3002 loss=2.97, ppl=7.84, wps=5774.6, ups=0.09, wpb=64770, bsz=128, num_updates=2257, lr=9.99899e-05, gnorm=2.506, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25217
2021-06-19 01:39:14 | INFO | train_inner | epoch 001: 2278 / 3002 loss=2.943, ppl=7.69, wps=5711.7, ups=0.09, wpb=64814, bsz=128, num_updates=2258, lr=9.99899e-05, gnorm=3.467, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25228
2021-06-19 01:39:25 | INFO | train_inner | epoch 001: 2279 / 3002 loss=2.98, ppl=7.89, wps=5794.9, ups=0.09, wpb=64738, bsz=128, num_updates=2259, lr=9.99899e-05, gnorm=2.482, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25239
2021-06-19 01:39:36 | INFO | train_inner | epoch 001: 2280 / 3002 loss=3.129, ppl=8.75, wps=5787, ups=0.09, wpb=64756, bsz=128, num_updates=2260, lr=9.99899e-05, gnorm=2.565, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25251
2021-06-19 01:39:47 | INFO | train_inner | epoch 001: 2281 / 3002 loss=2.906, ppl=7.49, wps=5866.5, ups=0.09, wpb=64728, bsz=128, num_updates=2261, lr=9.99899e-05, gnorm=2.52, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25262
2021-06-19 01:39:58 | INFO | train_inner | epoch 001: 2282 / 3002 loss=2.841, ppl=7.17, wps=5859.7, ups=0.09, wpb=64913, bsz=128, num_updates=2262, lr=9.99899e-05, gnorm=2.475, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25273
2021-06-19 01:40:10 | INFO | train_inner | epoch 001: 2283 / 3002 loss=2.969, ppl=7.83, wps=5800, ups=0.09, wpb=64845, bsz=128, num_updates=2263, lr=9.99899e-05, gnorm=2.529, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25284
2021-06-19 01:40:21 | INFO | train_inner | epoch 001: 2284 / 3002 loss=2.954, ppl=7.75, wps=5841.8, ups=0.09, wpb=64831, bsz=128, num_updates=2264, lr=9.99899e-05, gnorm=2.408, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25295
2021-06-19 01:40:32 | INFO | train_inner | epoch 001: 2285 / 3002 loss=3.228, ppl=9.37, wps=5792.9, ups=0.09, wpb=64778, bsz=128, num_updates=2265, lr=9.99899e-05, gnorm=2.447, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25306
2021-06-19 01:40:43 | INFO | train_inner | epoch 001: 2286 / 3002 loss=2.837, ppl=7.14, wps=5843.5, ups=0.09, wpb=64832, bsz=128, num_updates=2266, lr=9.99899e-05, gnorm=2.352, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25317
2021-06-19 01:40:54 | INFO | train_inner | epoch 001: 2287 / 3002 loss=2.975, ppl=7.86, wps=5777.8, ups=0.09, wpb=64780, bsz=128, num_updates=2267, lr=9.99899e-05, gnorm=3.604, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25329
2021-06-19 01:41:05 | INFO | train_inner | epoch 001: 2288 / 3002 loss=3.014, ppl=8.08, wps=5824.3, ups=0.09, wpb=64831, bsz=128, num_updates=2268, lr=9.99899e-05, gnorm=2.455, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25340
2021-06-19 01:41:16 | INFO | train_inner | epoch 001: 2289 / 3002 loss=2.819, ppl=7.06, wps=5846.5, ups=0.09, wpb=64912, bsz=128, num_updates=2269, lr=9.99898e-05, gnorm=2.326, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25351
2021-06-19 01:41:28 | INFO | train_inner | epoch 001: 2290 / 3002 loss=3.072, ppl=8.41, wps=5821.3, ups=0.09, wpb=64760, bsz=128, num_updates=2270, lr=9.99898e-05, gnorm=2.679, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25362
2021-06-19 01:41:39 | INFO | train_inner | epoch 001: 2291 / 3002 loss=2.903, ppl=7.48, wps=5784.3, ups=0.09, wpb=64632, bsz=128, num_updates=2271, lr=9.99898e-05, gnorm=2.464, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25373
2021-06-19 01:41:50 | INFO | train_inner | epoch 001: 2292 / 3002 loss=2.934, ppl=7.64, wps=5902.4, ups=0.09, wpb=64802, bsz=128, num_updates=2272, lr=9.99898e-05, gnorm=2.508, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25384
2021-06-19 01:42:01 | INFO | train_inner | epoch 001: 2293 / 3002 loss=3.023, ppl=8.13, wps=5770.9, ups=0.09, wpb=64793, bsz=128, num_updates=2273, lr=9.99898e-05, gnorm=2.393, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25395
2021-06-19 01:42:12 | INFO | train_inner | epoch 001: 2294 / 3002 loss=2.976, ppl=7.87, wps=5868.9, ups=0.09, wpb=64770, bsz=128, num_updates=2274, lr=9.99898e-05, gnorm=2.643, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25406
2021-06-19 01:42:23 | INFO | train_inner | epoch 001: 2295 / 3002 loss=3.108, ppl=8.62, wps=5917.8, ups=0.09, wpb=64753, bsz=128, num_updates=2275, lr=9.99898e-05, gnorm=4.927, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=25417
2021-06-19 01:42:34 | INFO | train_inner | epoch 001: 2296 / 3002 loss=2.931, ppl=7.63, wps=5839.5, ups=0.09, wpb=64772, bsz=128, num_updates=2276, lr=9.99898e-05, gnorm=2.43, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25428
2021-06-19 01:42:45 | INFO | train_inner | epoch 001: 2297 / 3002 loss=2.882, ppl=7.37, wps=5816.5, ups=0.09, wpb=64869, bsz=128, num_updates=2277, lr=9.99898e-05, gnorm=2.276, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25440
2021-06-19 01:42:56 | INFO | train_inner | epoch 001: 2298 / 3002 loss=2.906, ppl=7.5, wps=5830.9, ups=0.09, wpb=64732, bsz=128, num_updates=2278, lr=9.99898e-05, gnorm=2.345, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25451
2021-06-19 01:43:07 | INFO | train_inner | epoch 001: 2299 / 3002 loss=2.874, ppl=7.33, wps=5840.7, ups=0.09, wpb=64790, bsz=128, num_updates=2279, lr=9.99898e-05, gnorm=2.396, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25462
2021-06-19 01:43:18 | INFO | train_inner | epoch 001: 2300 / 3002 loss=2.867, ppl=7.3, wps=5841.3, ups=0.09, wpb=64733, bsz=128, num_updates=2280, lr=9.99898e-05, gnorm=2.561, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25473
2021-06-19 01:43:30 | INFO | train_inner | epoch 001: 2301 / 3002 loss=2.929, ppl=7.62, wps=5810.4, ups=0.09, wpb=64738, bsz=128, num_updates=2281, lr=9.99898e-05, gnorm=2.448, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25484
2021-06-19 01:43:41 | INFO | train_inner | epoch 001: 2302 / 3002 loss=2.919, ppl=7.56, wps=5785.1, ups=0.09, wpb=64854, bsz=128, num_updates=2282, lr=9.99897e-05, gnorm=2.378, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25495
2021-06-19 01:43:52 | INFO | train_inner | epoch 001: 2303 / 3002 loss=2.901, ppl=7.47, wps=5873, ups=0.09, wpb=64788, bsz=128, num_updates=2283, lr=9.99897e-05, gnorm=2.536, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25506
2021-06-19 01:44:03 | INFO | train_inner | epoch 001: 2304 / 3002 loss=2.959, ppl=7.78, wps=5844, ups=0.09, wpb=64841, bsz=128, num_updates=2284, lr=9.99897e-05, gnorm=2.465, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25517
2021-06-19 01:44:14 | INFO | train_inner | epoch 001: 2305 / 3002 loss=2.869, ppl=7.3, wps=5976.7, ups=0.09, wpb=64824, bsz=128, num_updates=2285, lr=9.99897e-05, gnorm=2.386, loss_scale=0.25, train_wall=10, gb_free=2.8, wall=25528
2021-06-19 01:44:25 | INFO | train_inner | epoch 001: 2306 / 3002 loss=2.945, ppl=7.7, wps=5806.6, ups=0.09, wpb=64875, bsz=128, num_updates=2286, lr=9.99897e-05, gnorm=2.355, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25539
2021-06-19 01:44:36 | INFO | train_inner | epoch 001: 2307 / 3002 loss=2.827, ppl=7.1, wps=5838, ups=0.09, wpb=64816, bsz=128, num_updates=2287, lr=9.99897e-05, gnorm=2.293, loss_scale=0.25, train_wall=11, gb_free=2.8, wall=25550
2021-06-19 01:44:47 | INFO | train_inner | epoch 001: 2308 / 3002 loss=3.052, ppl=8.3, wps=5863.1, ups=0.09, wpb=64832, bsz=128, num_updates=2288, lr=9.99897e-05, gnorm=2.288, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25561
2021-06-19 01:44:58 | INFO | train_inner | epoch 001: 2309 / 3002 loss=2.986, ppl=7.92, wps=5895.3, ups=0.09, wpb=64784, bsz=128, num_updates=2289, lr=9.99897e-05, gnorm=2.394, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25572
2021-06-19 01:45:09 | INFO | train_inner | epoch 001: 2310 / 3002 loss=2.88, ppl=7.36, wps=5876.8, ups=0.09, wpb=64870, bsz=128, num_updates=2290, lr=9.99897e-05, gnorm=2.382, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25583
2021-06-19 01:45:20 | INFO | train_inner | epoch 001: 2311 / 3002 loss=2.86, ppl=7.26, wps=5828.4, ups=0.09, wpb=64827, bsz=128, num_updates=2291, lr=9.99897e-05, gnorm=3.624, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25595
2021-06-19 01:45:31 | INFO | train_inner | epoch 001: 2312 / 3002 loss=2.869, ppl=7.3, wps=5888.7, ups=0.09, wpb=64838, bsz=128, num_updates=2292, lr=9.99897e-05, gnorm=2.431, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25606
2021-06-19 01:45:42 | INFO | train_inner | epoch 001: 2313 / 3002 loss=2.986, ppl=7.92, wps=5916.2, ups=0.09, wpb=64864, bsz=128, num_updates=2293, lr=9.99897e-05, gnorm=2.456, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25617
2021-06-19 01:45:53 | INFO | train_inner | epoch 001: 2314 / 3002 loss=2.885, ppl=7.39, wps=5868.7, ups=0.09, wpb=64810, bsz=128, num_updates=2294, lr=9.99896e-05, gnorm=2.444, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25628
2021-06-19 01:46:04 | INFO | train_inner | epoch 001: 2315 / 3002 loss=2.988, ppl=7.93, wps=5822.4, ups=0.09, wpb=64832, bsz=128, num_updates=2295, lr=9.99896e-05, gnorm=2.526, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25639
2021-06-19 01:46:16 | INFO | train_inner | epoch 001: 2316 / 3002 loss=2.845, ppl=7.19, wps=5843.4, ups=0.09, wpb=64896, bsz=128, num_updates=2296, lr=9.99896e-05, gnorm=2.323, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25650
2021-06-19 01:46:26 | INFO | train_inner | epoch 001: 2317 / 3002 loss=2.996, ppl=7.98, wps=5929, ups=0.09, wpb=64837, bsz=128, num_updates=2297, lr=9.99896e-05, gnorm=2.387, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25661
2021-06-19 01:46:37 | INFO | train_inner | epoch 001: 2318 / 3002 loss=2.998, ppl=7.99, wps=5895.6, ups=0.09, wpb=64797, bsz=128, num_updates=2298, lr=9.99896e-05, gnorm=21.177, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25672
2021-06-19 01:46:48 | INFO | train_inner | epoch 001: 2319 / 3002 loss=2.982, ppl=7.9, wps=5929.2, ups=0.09, wpb=64834, bsz=128, num_updates=2299, lr=9.99896e-05, gnorm=2.28, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25683
2021-06-19 01:47:00 | INFO | train_inner | epoch 001: 2320 / 3002 loss=2.947, ppl=7.71, wps=5748.6, ups=0.09, wpb=64740, bsz=128, num_updates=2300, lr=9.99896e-05, gnorm=2.436, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25694
2021-06-19 01:47:11 | INFO | train_inner | epoch 001: 2321 / 3002 loss=2.873, ppl=7.33, wps=5838.9, ups=0.09, wpb=64788, bsz=128, num_updates=2301, lr=9.99896e-05, gnorm=2.588, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25705
2021-06-19 01:47:22 | INFO | train_inner | epoch 001: 2322 / 3002 loss=2.82, ppl=7.06, wps=5944.6, ups=0.09, wpb=64928, bsz=128, num_updates=2302, lr=9.99896e-05, gnorm=8.33, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25716
2021-06-19 01:47:33 | INFO | train_inner | epoch 001: 2323 / 3002 loss=2.973, ppl=7.85, wps=5792.3, ups=0.09, wpb=64760, bsz=128, num_updates=2303, lr=9.99896e-05, gnorm=2.415, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25727
2021-06-19 01:47:44 | INFO | train_inner | epoch 001: 2324 / 3002 loss=3.051, ppl=8.29, wps=5875.7, ups=0.09, wpb=64821, bsz=128, num_updates=2304, lr=9.99896e-05, gnorm=3.57, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25738
2021-06-19 01:47:55 | INFO | train_inner | epoch 001: 2325 / 3002 loss=3.18, ppl=9.06, wps=5732, ups=0.09, wpb=64776, bsz=128, num_updates=2305, lr=9.99896e-05, gnorm=2.467, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25750
2021-06-19 01:48:06 | INFO | train_inner | epoch 001: 2326 / 3002 loss=3.005, ppl=8.03, wps=5892.1, ups=0.09, wpb=64846, bsz=128, num_updates=2306, lr=9.99896e-05, gnorm=2.783, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25761
2021-06-19 01:48:17 | INFO | train_inner | epoch 001: 2327 / 3002 loss=2.941, ppl=7.68, wps=5953.5, ups=0.09, wpb=64798, bsz=128, num_updates=2307, lr=9.99895e-05, gnorm=2.431, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25771
2021-06-19 01:48:28 | INFO | train_inner | epoch 001: 2328 / 3002 loss=3.027, ppl=8.15, wps=5799.8, ups=0.09, wpb=64871, bsz=128, num_updates=2308, lr=9.99895e-05, gnorm=2.923, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25783
2021-06-19 01:48:39 | INFO | train_inner | epoch 001: 2329 / 3002 loss=2.914, ppl=7.54, wps=5894.6, ups=0.09, wpb=64809, bsz=128, num_updates=2309, lr=9.99895e-05, gnorm=2.501, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25794
2021-06-19 01:48:50 | INFO | train_inner | epoch 001: 2330 / 3002 loss=3.142, ppl=8.83, wps=5839.4, ups=0.09, wpb=64843, bsz=128, num_updates=2310, lr=9.99895e-05, gnorm=2.549, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25805
2021-06-19 01:49:01 | INFO | train_inner | epoch 001: 2331 / 3002 loss=2.977, ppl=7.88, wps=5968.6, ups=0.09, wpb=64782, bsz=128, num_updates=2311, lr=9.99895e-05, gnorm=2.489, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25816
2021-06-19 01:49:12 | INFO | train_inner | epoch 001: 2332 / 3002 loss=2.971, ppl=7.84, wps=5904.2, ups=0.09, wpb=64780, bsz=128, num_updates=2312, lr=9.99895e-05, gnorm=2.571, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25827
2021-06-19 01:49:23 | INFO | train_inner | epoch 001: 2333 / 3002 loss=2.839, ppl=7.15, wps=5921.4, ups=0.09, wpb=64830, bsz=128, num_updates=2313, lr=9.99895e-05, gnorm=2.392, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25837
2021-06-19 01:49:34 | INFO | train_inner | epoch 001: 2334 / 3002 loss=2.965, ppl=7.81, wps=5806.6, ups=0.09, wpb=64816, bsz=128, num_updates=2314, lr=9.99895e-05, gnorm=2.472, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25849
2021-06-19 01:49:45 | INFO | train_inner | epoch 001: 2335 / 3002 loss=2.922, ppl=7.58, wps=5855.3, ups=0.09, wpb=64872, bsz=128, num_updates=2315, lr=9.99895e-05, gnorm=2.508, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25860
2021-06-19 01:49:56 | INFO | train_inner | epoch 001: 2336 / 3002 loss=3.013, ppl=8.08, wps=5875.8, ups=0.09, wpb=64743, bsz=128, num_updates=2316, lr=9.99895e-05, gnorm=2.537, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25871
2021-06-19 01:50:08 | INFO | train_inner | epoch 001: 2337 / 3002 loss=2.909, ppl=7.51, wps=5795.2, ups=0.09, wpb=64877, bsz=128, num_updates=2317, lr=9.99895e-05, gnorm=2.495, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25882
2021-06-19 01:50:19 | INFO | train_inner | epoch 001: 2338 / 3002 loss=2.774, ppl=6.84, wps=5844.9, ups=0.09, wpb=64751, bsz=128, num_updates=2318, lr=9.99895e-05, gnorm=4.337, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25893
2021-06-19 01:50:30 | INFO | train_inner | epoch 001: 2339 / 3002 loss=2.853, ppl=7.22, wps=5846, ups=0.09, wpb=64809, bsz=128, num_updates=2319, lr=9.99894e-05, gnorm=2.36, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25904
2021-06-19 01:50:41 | INFO | train_inner | epoch 001: 2340 / 3002 loss=2.971, ppl=7.84, wps=5823.4, ups=0.09, wpb=64841, bsz=128, num_updates=2320, lr=9.99894e-05, gnorm=2.437, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25915
2021-06-19 01:50:52 | INFO | train_inner | epoch 001: 2341 / 3002 loss=2.86, ppl=7.26, wps=5903.4, ups=0.09, wpb=64902, bsz=128, num_updates=2321, lr=9.99894e-05, gnorm=2.364, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25926
2021-06-19 01:51:03 | INFO | train_inner | epoch 001: 2342 / 3002 loss=2.926, ppl=7.6, wps=5950.5, ups=0.09, wpb=64868, bsz=128, num_updates=2322, lr=9.99894e-05, gnorm=2.452, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25937
2021-06-19 01:51:14 | INFO | train_inner | epoch 001: 2343 / 3002 loss=2.899, ppl=7.46, wps=5876.4, ups=0.09, wpb=64807, bsz=128, num_updates=2323, lr=9.99894e-05, gnorm=6.751, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25948
2021-06-19 01:51:25 | INFO | train_inner | epoch 001: 2344 / 3002 loss=2.863, ppl=7.28, wps=5682.3, ups=0.09, wpb=64803, bsz=128, num_updates=2324, lr=9.99894e-05, gnorm=2.534, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25960
2021-06-19 01:51:36 | INFO | train_inner | epoch 001: 2345 / 3002 loss=2.844, ppl=7.18, wps=5856.4, ups=0.09, wpb=64859, bsz=128, num_updates=2325, lr=9.99894e-05, gnorm=4.598, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25971
2021-06-19 01:51:47 | INFO | train_inner | epoch 001: 2346 / 3002 loss=2.927, ppl=7.61, wps=5922.7, ups=0.09, wpb=64875, bsz=128, num_updates=2326, lr=9.99894e-05, gnorm=2.698, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=25982
2021-06-19 01:51:58 | INFO | train_inner | epoch 001: 2347 / 3002 loss=2.856, ppl=7.24, wps=5970.8, ups=0.09, wpb=64827, bsz=128, num_updates=2327, lr=9.99894e-05, gnorm=2.616, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=25992
2021-06-19 01:52:09 | INFO | train_inner | epoch 001: 2348 / 3002 loss=3.032, ppl=8.18, wps=5842.7, ups=0.09, wpb=64763, bsz=128, num_updates=2328, lr=9.99894e-05, gnorm=2.382, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26004
2021-06-19 01:52:20 | INFO | train_inner | epoch 001: 2349 / 3002 loss=2.79, ppl=6.92, wps=5824.2, ups=0.09, wpb=64776, bsz=128, num_updates=2329, lr=9.99894e-05, gnorm=2.453, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26015
2021-06-19 01:52:31 | INFO | train_inner | epoch 001: 2350 / 3002 loss=2.799, ppl=6.96, wps=5822.2, ups=0.09, wpb=64810, bsz=128, num_updates=2330, lr=9.99894e-05, gnorm=2.474, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26026
2021-06-19 01:52:43 | INFO | train_inner | epoch 001: 2351 / 3002 loss=2.67, ppl=6.36, wps=5812.2, ups=0.09, wpb=64799, bsz=128, num_updates=2331, lr=9.99894e-05, gnorm=2.323, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26037
2021-06-19 01:52:54 | INFO | train_inner | epoch 001: 2352 / 3002 loss=2.758, ppl=6.76, wps=5880.6, ups=0.09, wpb=64792, bsz=128, num_updates=2332, lr=9.99893e-05, gnorm=2.532, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26048
2021-06-19 01:53:05 | INFO | train_inner | epoch 001: 2353 / 3002 loss=2.84, ppl=7.16, wps=5915.7, ups=0.09, wpb=64931, bsz=128, num_updates=2333, lr=9.99893e-05, gnorm=2.406, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26059
2021-06-19 01:53:16 | INFO | train_inner | epoch 001: 2354 / 3002 loss=2.811, ppl=7.02, wps=5863.5, ups=0.09, wpb=64792, bsz=128, num_updates=2334, lr=9.99893e-05, gnorm=2.459, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26070
2021-06-19 01:53:27 | INFO | train_inner | epoch 001: 2355 / 3002 loss=2.975, ppl=7.86, wps=5775.9, ups=0.09, wpb=64841, bsz=128, num_updates=2335, lr=9.99893e-05, gnorm=2.475, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26081
2021-06-19 01:53:38 | INFO | train_inner | epoch 001: 2356 / 3002 loss=2.943, ppl=7.69, wps=5841.4, ups=0.09, wpb=64768, bsz=128, num_updates=2336, lr=9.99893e-05, gnorm=2.387, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26092
2021-06-19 01:53:49 | INFO | train_inner | epoch 001: 2357 / 3002 loss=3.066, ppl=8.37, wps=5914.5, ups=0.09, wpb=64725, bsz=128, num_updates=2337, lr=9.99893e-05, gnorm=2.499, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26103
2021-06-19 01:54:00 | INFO | train_inner | epoch 001: 2358 / 3002 loss=2.952, ppl=7.74, wps=5838.3, ups=0.09, wpb=64822, bsz=128, num_updates=2338, lr=9.99893e-05, gnorm=2.395, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26114
2021-06-19 01:54:11 | INFO | train_inner | epoch 001: 2359 / 3002 loss=2.784, ppl=6.89, wps=5782.1, ups=0.09, wpb=64819, bsz=128, num_updates=2339, lr=9.99893e-05, gnorm=2.434, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26126
2021-06-19 01:54:22 | INFO | train_inner | epoch 001: 2360 / 3002 loss=2.893, ppl=7.43, wps=5962.9, ups=0.09, wpb=64912, bsz=128, num_updates=2340, lr=9.99893e-05, gnorm=2.507, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26136
2021-06-19 01:54:33 | INFO | train_inner | epoch 001: 2361 / 3002 loss=2.904, ppl=7.48, wps=5902.9, ups=0.09, wpb=64853, bsz=128, num_updates=2341, lr=9.99893e-05, gnorm=2.347, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26147
2021-06-19 01:54:44 | INFO | train_inner | epoch 001: 2362 / 3002 loss=3.01, ppl=8.06, wps=5785.1, ups=0.09, wpb=64728, bsz=128, num_updates=2342, lr=9.99893e-05, gnorm=2.848, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26159
2021-06-19 01:54:55 | INFO | train_inner | epoch 001: 2363 / 3002 loss=3.035, ppl=8.19, wps=5833, ups=0.09, wpb=64873, bsz=128, num_updates=2343, lr=9.99893e-05, gnorm=2.397, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26170
2021-06-19 01:55:06 | INFO | train_inner | epoch 001: 2364 / 3002 loss=2.716, ppl=6.57, wps=5892.1, ups=0.09, wpb=64886, bsz=128, num_updates=2344, lr=9.99892e-05, gnorm=49.332, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26181
2021-06-19 01:55:18 | INFO | train_inner | epoch 001: 2365 / 3002 loss=2.948, ppl=7.72, wps=5825, ups=0.09, wpb=64846, bsz=128, num_updates=2345, lr=9.99892e-05, gnorm=2.397, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26192
2021-06-19 01:55:28 | INFO | train_inner | epoch 001: 2366 / 3002 loss=2.885, ppl=7.39, wps=5959.2, ups=0.09, wpb=64826, bsz=128, num_updates=2346, lr=9.99892e-05, gnorm=2.527, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26203
2021-06-19 01:55:40 | INFO | train_inner | epoch 001: 2367 / 3002 loss=2.799, ppl=6.96, wps=5840.6, ups=0.09, wpb=64837, bsz=128, num_updates=2347, lr=9.99892e-05, gnorm=2.37, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26214
2021-06-19 01:55:51 | INFO | train_inner | epoch 001: 2368 / 3002 loss=3.016, ppl=8.09, wps=5885.8, ups=0.09, wpb=64815, bsz=128, num_updates=2348, lr=9.99892e-05, gnorm=2.555, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26225
2021-06-19 01:56:02 | INFO | train_inner | epoch 001: 2369 / 3002 loss=2.948, ppl=7.72, wps=5812.9, ups=0.09, wpb=64847, bsz=128, num_updates=2349, lr=9.99892e-05, gnorm=3.71, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26236
2021-06-19 01:56:13 | INFO | train_inner | epoch 001: 2370 / 3002 loss=2.892, ppl=7.42, wps=5886, ups=0.09, wpb=64875, bsz=128, num_updates=2350, lr=9.99892e-05, gnorm=2.881, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26247
2021-06-19 01:56:24 | INFO | train_inner | epoch 001: 2371 / 3002 loss=3.133, ppl=8.77, wps=5798.8, ups=0.09, wpb=64777, bsz=128, num_updates=2351, lr=9.99892e-05, gnorm=2.581, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26258
2021-06-19 01:56:35 | INFO | train_inner | epoch 001: 2372 / 3002 loss=2.921, ppl=7.58, wps=5871.3, ups=0.09, wpb=64823, bsz=128, num_updates=2352, lr=9.99892e-05, gnorm=3.693, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26269
2021-06-19 01:56:46 | INFO | train_inner | epoch 001: 2373 / 3002 loss=2.912, ppl=7.53, wps=5854.4, ups=0.09, wpb=64833, bsz=128, num_updates=2353, lr=9.99892e-05, gnorm=3.514, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26280
2021-06-19 01:56:57 | INFO | train_inner | epoch 001: 2374 / 3002 loss=3.068, ppl=8.39, wps=5763.7, ups=0.09, wpb=64855, bsz=128, num_updates=2354, lr=9.99892e-05, gnorm=2.602, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26292
2021-06-19 01:57:08 | INFO | train_inner | epoch 001: 2375 / 3002 loss=2.962, ppl=7.79, wps=5838.4, ups=0.09, wpb=64864, bsz=128, num_updates=2355, lr=9.99892e-05, gnorm=2.821, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26303
2021-06-19 01:57:19 | INFO | train_inner | epoch 001: 2376 / 3002 loss=3.035, ppl=8.19, wps=5912, ups=0.09, wpb=64775, bsz=128, num_updates=2356, lr=9.99892e-05, gnorm=2.781, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26314
2021-06-19 01:57:30 | INFO | train_inner | epoch 001: 2377 / 3002 loss=2.905, ppl=7.49, wps=5809.6, ups=0.09, wpb=64788, bsz=128, num_updates=2357, lr=9.99891e-05, gnorm=8.051, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26325
2021-06-19 01:57:42 | INFO | train_inner | epoch 001: 2378 / 3002 loss=2.991, ppl=7.95, wps=5790.7, ups=0.09, wpb=64769, bsz=128, num_updates=2358, lr=9.99891e-05, gnorm=2.565, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26336
2021-06-19 01:57:53 | INFO | train_inner | epoch 001: 2379 / 3002 loss=2.954, ppl=7.75, wps=5812.5, ups=0.09, wpb=64889, bsz=128, num_updates=2359, lr=9.99891e-05, gnorm=2.581, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26347
2021-06-19 01:58:04 | INFO | train_inner | epoch 001: 2380 / 3002 loss=3.053, ppl=8.3, wps=5868.2, ups=0.09, wpb=64845, bsz=128, num_updates=2360, lr=9.99891e-05, gnorm=2.568, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26358
2021-06-19 01:58:15 | INFO | train_inner | epoch 001: 2381 / 3002 loss=2.967, ppl=7.82, wps=5802.8, ups=0.09, wpb=64770, bsz=128, num_updates=2361, lr=9.99891e-05, gnorm=2.462, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26369
2021-06-19 01:58:26 | INFO | train_inner | epoch 001: 2382 / 3002 loss=2.867, ppl=7.29, wps=5872, ups=0.09, wpb=64889, bsz=128, num_updates=2362, lr=9.99891e-05, gnorm=2.64, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26380
2021-06-19 01:58:37 | INFO | train_inner | epoch 001: 2383 / 3002 loss=3.015, ppl=8.08, wps=5845.9, ups=0.09, wpb=64810, bsz=128, num_updates=2363, lr=9.99891e-05, gnorm=2.522, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26392
2021-06-19 01:58:48 | INFO | train_inner | epoch 001: 2384 / 3002 loss=2.864, ppl=7.28, wps=5852.8, ups=0.09, wpb=64779, bsz=128, num_updates=2364, lr=9.99891e-05, gnorm=2.449, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26403
2021-06-19 01:58:59 | INFO | train_inner | epoch 001: 2385 / 3002 loss=2.926, ppl=7.6, wps=5906.7, ups=0.09, wpb=64814, bsz=128, num_updates=2365, lr=9.99891e-05, gnorm=2.614, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26414
2021-06-19 01:59:10 | INFO | train_inner | epoch 001: 2386 / 3002 loss=3.02, ppl=8.11, wps=5948.7, ups=0.09, wpb=64823, bsz=128, num_updates=2366, lr=9.99891e-05, gnorm=2.732, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26424
2021-06-19 01:59:21 | INFO | train_inner | epoch 001: 2387 / 3002 loss=2.905, ppl=7.49, wps=5857.5, ups=0.09, wpb=64792, bsz=128, num_updates=2367, lr=9.99891e-05, gnorm=2.527, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26436
2021-06-19 01:59:32 | INFO | train_inner | epoch 001: 2388 / 3002 loss=2.941, ppl=7.68, wps=5830, ups=0.09, wpb=64790, bsz=128, num_updates=2368, lr=9.99891e-05, gnorm=2.53, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26447
2021-06-19 01:59:43 | INFO | train_inner | epoch 001: 2389 / 3002 loss=2.969, ppl=7.83, wps=5893.3, ups=0.09, wpb=64834, bsz=128, num_updates=2369, lr=9.9989e-05, gnorm=2.501, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26458
2021-06-19 01:59:55 | INFO | train_inner | epoch 001: 2390 / 3002 loss=3.032, ppl=8.18, wps=5759.5, ups=0.09, wpb=64834, bsz=128, num_updates=2370, lr=9.9989e-05, gnorm=2.554, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26469
2021-06-19 02:00:06 | INFO | train_inner | epoch 001: 2391 / 3002 loss=2.828, ppl=7.1, wps=5770.3, ups=0.09, wpb=64828, bsz=128, num_updates=2371, lr=9.9989e-05, gnorm=2.439, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26480
2021-06-19 02:00:17 | INFO | train_inner | epoch 001: 2392 / 3002 loss=3.033, ppl=8.19, wps=5771.8, ups=0.09, wpb=64807, bsz=128, num_updates=2372, lr=9.9989e-05, gnorm=2.427, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26491
2021-06-19 02:00:28 | INFO | train_inner | epoch 001: 2393 / 3002 loss=2.986, ppl=7.92, wps=5791.7, ups=0.09, wpb=64779, bsz=128, num_updates=2373, lr=9.9989e-05, gnorm=2.736, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26503
2021-06-19 02:00:39 | INFO | train_inner | epoch 001: 2394 / 3002 loss=2.968, ppl=7.83, wps=5783.5, ups=0.09, wpb=64828, bsz=128, num_updates=2374, lr=9.9989e-05, gnorm=2.646, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26514
2021-06-19 02:00:50 | INFO | train_inner | epoch 001: 2395 / 3002 loss=2.821, ppl=7.07, wps=5952.9, ups=0.09, wpb=64813, bsz=128, num_updates=2375, lr=9.9989e-05, gnorm=4.172, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26525
2021-06-19 02:01:01 | INFO | train_inner | epoch 001: 2396 / 3002 loss=3.184, ppl=9.09, wps=5792.8, ups=0.09, wpb=64755, bsz=128, num_updates=2376, lr=9.9989e-05, gnorm=2.537, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26536
2021-06-19 02:01:13 | INFO | train_inner | epoch 001: 2397 / 3002 loss=2.829, ppl=7.11, wps=5792.3, ups=0.09, wpb=64809, bsz=128, num_updates=2377, lr=9.9989e-05, gnorm=2.539, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26547
2021-06-19 02:01:24 | INFO | train_inner | epoch 001: 2398 / 3002 loss=2.823, ppl=7.08, wps=5911.9, ups=0.09, wpb=64893, bsz=128, num_updates=2378, lr=9.9989e-05, gnorm=2.481, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26558
2021-06-19 02:01:34 | INFO | train_inner | epoch 001: 2399 / 3002 loss=2.943, ppl=7.69, wps=5975.6, ups=0.09, wpb=64763, bsz=128, num_updates=2379, lr=9.9989e-05, gnorm=2.521, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26569
2021-06-19 02:01:45 | INFO | train_inner | epoch 001: 2400 / 3002 loss=2.733, ppl=6.65, wps=5938.7, ups=0.09, wpb=64861, bsz=128, num_updates=2380, lr=9.9989e-05, gnorm=2.416, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26580
2021-06-19 02:01:56 | INFO | train_inner | epoch 001: 2401 / 3002 loss=2.833, ppl=7.12, wps=5854.1, ups=0.09, wpb=64812, bsz=128, num_updates=2381, lr=9.9989e-05, gnorm=2.512, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26591
2021-06-19 02:02:07 | INFO | train_inner | epoch 001: 2402 / 3002 loss=2.862, ppl=7.27, wps=5873.2, ups=0.09, wpb=64819, bsz=128, num_updates=2382, lr=9.99889e-05, gnorm=2.501, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26602
2021-06-19 02:02:18 | INFO | train_inner | epoch 001: 2403 / 3002 loss=2.964, ppl=7.8, wps=5897.6, ups=0.09, wpb=64799, bsz=128, num_updates=2383, lr=9.99889e-05, gnorm=2.472, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26613
2021-06-19 02:02:29 | INFO | train_inner | epoch 001: 2404 / 3002 loss=2.734, ppl=6.65, wps=5885.4, ups=0.09, wpb=64806, bsz=128, num_updates=2384, lr=9.99889e-05, gnorm=2.259, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26624
2021-06-19 02:02:40 | INFO | train_inner | epoch 001: 2405 / 3002 loss=2.977, ppl=7.87, wps=6078.3, ups=0.09, wpb=64863, bsz=128, num_updates=2385, lr=9.99889e-05, gnorm=2.409, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26635
2021-06-19 02:02:51 | INFO | train_inner | epoch 001: 2406 / 3002 loss=2.716, ppl=6.57, wps=5802.7, ups=0.09, wpb=64857, bsz=128, num_updates=2386, lr=9.99889e-05, gnorm=2.325, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26646
2021-06-19 02:03:02 | INFO | train_inner | epoch 001: 2407 / 3002 loss=2.853, ppl=7.23, wps=5909.8, ups=0.09, wpb=64930, bsz=128, num_updates=2387, lr=9.99889e-05, gnorm=2.384, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26657
2021-06-19 02:03:13 | INFO | train_inner | epoch 001: 2408 / 3002 loss=2.871, ppl=7.32, wps=5816.5, ups=0.09, wpb=64769, bsz=128, num_updates=2388, lr=9.99889e-05, gnorm=2.533, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26668
2021-06-19 02:03:24 | INFO | train_inner | epoch 001: 2409 / 3002 loss=2.92, ppl=7.57, wps=5999, ups=0.09, wpb=64874, bsz=128, num_updates=2389, lr=9.99889e-05, gnorm=62.891, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26679
2021-06-19 02:03:35 | INFO | train_inner | epoch 001: 2410 / 3002 loss=2.942, ppl=7.69, wps=5861.5, ups=0.09, wpb=64828, bsz=128, num_updates=2390, lr=9.99889e-05, gnorm=2.611, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26690
2021-06-19 02:03:46 | INFO | train_inner | epoch 001: 2411 / 3002 loss=3.004, ppl=8.02, wps=5945, ups=0.09, wpb=64797, bsz=128, num_updates=2391, lr=9.99889e-05, gnorm=2.739, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26701
2021-06-19 02:03:57 | INFO | train_inner | epoch 001: 2412 / 3002 loss=2.999, ppl=7.99, wps=5832.8, ups=0.09, wpb=64763, bsz=128, num_updates=2392, lr=9.99889e-05, gnorm=7.665, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26712
2021-06-19 02:04:08 | INFO | train_inner | epoch 001: 2413 / 3002 loss=3.224, ppl=9.34, wps=5895.6, ups=0.09, wpb=64786, bsz=128, num_updates=2393, lr=9.99889e-05, gnorm=10.52, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26723
2021-06-19 02:04:20 | INFO | train_inner | epoch 001: 2414 / 3002 loss=3.082, ppl=8.47, wps=5798.9, ups=0.09, wpb=64796, bsz=128, num_updates=2394, lr=9.99888e-05, gnorm=4.589, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26734
2021-06-19 02:04:31 | INFO | train_inner | epoch 001: 2415 / 3002 loss=2.953, ppl=7.75, wps=5700.5, ups=0.09, wpb=64811, bsz=128, num_updates=2395, lr=9.99888e-05, gnorm=6.095, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26745
2021-06-19 02:04:42 | INFO | train_inner | epoch 001: 2416 / 3002 loss=2.903, ppl=7.48, wps=5929.1, ups=0.09, wpb=64880, bsz=128, num_updates=2396, lr=9.99888e-05, gnorm=4.854, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26756
2021-06-19 02:04:53 | INFO | train_inner | epoch 001: 2417 / 3002 loss=2.928, ppl=7.61, wps=5822.1, ups=0.09, wpb=64878, bsz=128, num_updates=2397, lr=9.99888e-05, gnorm=2.906, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26767
2021-06-19 02:05:04 | INFO | train_inner | epoch 001: 2418 / 3002 loss=3.116, ppl=8.67, wps=5925.2, ups=0.09, wpb=64917, bsz=128, num_updates=2398, lr=9.99888e-05, gnorm=3.848, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26778
2021-06-19 02:05:15 | INFO | train_inner | epoch 001: 2419 / 3002 loss=2.898, ppl=7.45, wps=5927.7, ups=0.09, wpb=64870, bsz=128, num_updates=2399, lr=9.99888e-05, gnorm=2.812, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26789
2021-06-19 02:05:26 | INFO | train_inner | epoch 001: 2420 / 3002 loss=2.884, ppl=7.38, wps=5893.2, ups=0.09, wpb=64820, bsz=128, num_updates=2400, lr=9.99888e-05, gnorm=2.58, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26800
2021-06-19 02:05:37 | INFO | train_inner | epoch 001: 2421 / 3002 loss=2.996, ppl=7.98, wps=5836.3, ups=0.09, wpb=64829, bsz=128, num_updates=2401, lr=9.99888e-05, gnorm=2.813, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26811
2021-06-19 02:05:48 | INFO | train_inner | epoch 001: 2422 / 3002 loss=3.231, ppl=9.39, wps=5908.3, ups=0.09, wpb=64877, bsz=128, num_updates=2402, lr=9.99888e-05, gnorm=2.807, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26822
2021-06-19 02:05:59 | INFO | train_inner | epoch 001: 2423 / 3002 loss=2.959, ppl=7.78, wps=5801.2, ups=0.09, wpb=64872, bsz=128, num_updates=2403, lr=9.99888e-05, gnorm=10.088, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26833
2021-06-19 02:06:10 | INFO | train_inner | epoch 001: 2424 / 3002 loss=3.013, ppl=8.08, wps=5879, ups=0.09, wpb=64781, bsz=128, num_updates=2404, lr=9.99888e-05, gnorm=2.649, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26844
2021-06-19 02:06:21 | INFO | train_inner | epoch 001: 2425 / 3002 loss=3.045, ppl=8.25, wps=5811, ups=0.09, wpb=64763, bsz=128, num_updates=2405, lr=9.99888e-05, gnorm=2.632, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26856
2021-06-19 02:06:32 | INFO | train_inner | epoch 001: 2426 / 3002 loss=2.854, ppl=7.23, wps=5844.1, ups=0.09, wpb=64872, bsz=128, num_updates=2406, lr=9.99888e-05, gnorm=2.599, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26867
2021-06-19 02:06:43 | INFO | train_inner | epoch 001: 2427 / 3002 loss=3.009, ppl=8.05, wps=5889.8, ups=0.09, wpb=64882, bsz=128, num_updates=2407, lr=9.99887e-05, gnorm=5.998, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26878
2021-06-19 02:06:55 | INFO | train_inner | epoch 001: 2428 / 3002 loss=3.035, ppl=8.2, wps=5751.1, ups=0.09, wpb=64792, bsz=128, num_updates=2408, lr=9.99887e-05, gnorm=2.963, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26889
2021-06-19 02:07:06 | INFO | train_inner | epoch 001: 2429 / 3002 loss=2.852, ppl=7.22, wps=5860.2, ups=0.09, wpb=64837, bsz=128, num_updates=2409, lr=9.99887e-05, gnorm=10.095, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26900
2021-06-19 02:07:17 | INFO | train_inner | epoch 001: 2430 / 3002 loss=2.951, ppl=7.73, wps=5762.7, ups=0.09, wpb=64760, bsz=128, num_updates=2410, lr=9.99887e-05, gnorm=2.765, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26911
2021-06-19 02:07:28 | INFO | train_inner | epoch 001: 2431 / 3002 loss=3.044, ppl=8.25, wps=5936.8, ups=0.09, wpb=64766, bsz=128, num_updates=2411, lr=9.99887e-05, gnorm=2.475, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=26922
2021-06-19 02:07:39 | INFO | train_inner | epoch 001: 2432 / 3002 loss=2.864, ppl=7.28, wps=5860.1, ups=0.09, wpb=64863, bsz=128, num_updates=2412, lr=9.99887e-05, gnorm=2.449, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26933
2021-06-19 02:07:50 | INFO | train_inner | epoch 001: 2433 / 3002 loss=3.108, ppl=8.62, wps=5827.1, ups=0.09, wpb=64823, bsz=128, num_updates=2413, lr=9.99887e-05, gnorm=2.616, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26944
2021-06-19 02:08:01 | INFO | train_inner | epoch 001: 2434 / 3002 loss=2.92, ppl=7.57, wps=5825.4, ups=0.09, wpb=64835, bsz=128, num_updates=2414, lr=9.99887e-05, gnorm=2.499, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26956
2021-06-19 02:08:12 | INFO | train_inner | epoch 001: 2435 / 3002 loss=2.904, ppl=7.49, wps=5801.9, ups=0.09, wpb=64858, bsz=128, num_updates=2415, lr=9.99887e-05, gnorm=2.516, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=26967
2021-06-19 02:08:24 | INFO | train_inner | epoch 001: 2436 / 3002 loss=2.918, ppl=7.56, wps=5784.7, ups=0.09, wpb=64844, bsz=128, num_updates=2416, lr=9.99887e-05, gnorm=2.495, loss_scale=1, train_wall=11, gb_free=2.8, wall=26978
2021-06-19 02:08:35 | INFO | train_inner | epoch 001: 2437 / 3002 loss=3.099, ppl=8.57, wps=5817.6, ups=0.09, wpb=64800, bsz=128, num_updates=2417, lr=9.99887e-05, gnorm=2.513, loss_scale=1, train_wall=11, gb_free=2.8, wall=26989
2021-06-19 02:08:46 | INFO | train_inner | epoch 001: 2438 / 3002 loss=2.896, ppl=7.45, wps=5917.9, ups=0.09, wpb=64777, bsz=128, num_updates=2418, lr=9.99887e-05, gnorm=12.403, loss_scale=1, train_wall=10, gb_free=2.8, wall=27000
2021-06-19 02:08:57 | INFO | train_inner | epoch 001: 2439 / 3002 loss=3.181, ppl=9.07, wps=5799.4, ups=0.09, wpb=64801, bsz=128, num_updates=2419, lr=9.99886e-05, gnorm=2.537, loss_scale=1, train_wall=11, gb_free=2.8, wall=27011
2021-06-19 02:09:08 | INFO | train_inner | epoch 001: 2440 / 3002 loss=2.837, ppl=7.15, wps=5825.5, ups=0.09, wpb=64850, bsz=128, num_updates=2420, lr=9.99886e-05, gnorm=2.401, loss_scale=1, train_wall=11, gb_free=2.8, wall=27022
2021-06-19 02:09:19 | INFO | train_inner | epoch 001: 2441 / 3002 loss=3.004, ppl=8.02, wps=5871.2, ups=0.09, wpb=64770, bsz=128, num_updates=2421, lr=9.99886e-05, gnorm=2.473, loss_scale=1, train_wall=11, gb_free=2.8, wall=27033
2021-06-19 02:09:30 | INFO | train_inner | epoch 001: 2442 / 3002 loss=2.833, ppl=7.13, wps=5834.2, ups=0.09, wpb=64843, bsz=128, num_updates=2422, lr=9.99886e-05, gnorm=2.901, loss_scale=1, train_wall=11, gb_free=2.8, wall=27044
2021-06-19 02:09:41 | INFO | train_inner | epoch 001: 2443 / 3002 loss=2.92, ppl=7.57, wps=5749.8, ups=0.09, wpb=64789, bsz=128, num_updates=2423, lr=9.99886e-05, gnorm=2.487, loss_scale=1, train_wall=11, gb_free=2.8, wall=27056
2021-06-19 02:09:52 | INFO | train_inner | epoch 001: 2444 / 3002 loss=2.957, ppl=7.77, wps=5871.7, ups=0.09, wpb=64808, bsz=128, num_updates=2424, lr=9.99886e-05, gnorm=2.547, loss_scale=1, train_wall=11, gb_free=2.8, wall=27067
2021-06-19 02:10:04 | INFO | train_inner | epoch 001: 2445 / 3002 loss=2.886, ppl=7.39, wps=5842.8, ups=0.09, wpb=64859, bsz=128, num_updates=2425, lr=9.99886e-05, gnorm=2.492, loss_scale=1, train_wall=11, gb_free=2.8, wall=27078
2021-06-19 02:10:15 | INFO | train_inner | epoch 001: 2446 / 3002 loss=2.924, ppl=7.59, wps=5906.7, ups=0.09, wpb=64806, bsz=128, num_updates=2426, lr=9.99886e-05, gnorm=2.562, loss_scale=1, train_wall=10, gb_free=2.8, wall=27089
2021-06-19 02:10:26 | INFO | train_inner | epoch 001: 2447 / 3002 loss=2.836, ppl=7.14, wps=5859.6, ups=0.09, wpb=64835, bsz=128, num_updates=2427, lr=9.99886e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=27100
2021-06-19 02:10:37 | INFO | train_inner | epoch 001: 2448 / 3002 loss=2.955, ppl=7.75, wps=5847.7, ups=0.09, wpb=64760, bsz=128, num_updates=2428, lr=9.99886e-05, gnorm=2.539, loss_scale=1, train_wall=11, gb_free=2.8, wall=27111
2021-06-19 02:10:48 | INFO | train_inner | epoch 001: 2449 / 3002 loss=2.962, ppl=7.79, wps=5905.6, ups=0.09, wpb=64831, bsz=128, num_updates=2429, lr=9.99886e-05, gnorm=2.493, loss_scale=1, train_wall=11, gb_free=2.8, wall=27122
2021-06-19 02:10:59 | INFO | train_inner | epoch 001: 2450 / 3002 loss=2.913, ppl=7.53, wps=5966.1, ups=0.09, wpb=64873, bsz=128, num_updates=2430, lr=9.99886e-05, gnorm=2.453, loss_scale=1, train_wall=10, gb_free=2.8, wall=27133
2021-06-19 02:11:09 | INFO | train_inner | epoch 001: 2451 / 3002 loss=2.855, ppl=7.24, wps=5997.5, ups=0.09, wpb=64836, bsz=128, num_updates=2431, lr=9.99886e-05, gnorm=2.359, loss_scale=1, train_wall=10, gb_free=2.8, wall=27144
2021-06-19 02:11:20 | INFO | train_inner | epoch 001: 2452 / 3002 loss=2.952, ppl=7.74, wps=5904.1, ups=0.09, wpb=64846, bsz=128, num_updates=2432, lr=9.99885e-05, gnorm=2.579, loss_scale=1, train_wall=11, gb_free=2.8, wall=27155
2021-06-19 02:11:31 | INFO | train_inner | epoch 001: 2453 / 3002 loss=3.101, ppl=8.58, wps=5886.5, ups=0.09, wpb=64797, bsz=128, num_updates=2433, lr=9.99885e-05, gnorm=2.305, loss_scale=1, train_wall=11, gb_free=2.8, wall=27166
2021-06-19 02:11:43 | INFO | train_inner | epoch 001: 2454 / 3002 loss=2.917, ppl=7.55, wps=5786.5, ups=0.09, wpb=64793, bsz=128, num_updates=2434, lr=9.99885e-05, gnorm=2.373, loss_scale=1, train_wall=11, gb_free=2.8, wall=27177
2021-06-19 02:11:54 | INFO | train_inner | epoch 001: 2455 / 3002 loss=2.988, ppl=7.93, wps=5867.3, ups=0.09, wpb=64824, bsz=128, num_updates=2435, lr=9.99885e-05, gnorm=2.393, loss_scale=1, train_wall=11, gb_free=2.8, wall=27188
2021-06-19 02:12:05 | INFO | train_inner | epoch 001: 2456 / 3002 loss=2.933, ppl=7.64, wps=5893.3, ups=0.09, wpb=64760, bsz=128, num_updates=2436, lr=9.99885e-05, gnorm=2.354, loss_scale=1, train_wall=11, gb_free=2.8, wall=27199
2021-06-19 02:12:16 | INFO | train_inner | epoch 001: 2457 / 3002 loss=2.955, ppl=7.75, wps=5816.2, ups=0.09, wpb=64802, bsz=128, num_updates=2437, lr=9.99885e-05, gnorm=2.431, loss_scale=1, train_wall=11, gb_free=2.8, wall=27210
2021-06-19 02:12:26 | INFO | train_inner | epoch 001: 2458 / 3002 loss=3, ppl=8, wps=6048.1, ups=0.09, wpb=64832, bsz=128, num_updates=2438, lr=9.99885e-05, gnorm=2.504, loss_scale=1, train_wall=10, gb_free=2.8, wall=27221
2021-06-19 02:12:37 | INFO | train_inner | epoch 001: 2459 / 3002 loss=3.04, ppl=8.23, wps=5955, ups=0.09, wpb=64792, bsz=128, num_updates=2439, lr=9.99885e-05, gnorm=2.441, loss_scale=1, train_wall=10, gb_free=2.8, wall=27232
2021-06-19 02:12:48 | INFO | train_inner | epoch 001: 2460 / 3002 loss=2.955, ppl=7.76, wps=5921.2, ups=0.09, wpb=64811, bsz=128, num_updates=2440, lr=9.99885e-05, gnorm=2.455, loss_scale=1, train_wall=10, gb_free=2.8, wall=27243
2021-06-19 02:12:59 | INFO | train_inner | epoch 001: 2461 / 3002 loss=2.826, ppl=7.09, wps=5795.2, ups=0.09, wpb=64828, bsz=128, num_updates=2441, lr=9.99885e-05, gnorm=2.64, loss_scale=1, train_wall=11, gb_free=2.8, wall=27254
2021-06-19 02:13:10 | INFO | train_inner | epoch 001: 2462 / 3002 loss=2.926, ppl=7.6, wps=5890.9, ups=0.09, wpb=64836, bsz=128, num_updates=2442, lr=9.99885e-05, gnorm=2.504, loss_scale=1, train_wall=11, gb_free=2.8, wall=27265
2021-06-19 02:13:21 | INFO | train_inner | epoch 001: 2463 / 3002 loss=2.765, ppl=6.8, wps=5908.2, ups=0.09, wpb=64871, bsz=128, num_updates=2443, lr=9.99885e-05, gnorm=2.752, loss_scale=1, train_wall=11, gb_free=2.8, wall=27276
2021-06-19 02:13:33 | INFO | train_inner | epoch 001: 2464 / 3002 loss=2.99, ppl=7.94, wps=5800.5, ups=0.09, wpb=64825, bsz=128, num_updates=2444, lr=9.99884e-05, gnorm=2.468, loss_scale=1, train_wall=11, gb_free=2.8, wall=27287
2021-06-19 02:13:44 | INFO | train_inner | epoch 001: 2465 / 3002 loss=3.016, ppl=8.09, wps=5775.1, ups=0.09, wpb=64817, bsz=128, num_updates=2445, lr=9.99884e-05, gnorm=2.5, loss_scale=1, train_wall=11, gb_free=2.8, wall=27298
2021-06-19 02:13:55 | INFO | train_inner | epoch 001: 2466 / 3002 loss=2.9, ppl=7.46, wps=5872.4, ups=0.09, wpb=64796, bsz=128, num_updates=2446, lr=9.99884e-05, gnorm=2.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=27309
2021-06-19 02:14:06 | INFO | train_inner | epoch 001: 2467 / 3002 loss=3.001, ppl=8, wps=5845.8, ups=0.09, wpb=64774, bsz=128, num_updates=2447, lr=9.99884e-05, gnorm=2.396, loss_scale=1, train_wall=11, gb_free=2.8, wall=27320
2021-06-19 02:14:17 | INFO | train_inner | epoch 001: 2468 / 3002 loss=3.063, ppl=8.35, wps=5886.9, ups=0.09, wpb=64882, bsz=128, num_updates=2448, lr=9.99884e-05, gnorm=2.506, loss_scale=1, train_wall=11, gb_free=2.8, wall=27331
2021-06-19 02:14:28 | INFO | train_inner | epoch 001: 2469 / 3002 loss=3.013, ppl=8.07, wps=5855.6, ups=0.09, wpb=64795, bsz=128, num_updates=2449, lr=9.99884e-05, gnorm=2.443, loss_scale=1, train_wall=11, gb_free=2.8, wall=27342
2021-06-19 02:14:39 | INFO | train_inner | epoch 001: 2470 / 3002 loss=2.911, ppl=7.52, wps=5868, ups=0.09, wpb=64795, bsz=128, num_updates=2450, lr=9.99884e-05, gnorm=2.295, loss_scale=1, train_wall=11, gb_free=2.8, wall=27353
2021-06-19 02:14:50 | INFO | train_inner | epoch 001: 2471 / 3002 loss=2.949, ppl=7.72, wps=5799.6, ups=0.09, wpb=64856, bsz=128, num_updates=2451, lr=9.99884e-05, gnorm=2.431, loss_scale=1, train_wall=11, gb_free=2.8, wall=27365
2021-06-19 02:15:01 | INFO | train_inner | epoch 001: 2472 / 3002 loss=3.043, ppl=8.24, wps=5872.2, ups=0.09, wpb=64816, bsz=128, num_updates=2452, lr=9.99884e-05, gnorm=2.383, loss_scale=1, train_wall=11, gb_free=2.8, wall=27376
2021-06-19 02:15:12 | INFO | train_inner | epoch 001: 2473 / 3002 loss=2.885, ppl=7.39, wps=5846.9, ups=0.09, wpb=64781, bsz=128, num_updates=2453, lr=9.99884e-05, gnorm=2.418, loss_scale=1, train_wall=11, gb_free=2.8, wall=27387
2021-06-19 02:15:23 | INFO | train_inner | epoch 001: 2474 / 3002 loss=2.946, ppl=7.71, wps=5861.2, ups=0.09, wpb=64817, bsz=128, num_updates=2454, lr=9.99884e-05, gnorm=2.339, loss_scale=1, train_wall=11, gb_free=2.8, wall=27398
2021-06-19 02:15:35 | INFO | train_inner | epoch 001: 2475 / 3002 loss=2.99, ppl=7.94, wps=5823.9, ups=0.09, wpb=64810, bsz=128, num_updates=2455, lr=9.99884e-05, gnorm=2.648, loss_scale=1, train_wall=11, gb_free=2.8, wall=27409
2021-06-19 02:15:45 | INFO | train_inner | epoch 001: 2476 / 3002 loss=2.891, ppl=7.42, wps=5942.7, ups=0.09, wpb=64880, bsz=128, num_updates=2456, lr=9.99884e-05, gnorm=3.059, loss_scale=1, train_wall=10, gb_free=2.8, wall=27420
2021-06-19 02:15:57 | INFO | train_inner | epoch 001: 2477 / 3002 loss=3.007, ppl=8.04, wps=5848.7, ups=0.09, wpb=64755, bsz=128, num_updates=2457, lr=9.99883e-05, gnorm=2.392, loss_scale=1, train_wall=11, gb_free=2.8, wall=27431
2021-06-19 02:16:08 | INFO | train_inner | epoch 001: 2478 / 3002 loss=2.94, ppl=7.67, wps=5782.8, ups=0.09, wpb=64758, bsz=128, num_updates=2458, lr=9.99883e-05, gnorm=2.73, loss_scale=1, train_wall=11, gb_free=2.8, wall=27442
2021-06-19 02:16:19 | INFO | train_inner | epoch 001: 2479 / 3002 loss=2.884, ppl=7.38, wps=5853.6, ups=0.09, wpb=64904, bsz=128, num_updates=2459, lr=9.99883e-05, gnorm=2.438, loss_scale=1, train_wall=11, gb_free=2.8, wall=27453
2021-06-19 02:16:30 | INFO | train_inner | epoch 001: 2480 / 3002 loss=2.848, ppl=7.2, wps=5804.3, ups=0.09, wpb=64823, bsz=128, num_updates=2460, lr=9.99883e-05, gnorm=2.422, loss_scale=1, train_wall=11, gb_free=2.8, wall=27464
2021-06-19 02:16:41 | INFO | train_inner | epoch 001: 2481 / 3002 loss=2.815, ppl=7.04, wps=5799.6, ups=0.09, wpb=64822, bsz=128, num_updates=2461, lr=9.99883e-05, gnorm=2.398, loss_scale=1, train_wall=11, gb_free=2.8, wall=27476
2021-06-19 02:16:52 | INFO | train_inner | epoch 001: 2482 / 3002 loss=2.863, ppl=7.28, wps=5843.4, ups=0.09, wpb=64540, bsz=128, num_updates=2462, lr=9.99883e-05, gnorm=2.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=27487
2021-06-19 02:17:03 | INFO | train_inner | epoch 001: 2483 / 3002 loss=2.872, ppl=7.32, wps=5865.8, ups=0.09, wpb=64764, bsz=128, num_updates=2463, lr=9.99883e-05, gnorm=2.487, loss_scale=1, train_wall=11, gb_free=2.8, wall=27498
2021-06-19 02:17:14 | INFO | train_inner | epoch 001: 2484 / 3002 loss=2.889, ppl=7.41, wps=5792.7, ups=0.09, wpb=64887, bsz=128, num_updates=2464, lr=9.99883e-05, gnorm=2.366, loss_scale=1, train_wall=11, gb_free=2.8, wall=27509
2021-06-19 02:17:26 | INFO | train_inner | epoch 001: 2485 / 3002 loss=2.913, ppl=7.53, wps=5820.6, ups=0.09, wpb=64784, bsz=128, num_updates=2465, lr=9.99883e-05, gnorm=2.483, loss_scale=1, train_wall=11, gb_free=2.8, wall=27520
2021-06-19 02:17:37 | INFO | train_inner | epoch 001: 2486 / 3002 loss=2.942, ppl=7.68, wps=5812.9, ups=0.09, wpb=64827, bsz=128, num_updates=2466, lr=9.99883e-05, gnorm=2.345, loss_scale=1, train_wall=11, gb_free=2.8, wall=27531
2021-06-19 02:17:48 | INFO | train_inner | epoch 001: 2487 / 3002 loss=2.753, ppl=6.74, wps=5847.4, ups=0.09, wpb=64889, bsz=128, num_updates=2467, lr=9.99883e-05, gnorm=2.404, loss_scale=1, train_wall=11, gb_free=2.8, wall=27542
2021-06-19 02:17:59 | INFO | train_inner | epoch 001: 2488 / 3002 loss=2.995, ppl=7.97, wps=5865.7, ups=0.09, wpb=64790, bsz=128, num_updates=2468, lr=9.99883e-05, gnorm=2.414, loss_scale=1, train_wall=11, gb_free=2.8, wall=27553
2021-06-19 02:18:10 | INFO | train_inner | epoch 001: 2489 / 3002 loss=3.002, ppl=8.01, wps=5904.5, ups=0.09, wpb=64799, bsz=128, num_updates=2469, lr=9.99882e-05, gnorm=2.392, loss_scale=1, train_wall=11, gb_free=2.8, wall=27564
2021-06-19 02:18:21 | INFO | train_inner | epoch 001: 2490 / 3002 loss=2.778, ppl=6.86, wps=5748.4, ups=0.09, wpb=64873, bsz=128, num_updates=2470, lr=9.99882e-05, gnorm=2.333, loss_scale=1, train_wall=11, gb_free=2.8, wall=27575
2021-06-19 02:18:32 | INFO | train_inner | epoch 001: 2491 / 3002 loss=2.754, ppl=6.75, wps=5872.9, ups=0.09, wpb=64795, bsz=128, num_updates=2471, lr=9.99882e-05, gnorm=2.227, loss_scale=1, train_wall=11, gb_free=2.8, wall=27587
2021-06-19 02:18:43 | INFO | train_inner | epoch 001: 2492 / 3002 loss=3.009, ppl=8.05, wps=5841.7, ups=0.09, wpb=64796, bsz=128, num_updates=2472, lr=9.99882e-05, gnorm=2.356, loss_scale=1, train_wall=11, gb_free=2.8, wall=27598
2021-06-19 02:18:54 | INFO | train_inner | epoch 001: 2493 / 3002 loss=2.896, ppl=7.45, wps=5921.9, ups=0.09, wpb=64792, bsz=128, num_updates=2473, lr=9.99882e-05, gnorm=2.371, loss_scale=1, train_wall=10, gb_free=2.8, wall=27609
2021-06-19 02:19:05 | INFO | train_inner | epoch 001: 2494 / 3002 loss=3.106, ppl=8.61, wps=5748.3, ups=0.09, wpb=64763, bsz=128, num_updates=2474, lr=9.99882e-05, gnorm=2.398, loss_scale=1, train_wall=11, gb_free=2.8, wall=27620
2021-06-19 02:19:17 | INFO | train_inner | epoch 001: 2495 / 3002 loss=2.881, ppl=7.37, wps=5813.8, ups=0.09, wpb=64850, bsz=128, num_updates=2475, lr=9.99882e-05, gnorm=2.343, loss_scale=1, train_wall=11, gb_free=2.8, wall=27631
2021-06-19 02:19:28 | INFO | train_inner | epoch 001: 2496 / 3002 loss=2.845, ppl=7.18, wps=5878.2, ups=0.09, wpb=64876, bsz=128, num_updates=2476, lr=9.99882e-05, gnorm=2.292, loss_scale=1, train_wall=11, gb_free=2.8, wall=27642
2021-06-19 02:19:39 | INFO | train_inner | epoch 001: 2497 / 3002 loss=2.942, ppl=7.68, wps=5860.3, ups=0.09, wpb=64816, bsz=128, num_updates=2477, lr=9.99882e-05, gnorm=2.702, loss_scale=1, train_wall=11, gb_free=2.8, wall=27653
2021-06-19 02:19:50 | INFO | train_inner | epoch 001: 2498 / 3002 loss=2.935, ppl=7.65, wps=5818.4, ups=0.09, wpb=64779, bsz=128, num_updates=2478, lr=9.99882e-05, gnorm=2.512, loss_scale=1, train_wall=11, gb_free=2.8, wall=27664
2021-06-19 02:20:01 | INFO | train_inner | epoch 001: 2499 / 3002 loss=2.925, ppl=7.59, wps=5884.3, ups=0.09, wpb=64832, bsz=128, num_updates=2479, lr=9.99882e-05, gnorm=2.37, loss_scale=1, train_wall=11, gb_free=2.8, wall=27675
2021-06-19 02:20:12 | INFO | train_inner | epoch 001: 2500 / 3002 loss=2.959, ppl=7.77, wps=5962.1, ups=0.09, wpb=64759, bsz=128, num_updates=2480, lr=9.99882e-05, gnorm=2.351, loss_scale=1, train_wall=10, gb_free=2.8, wall=27686
2021-06-19 02:20:23 | INFO | train_inner | epoch 001: 2501 / 3002 loss=2.755, ppl=6.75, wps=5869.5, ups=0.09, wpb=64872, bsz=128, num_updates=2481, lr=9.99882e-05, gnorm=2.334, loss_scale=1, train_wall=11, gb_free=2.8, wall=27697
2021-06-19 02:20:34 | INFO | train_inner | epoch 001: 2502 / 3002 loss=2.898, ppl=7.46, wps=5786.6, ups=0.09, wpb=64831, bsz=128, num_updates=2482, lr=9.99881e-05, gnorm=2.468, loss_scale=1, train_wall=11, gb_free=2.8, wall=27708
2021-06-19 02:20:45 | INFO | train_inner | epoch 001: 2503 / 3002 loss=2.669, ppl=6.36, wps=5841.4, ups=0.09, wpb=64782, bsz=128, num_updates=2483, lr=9.99881e-05, gnorm=2.396, loss_scale=1, train_wall=11, gb_free=2.8, wall=27719
2021-06-19 02:20:56 | INFO | train_inner | epoch 001: 2504 / 3002 loss=2.869, ppl=7.31, wps=5873.2, ups=0.09, wpb=64841, bsz=128, num_updates=2484, lr=9.99881e-05, gnorm=2.314, loss_scale=1, train_wall=11, gb_free=2.8, wall=27730
2021-06-19 02:21:07 | INFO | train_inner | epoch 001: 2505 / 3002 loss=2.761, ppl=6.78, wps=5921.5, ups=0.09, wpb=64786, bsz=128, num_updates=2485, lr=9.99881e-05, gnorm=3.066, loss_scale=1, train_wall=10, gb_free=2.8, wall=27741
2021-06-19 02:21:18 | INFO | train_inner | epoch 001: 2506 / 3002 loss=2.893, ppl=7.43, wps=5856, ups=0.09, wpb=64864, bsz=128, num_updates=2486, lr=9.99881e-05, gnorm=2.509, loss_scale=1, train_wall=11, gb_free=2.8, wall=27752
2021-06-19 02:21:29 | INFO | train_inner | epoch 001: 2507 / 3002 loss=2.82, ppl=7.06, wps=5730.6, ups=0.09, wpb=64943, bsz=128, num_updates=2487, lr=9.99881e-05, gnorm=2.28, loss_scale=1, train_wall=11, gb_free=2.8, wall=27764
2021-06-19 02:21:41 | INFO | train_inner | epoch 001: 2508 / 3002 loss=2.854, ppl=7.23, wps=5865.3, ups=0.09, wpb=64916, bsz=128, num_updates=2488, lr=9.99881e-05, gnorm=2.373, loss_scale=1, train_wall=11, gb_free=2.8, wall=27775
2021-06-19 02:21:51 | INFO | train_inner | epoch 001: 2509 / 3002 loss=2.861, ppl=7.26, wps=6001.5, ups=0.09, wpb=64838, bsz=128, num_updates=2489, lr=9.99881e-05, gnorm=2.393, loss_scale=1, train_wall=10, gb_free=2.8, wall=27786
2021-06-19 02:22:02 | INFO | train_inner | epoch 001: 2510 / 3002 loss=2.842, ppl=7.17, wps=5854.4, ups=0.09, wpb=64886, bsz=128, num_updates=2490, lr=9.99881e-05, gnorm=2.687, loss_scale=1, train_wall=11, gb_free=2.8, wall=27797
2021-06-19 02:22:13 | INFO | train_inner | epoch 001: 2511 / 3002 loss=2.817, ppl=7.05, wps=5937, ups=0.09, wpb=64948, bsz=128, num_updates=2491, lr=9.99881e-05, gnorm=2.364, loss_scale=1, train_wall=10, gb_free=2.8, wall=27808
2021-06-19 02:22:24 | INFO | train_inner | epoch 001: 2512 / 3002 loss=2.964, ppl=7.8, wps=5853.3, ups=0.09, wpb=64862, bsz=128, num_updates=2492, lr=9.99881e-05, gnorm=2.455, loss_scale=1, train_wall=11, gb_free=2.8, wall=27819
2021-06-19 02:22:35 | INFO | train_inner | epoch 001: 2513 / 3002 loss=2.902, ppl=7.48, wps=5887.7, ups=0.09, wpb=64805, bsz=128, num_updates=2493, lr=9.99881e-05, gnorm=2.467, loss_scale=1, train_wall=11, gb_free=2.8, wall=27830
2021-06-19 02:22:47 | INFO | train_inner | epoch 001: 2514 / 3002 loss=2.803, ppl=6.98, wps=5779.3, ups=0.09, wpb=64757, bsz=128, num_updates=2494, lr=9.9988e-05, gnorm=2.471, loss_scale=1, train_wall=11, gb_free=2.8, wall=27841
2021-06-19 02:22:58 | INFO | train_inner | epoch 001: 2515 / 3002 loss=2.954, ppl=7.75, wps=5867.2, ups=0.09, wpb=64851, bsz=128, num_updates=2495, lr=9.9988e-05, gnorm=3.046, loss_scale=1, train_wall=11, gb_free=2.8, wall=27852
2021-06-19 02:23:09 | INFO | train_inner | epoch 001: 2516 / 3002 loss=2.837, ppl=7.14, wps=5828.1, ups=0.09, wpb=64809, bsz=128, num_updates=2496, lr=9.9988e-05, gnorm=2.381, loss_scale=1, train_wall=11, gb_free=2.8, wall=27863
2021-06-19 02:23:20 | INFO | train_inner | epoch 001: 2517 / 3002 loss=2.975, ppl=7.86, wps=5859.5, ups=0.09, wpb=64839, bsz=128, num_updates=2497, lr=9.9988e-05, gnorm=2.523, loss_scale=1, train_wall=11, gb_free=2.8, wall=27874
2021-06-19 02:23:31 | INFO | train_inner | epoch 001: 2518 / 3002 loss=2.723, ppl=6.6, wps=5840.6, ups=0.09, wpb=64800, bsz=128, num_updates=2498, lr=9.9988e-05, gnorm=2.362, loss_scale=1, train_wall=11, gb_free=2.8, wall=27885
2021-06-19 02:23:42 | INFO | train_inner | epoch 001: 2519 / 3002 loss=2.782, ppl=6.88, wps=5825.5, ups=0.09, wpb=64791, bsz=128, num_updates=2499, lr=9.9988e-05, gnorm=2.352, loss_scale=1, train_wall=11, gb_free=2.8, wall=27896
2021-06-19 02:23:53 | INFO | train_inner | epoch 001: 2520 / 3002 loss=2.827, ppl=7.1, wps=5837.1, ups=0.09, wpb=64833, bsz=128, num_updates=2500, lr=9.9988e-05, gnorm=2.491, loss_scale=1, train_wall=11, gb_free=2.8, wall=27908
2021-06-19 02:24:04 | INFO | train_inner | epoch 001: 2521 / 3002 loss=2.916, ppl=7.55, wps=5812, ups=0.09, wpb=64839, bsz=128, num_updates=2501, lr=9.9988e-05, gnorm=2.445, loss_scale=1, train_wall=11, gb_free=2.8, wall=27919
2021-06-19 02:24:15 | INFO | train_inner | epoch 001: 2522 / 3002 loss=2.977, ppl=7.87, wps=5887.6, ups=0.09, wpb=64821, bsz=128, num_updates=2502, lr=9.9988e-05, gnorm=2.536, loss_scale=1, train_wall=11, gb_free=2.8, wall=27930
2021-06-19 02:24:27 | INFO | train_inner | epoch 001: 2523 / 3002 loss=2.775, ppl=6.84, wps=5826.7, ups=0.09, wpb=64755, bsz=128, num_updates=2503, lr=9.9988e-05, gnorm=2.365, loss_scale=1, train_wall=11, gb_free=2.8, wall=27941
2021-06-19 02:24:37 | INFO | train_inner | epoch 001: 2524 / 3002 loss=3.171, ppl=9.01, wps=5980.8, ups=0.09, wpb=64939, bsz=128, num_updates=2504, lr=9.9988e-05, gnorm=2.418, loss_scale=1, train_wall=10, gb_free=2.8, wall=27952
2021-06-19 02:24:48 | INFO | train_inner | epoch 001: 2525 / 3002 loss=2.922, ppl=7.58, wps=5890.5, ups=0.09, wpb=64862, bsz=128, num_updates=2505, lr=9.9988e-05, gnorm=2.38, loss_scale=1, train_wall=11, gb_free=2.8, wall=27963
2021-06-19 02:24:59 | INFO | train_inner | epoch 001: 2526 / 3002 loss=3.033, ppl=8.18, wps=5917.4, ups=0.09, wpb=64868, bsz=128, num_updates=2506, lr=9.9988e-05, gnorm=103.216, loss_scale=1, train_wall=11, gb_free=2.8, wall=27974
2021-06-19 02:25:10 | INFO | train_inner | epoch 001: 2527 / 3002 loss=2.997, ppl=7.98, wps=5901.7, ups=0.09, wpb=64916, bsz=128, num_updates=2507, lr=9.99879e-05, gnorm=2.336, loss_scale=1, train_wall=11, gb_free=2.8, wall=27985
2021-06-19 02:25:21 | INFO | train_inner | epoch 001: 2528 / 3002 loss=2.86, ppl=7.26, wps=5847.5, ups=0.09, wpb=64856, bsz=128, num_updates=2508, lr=9.99879e-05, gnorm=2.373, loss_scale=1, train_wall=11, gb_free=2.8, wall=27996
2021-06-19 02:25:32 | INFO | train_inner | epoch 001: 2529 / 3002 loss=2.959, ppl=7.77, wps=5993.9, ups=0.09, wpb=64807, bsz=128, num_updates=2509, lr=9.99879e-05, gnorm=2.662, loss_scale=1, train_wall=10, gb_free=2.8, wall=28007
2021-06-19 02:25:44 | INFO | train_inner | epoch 001: 2530 / 3002 loss=3.005, ppl=8.03, wps=5748.5, ups=0.09, wpb=64866, bsz=128, num_updates=2510, lr=9.99879e-05, gnorm=2.949, loss_scale=1, train_wall=11, gb_free=2.8, wall=28018
2021-06-19 02:25:55 | INFO | train_inner | epoch 001: 2531 / 3002 loss=2.857, ppl=7.25, wps=5833.5, ups=0.09, wpb=64848, bsz=128, num_updates=2511, lr=9.99879e-05, gnorm=2.436, loss_scale=1, train_wall=11, gb_free=2.8, wall=28029
2021-06-19 02:26:06 | INFO | train_inner | epoch 001: 2532 / 3002 loss=3.014, ppl=8.08, wps=5808.3, ups=0.09, wpb=64910, bsz=128, num_updates=2512, lr=9.99879e-05, gnorm=2.419, loss_scale=1, train_wall=11, gb_free=2.8, wall=28040
2021-06-19 02:26:17 | INFO | train_inner | epoch 001: 2533 / 3002 loss=2.842, ppl=7.17, wps=5894, ups=0.09, wpb=64903, bsz=128, num_updates=2513, lr=9.99879e-05, gnorm=3.675, loss_scale=1, train_wall=11, gb_free=2.8, wall=28051
2021-06-19 02:26:28 | INFO | train_inner | epoch 001: 2534 / 3002 loss=2.842, ppl=7.17, wps=5878, ups=0.09, wpb=64864, bsz=128, num_updates=2514, lr=9.99879e-05, gnorm=2.298, loss_scale=1, train_wall=11, gb_free=2.8, wall=28062
2021-06-19 02:26:39 | INFO | train_inner | epoch 001: 2535 / 3002 loss=3.006, ppl=8.04, wps=5781, ups=0.09, wpb=64831, bsz=128, num_updates=2515, lr=9.99879e-05, gnorm=3.171, loss_scale=1, train_wall=11, gb_free=2.8, wall=28073
2021-06-19 02:26:50 | INFO | train_inner | epoch 001: 2536 / 3002 loss=2.988, ppl=7.93, wps=5943.9, ups=0.09, wpb=64864, bsz=128, num_updates=2516, lr=9.99879e-05, gnorm=2.398, loss_scale=1, train_wall=10, gb_free=2.8, wall=28084
2021-06-19 02:27:01 | INFO | train_inner | epoch 001: 2537 / 3002 loss=2.978, ppl=7.88, wps=5751.8, ups=0.09, wpb=64812, bsz=128, num_updates=2517, lr=9.99879e-05, gnorm=6.894, loss_scale=1, train_wall=11, gb_free=2.8, wall=28096
2021-06-19 02:27:12 | INFO | train_inner | epoch 001: 2538 / 3002 loss=2.927, ppl=7.61, wps=5877.3, ups=0.09, wpb=64882, bsz=128, num_updates=2518, lr=9.99879e-05, gnorm=12.744, loss_scale=1, train_wall=11, gb_free=2.8, wall=28107
2021-06-19 02:27:23 | INFO | train_inner | epoch 001: 2539 / 3002 loss=3.124, ppl=8.72, wps=5867.8, ups=0.09, wpb=64779, bsz=128, num_updates=2519, lr=9.99878e-05, gnorm=5.87, loss_scale=1, train_wall=11, gb_free=2.8, wall=28118
2021-06-19 02:27:34 | INFO | train_inner | epoch 001: 2540 / 3002 loss=2.942, ppl=7.69, wps=5896.8, ups=0.09, wpb=64750, bsz=128, num_updates=2520, lr=9.99878e-05, gnorm=11.817, loss_scale=1, train_wall=11, gb_free=2.8, wall=28129
2021-06-19 02:27:45 | INFO | train_inner | epoch 001: 2541 / 3002 loss=3.157, ppl=8.92, wps=5839.8, ups=0.09, wpb=64817, bsz=128, num_updates=2521, lr=9.99878e-05, gnorm=5.065, loss_scale=1, train_wall=11, gb_free=2.8, wall=28140
2021-06-19 02:27:56 | INFO | train_inner | epoch 001: 2542 / 3002 loss=2.96, ppl=7.78, wps=5970.7, ups=0.09, wpb=64841, bsz=128, num_updates=2522, lr=9.99878e-05, gnorm=5.508, loss_scale=1, train_wall=10, gb_free=2.8, wall=28151
2021-06-19 02:28:07 | INFO | train_inner | epoch 001: 2543 / 3002 loss=2.985, ppl=7.92, wps=5851.7, ups=0.09, wpb=64843, bsz=128, num_updates=2523, lr=9.99878e-05, gnorm=2.724, loss_scale=1, train_wall=11, gb_free=2.8, wall=28162
2021-06-19 02:28:19 | INFO | train_inner | epoch 001: 2544 / 3002 loss=3.086, ppl=8.49, wps=5783.6, ups=0.09, wpb=64804, bsz=128, num_updates=2524, lr=9.99878e-05, gnorm=2.638, loss_scale=1, train_wall=11, gb_free=2.8, wall=28173
2021-06-19 02:28:30 | INFO | train_inner | epoch 001: 2545 / 3002 loss=2.984, ppl=7.91, wps=5795.7, ups=0.09, wpb=64760, bsz=128, num_updates=2525, lr=9.99878e-05, gnorm=2.421, loss_scale=1, train_wall=11, gb_free=2.8, wall=28184
2021-06-19 02:28:41 | INFO | train_inner | epoch 001: 2546 / 3002 loss=3.067, ppl=8.38, wps=5969.2, ups=0.09, wpb=64824, bsz=128, num_updates=2526, lr=9.99878e-05, gnorm=2.564, loss_scale=1, train_wall=10, gb_free=2.8, wall=28195
2021-06-19 02:28:52 | INFO | train_inner | epoch 001: 2547 / 3002 loss=2.875, ppl=7.34, wps=5920.8, ups=0.09, wpb=64775, bsz=128, num_updates=2527, lr=9.99878e-05, gnorm=2.418, loss_scale=1, train_wall=10, gb_free=2.8, wall=28206
2021-06-19 02:29:02 | INFO | train_inner | epoch 001: 2548 / 3002 loss=2.849, ppl=7.2, wps=5985.6, ups=0.09, wpb=64856, bsz=128, num_updates=2528, lr=9.99878e-05, gnorm=2.356, loss_scale=1, train_wall=10, gb_free=2.8, wall=28217
2021-06-19 02:29:13 | INFO | train_inner | epoch 001: 2549 / 3002 loss=2.856, ppl=7.24, wps=5859.8, ups=0.09, wpb=64813, bsz=128, num_updates=2529, lr=9.99878e-05, gnorm=2.465, loss_scale=1, train_wall=11, gb_free=2.8, wall=28228
2021-06-19 02:29:25 | INFO | train_inner | epoch 001: 2550 / 3002 loss=3.124, ppl=8.72, wps=5808.1, ups=0.09, wpb=64838, bsz=128, num_updates=2530, lr=9.99878e-05, gnorm=2.546, loss_scale=1, train_wall=11, gb_free=2.8, wall=28239
2021-06-19 02:29:36 | INFO | train_inner | epoch 001: 2551 / 3002 loss=3.072, ppl=8.41, wps=5715.2, ups=0.09, wpb=64783, bsz=128, num_updates=2531, lr=9.99878e-05, gnorm=2.406, loss_scale=1, train_wall=11, gb_free=2.8, wall=28250
2021-06-19 02:29:47 | INFO | train_inner | epoch 001: 2552 / 3002 loss=2.71, ppl=6.54, wps=5911.3, ups=0.09, wpb=64927, bsz=128, num_updates=2532, lr=9.99877e-05, gnorm=2.424, loss_scale=1, train_wall=11, gb_free=2.8, wall=28261
2021-06-19 02:29:58 | INFO | train_inner | epoch 001: 2553 / 3002 loss=2.948, ppl=7.72, wps=5946.5, ups=0.09, wpb=64927, bsz=128, num_updates=2533, lr=9.99877e-05, gnorm=2.642, loss_scale=1, train_wall=10, gb_free=2.8, wall=28272
2021-06-19 02:30:09 | INFO | train_inner | epoch 001: 2554 / 3002 loss=2.773, ppl=6.83, wps=5814.7, ups=0.09, wpb=64861, bsz=128, num_updates=2534, lr=9.99877e-05, gnorm=2.321, loss_scale=1, train_wall=11, gb_free=2.8, wall=28283
2021-06-19 02:30:20 | INFO | train_inner | epoch 001: 2555 / 3002 loss=3.042, ppl=8.24, wps=5771.4, ups=0.09, wpb=64769, bsz=128, num_updates=2535, lr=9.99877e-05, gnorm=2.504, loss_scale=1, train_wall=11, gb_free=2.8, wall=28295
2021-06-19 02:30:31 | INFO | train_inner | epoch 001: 2556 / 3002 loss=2.945, ppl=7.7, wps=5864.6, ups=0.09, wpb=64816, bsz=128, num_updates=2536, lr=9.99877e-05, gnorm=4.323, loss_scale=1, train_wall=11, gb_free=2.8, wall=28306
2021-06-19 02:30:42 | INFO | train_inner | epoch 001: 2557 / 3002 loss=3.109, ppl=8.63, wps=5893.5, ups=0.09, wpb=64797, bsz=128, num_updates=2537, lr=9.99877e-05, gnorm=2.564, loss_scale=1, train_wall=11, gb_free=2.8, wall=28317
2021-06-19 02:30:53 | INFO | train_inner | epoch 001: 2558 / 3002 loss=2.995, ppl=7.97, wps=5838.9, ups=0.09, wpb=64899, bsz=128, num_updates=2538, lr=9.99877e-05, gnorm=2.509, loss_scale=1, train_wall=11, gb_free=2.8, wall=28328
2021-06-19 02:31:05 | INFO | train_inner | epoch 001: 2559 / 3002 loss=3.123, ppl=8.71, wps=5836, ups=0.09, wpb=64825, bsz=128, num_updates=2539, lr=9.99877e-05, gnorm=3.699, loss_scale=1, train_wall=11, gb_free=2.8, wall=28339
2021-06-19 02:31:16 | INFO | train_inner | epoch 001: 2560 / 3002 loss=2.917, ppl=7.55, wps=5783, ups=0.09, wpb=64786, bsz=128, num_updates=2540, lr=9.99877e-05, gnorm=2.679, loss_scale=1, train_wall=11, gb_free=2.8, wall=28350
2021-06-19 02:31:27 | INFO | train_inner | epoch 001: 2561 / 3002 loss=2.875, ppl=7.34, wps=5833.6, ups=0.09, wpb=64816, bsz=128, num_updates=2541, lr=9.99877e-05, gnorm=2.344, loss_scale=1, train_wall=11, gb_free=2.8, wall=28361
2021-06-19 02:31:38 | INFO | train_inner | epoch 001: 2562 / 3002 loss=2.868, ppl=7.3, wps=5942.9, ups=0.09, wpb=64866, bsz=128, num_updates=2542, lr=9.99877e-05, gnorm=2.277, loss_scale=1, train_wall=10, gb_free=2.8, wall=28372
2021-06-19 02:31:49 | INFO | train_inner | epoch 001: 2563 / 3002 loss=2.877, ppl=7.35, wps=5849.7, ups=0.09, wpb=64881, bsz=128, num_updates=2543, lr=9.99877e-05, gnorm=2.314, loss_scale=1, train_wall=11, gb_free=2.8, wall=28383
2021-06-19 02:32:00 | INFO | train_inner | epoch 001: 2564 / 3002 loss=3.08, ppl=8.46, wps=5824.8, ups=0.09, wpb=64765, bsz=128, num_updates=2544, lr=9.99876e-05, gnorm=72.555, loss_scale=2, train_wall=11, gb_free=2.8, wall=28394
2021-06-19 02:32:11 | INFO | train_inner | epoch 001: 2565 / 3002 loss=2.907, ppl=7.5, wps=5766.2, ups=0.09, wpb=64755, bsz=128, num_updates=2545, lr=9.99876e-05, gnorm=2.401, loss_scale=2, train_wall=11, gb_free=2.8, wall=28406
2021-06-19 02:32:22 | INFO | train_inner | epoch 001: 2566 / 3002 loss=2.807, ppl=7, wps=5750.8, ups=0.09, wpb=64821, bsz=128, num_updates=2546, lr=9.99876e-05, gnorm=2.855, loss_scale=2, train_wall=11, gb_free=2.8, wall=28417
2021-06-19 02:32:33 | INFO | train_inner | epoch 001: 2567 / 3002 loss=2.939, ppl=7.67, wps=5929.6, ups=0.09, wpb=64843, bsz=128, num_updates=2547, lr=9.99876e-05, gnorm=2.632, loss_scale=2, train_wall=10, gb_free=2.8, wall=28428
2021-06-19 02:32:45 | INFO | train_inner | epoch 001: 2568 / 3002 loss=2.887, ppl=7.4, wps=5823, ups=0.09, wpb=64811, bsz=128, num_updates=2548, lr=9.99876e-05, gnorm=2.544, loss_scale=2, train_wall=11, gb_free=2.8, wall=28439
2021-06-19 02:32:55 | INFO | train_inner | epoch 001: 2569 / 3002 loss=2.805, ppl=6.99, wps=5930, ups=0.09, wpb=64919, bsz=128, num_updates=2549, lr=9.99876e-05, gnorm=4.183, loss_scale=2, train_wall=10, gb_free=2.8, wall=28450
2021-06-19 02:33:06 | INFO | train_inner | epoch 001: 2570 / 3002 loss=2.88, ppl=7.36, wps=5904.9, ups=0.09, wpb=64798, bsz=128, num_updates=2550, lr=9.99876e-05, gnorm=3.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=28461
2021-06-19 02:33:17 | INFO | train_inner | epoch 001: 2571 / 3002 loss=2.948, ppl=7.72, wps=5918.2, ups=0.09, wpb=64869, bsz=128, num_updates=2551, lr=9.99876e-05, gnorm=2.716, loss_scale=2, train_wall=10, gb_free=2.8, wall=28472
2021-06-19 02:33:28 | INFO | train_inner | epoch 001: 2572 / 3002 loss=2.855, ppl=7.24, wps=5861, ups=0.09, wpb=64833, bsz=128, num_updates=2552, lr=9.99876e-05, gnorm=2.311, loss_scale=2, train_wall=11, gb_free=2.8, wall=28483
2021-06-19 02:33:40 | INFO | train_inner | epoch 001: 2573 / 3002 loss=3.122, ppl=8.71, wps=5736.4, ups=0.09, wpb=64780, bsz=128, num_updates=2553, lr=9.99876e-05, gnorm=3.072, loss_scale=2, train_wall=11, gb_free=2.8, wall=28494
2021-06-19 02:33:51 | INFO | train_inner | epoch 001: 2574 / 3002 loss=2.902, ppl=7.47, wps=5855.9, ups=0.09, wpb=64813, bsz=128, num_updates=2554, lr=9.99876e-05, gnorm=3.108, loss_scale=2, train_wall=11, gb_free=2.8, wall=28505
2021-06-19 02:34:02 | INFO | train_inner | epoch 001: 2575 / 3002 loss=3.034, ppl=8.19, wps=5856, ups=0.09, wpb=64826, bsz=128, num_updates=2555, lr=9.99876e-05, gnorm=2.575, loss_scale=2, train_wall=11, gb_free=2.8, wall=28516
2021-06-19 02:34:13 | INFO | train_inner | epoch 001: 2576 / 3002 loss=2.77, ppl=6.82, wps=5900.3, ups=0.09, wpb=64812, bsz=128, num_updates=2556, lr=9.99876e-05, gnorm=3.463, loss_scale=2, train_wall=10, gb_free=2.8, wall=28527
2021-06-19 02:34:24 | INFO | train_inner | epoch 001: 2577 / 3002 loss=2.828, ppl=7.1, wps=5894.4, ups=0.09, wpb=64827, bsz=128, num_updates=2557, lr=9.99875e-05, gnorm=2.493, loss_scale=2, train_wall=11, gb_free=2.8, wall=28538
2021-06-19 02:34:35 | INFO | train_inner | epoch 001: 2578 / 3002 loss=2.88, ppl=7.36, wps=5779, ups=0.09, wpb=64797, bsz=128, num_updates=2558, lr=9.99875e-05, gnorm=2.471, loss_scale=2, train_wall=11, gb_free=2.8, wall=28549
2021-06-19 02:34:46 | INFO | train_inner | epoch 001: 2579 / 3002 loss=2.859, ppl=7.25, wps=5907.5, ups=0.09, wpb=64868, bsz=128, num_updates=2559, lr=9.99875e-05, gnorm=2.764, loss_scale=2, train_wall=11, gb_free=2.8, wall=28560
2021-06-19 02:34:57 | INFO | train_inner | epoch 001: 2580 / 3002 loss=2.954, ppl=7.75, wps=5917.9, ups=0.09, wpb=64828, bsz=128, num_updates=2560, lr=9.99875e-05, gnorm=2.576, loss_scale=2, train_wall=10, gb_free=2.8, wall=28571
2021-06-19 02:35:08 | INFO | train_inner | epoch 001: 2581 / 3002 loss=2.98, ppl=7.89, wps=5844.4, ups=0.09, wpb=64831, bsz=128, num_updates=2561, lr=9.99875e-05, gnorm=2.642, loss_scale=2, train_wall=11, gb_free=2.8, wall=28582
2021-06-19 02:35:19 | INFO | train_inner | epoch 001: 2582 / 3002 loss=2.934, ppl=7.64, wps=5907.7, ups=0.09, wpb=64867, bsz=128, num_updates=2562, lr=9.99875e-05, gnorm=9.593, loss_scale=2, train_wall=11, gb_free=2.8, wall=28593
2021-06-19 02:35:30 | INFO | train_inner | epoch 001: 2583 / 3002 loss=2.873, ppl=7.32, wps=5924.6, ups=0.09, wpb=64832, bsz=128, num_updates=2563, lr=9.99875e-05, gnorm=6.625, loss_scale=2, train_wall=10, gb_free=2.8, wall=28604
2021-06-19 02:35:41 | INFO | train_inner | epoch 001: 2584 / 3002 loss=2.763, ppl=6.79, wps=5757.8, ups=0.09, wpb=64840, bsz=128, num_updates=2564, lr=9.99875e-05, gnorm=19.318, loss_scale=2, train_wall=11, gb_free=2.8, wall=28616
2021-06-19 02:35:52 | INFO | train_inner | epoch 001: 2585 / 3002 loss=2.984, ppl=7.91, wps=5890.6, ups=0.09, wpb=64853, bsz=128, num_updates=2565, lr=9.99875e-05, gnorm=2.738, loss_scale=2, train_wall=11, gb_free=2.8, wall=28627
2021-06-19 02:36:03 | INFO | train_inner | epoch 001: 2586 / 3002 loss=3.057, ppl=8.32, wps=5805.2, ups=0.09, wpb=64807, bsz=128, num_updates=2566, lr=9.99875e-05, gnorm=4.539, loss_scale=2, train_wall=11, gb_free=2.8, wall=28638
2021-06-19 02:36:15 | INFO | train_inner | epoch 001: 2587 / 3002 loss=3.068, ppl=8.39, wps=5826.6, ups=0.09, wpb=64804, bsz=128, num_updates=2567, lr=9.99875e-05, gnorm=3.037, loss_scale=2, train_wall=11, gb_free=2.8, wall=28649
2021-06-19 02:36:26 | INFO | train_inner | epoch 001: 2588 / 3002 loss=2.887, ppl=7.4, wps=5889.3, ups=0.09, wpb=64843, bsz=128, num_updates=2568, lr=9.99875e-05, gnorm=4.999, loss_scale=2, train_wall=11, gb_free=2.8, wall=28660
2021-06-19 02:36:37 | INFO | train_inner | epoch 001: 2589 / 3002 loss=3.089, ppl=8.51, wps=5832.7, ups=0.09, wpb=64844, bsz=128, num_updates=2569, lr=9.99874e-05, gnorm=2.943, loss_scale=2, train_wall=11, gb_free=2.8, wall=28671
2021-06-19 02:36:48 | INFO | train_inner | epoch 001: 2590 / 3002 loss=2.954, ppl=7.75, wps=5981.2, ups=0.09, wpb=64822, bsz=128, num_updates=2570, lr=9.99874e-05, gnorm=2.924, loss_scale=2, train_wall=10, gb_free=2.8, wall=28682
2021-06-19 02:36:59 | INFO | train_inner | epoch 001: 2591 / 3002 loss=3.007, ppl=8.04, wps=5897.9, ups=0.09, wpb=64853, bsz=128, num_updates=2571, lr=9.99874e-05, gnorm=3.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=28693
2021-06-19 02:37:10 | INFO | train_inner | epoch 001: 2592 / 3002 loss=2.999, ppl=7.99, wps=5779.2, ups=0.09, wpb=64774, bsz=128, num_updates=2572, lr=9.99874e-05, gnorm=2.889, loss_scale=2, train_wall=11, gb_free=2.8, wall=28704
2021-06-19 02:37:21 | INFO | train_inner | epoch 001: 2593 / 3002 loss=2.905, ppl=7.49, wps=5901.8, ups=0.09, wpb=64867, bsz=128, num_updates=2573, lr=9.99874e-05, gnorm=2.742, loss_scale=2, train_wall=11, gb_free=2.8, wall=28715
2021-06-19 02:37:32 | INFO | train_inner | epoch 001: 2594 / 3002 loss=2.818, ppl=7.05, wps=5853.1, ups=0.09, wpb=64824, bsz=128, num_updates=2574, lr=9.99874e-05, gnorm=2.867, loss_scale=2, train_wall=11, gb_free=2.8, wall=28726
2021-06-19 02:37:43 | INFO | train_inner | epoch 001: 2595 / 3002 loss=2.979, ppl=7.88, wps=5904, ups=0.09, wpb=64840, bsz=128, num_updates=2575, lr=9.99874e-05, gnorm=2.68, loss_scale=2, train_wall=11, gb_free=2.8, wall=28737
2021-06-19 02:37:54 | INFO | train_inner | epoch 001: 2596 / 3002 loss=3.049, ppl=8.27, wps=5935.9, ups=0.09, wpb=64862, bsz=128, num_updates=2576, lr=9.99874e-05, gnorm=5.816, loss_scale=2, train_wall=10, gb_free=2.8, wall=28748
2021-06-19 02:38:05 | INFO | train_inner | epoch 001: 2597 / 3002 loss=2.898, ppl=7.45, wps=5834.1, ups=0.09, wpb=64758, bsz=128, num_updates=2577, lr=9.99874e-05, gnorm=3.221, loss_scale=2, train_wall=11, gb_free=2.8, wall=28759
2021-06-19 02:38:16 | INFO | train_inner | epoch 001: 2598 / 3002 loss=2.905, ppl=7.49, wps=5794.8, ups=0.09, wpb=64791, bsz=128, num_updates=2578, lr=9.99874e-05, gnorm=3.106, loss_scale=2, train_wall=11, gb_free=2.8, wall=28770
2021-06-19 02:38:27 | INFO | train_inner | epoch 001: 2599 / 3002 loss=2.894, ppl=7.43, wps=5911.3, ups=0.09, wpb=64796, bsz=128, num_updates=2579, lr=9.99874e-05, gnorm=3.018, loss_scale=2, train_wall=10, gb_free=2.8, wall=28781
2021-06-19 02:38:38 | INFO | train_inner | epoch 001: 2600 / 3002 loss=3.036, ppl=8.2, wps=5866, ups=0.09, wpb=64836, bsz=128, num_updates=2580, lr=9.99874e-05, gnorm=2.726, loss_scale=2, train_wall=11, gb_free=2.8, wall=28792
2021-06-19 02:38:49 | INFO | train_inner | epoch 001: 2601 / 3002 loss=3.002, ppl=8.01, wps=6005.8, ups=0.09, wpb=64901, bsz=128, num_updates=2581, lr=9.99874e-05, gnorm=2.668, loss_scale=2, train_wall=10, gb_free=2.8, wall=28803
2021-06-19 02:39:00 | INFO | train_inner | epoch 001: 2602 / 3002 loss=3.058, ppl=8.33, wps=5873.6, ups=0.09, wpb=64817, bsz=128, num_updates=2582, lr=9.99873e-05, gnorm=2.829, loss_scale=2, train_wall=11, gb_free=2.8, wall=28814
2021-06-19 02:39:11 | INFO | train_inner | epoch 001: 2603 / 3002 loss=2.731, ppl=6.64, wps=5819.7, ups=0.09, wpb=64791, bsz=128, num_updates=2583, lr=9.99873e-05, gnorm=2.375, loss_scale=2, train_wall=11, gb_free=2.8, wall=28825
2021-06-19 02:39:22 | INFO | train_inner | epoch 001: 2604 / 3002 loss=2.976, ppl=7.87, wps=5937.5, ups=0.09, wpb=64863, bsz=128, num_updates=2584, lr=9.99873e-05, gnorm=5.467, loss_scale=2, train_wall=10, gb_free=2.8, wall=28836
2021-06-19 02:39:33 | INFO | train_inner | epoch 001: 2605 / 3002 loss=2.962, ppl=7.79, wps=5796, ups=0.09, wpb=64878, bsz=128, num_updates=2585, lr=9.99873e-05, gnorm=3.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=28847
2021-06-19 02:39:44 | INFO | train_inner | epoch 001: 2606 / 3002 loss=2.928, ppl=7.61, wps=5868.2, ups=0.09, wpb=64804, bsz=128, num_updates=2586, lr=9.99873e-05, gnorm=2.739, loss_scale=2, train_wall=11, gb_free=2.8, wall=28859
2021-06-19 02:39:55 | INFO | train_inner | epoch 001: 2607 / 3002 loss=2.9, ppl=7.46, wps=5822.3, ups=0.09, wpb=64741, bsz=128, num_updates=2587, lr=9.99873e-05, gnorm=2.623, loss_scale=2, train_wall=11, gb_free=2.8, wall=28870
2021-06-19 02:40:06 | INFO | train_inner | epoch 001: 2608 / 3002 loss=2.876, ppl=7.34, wps=5800.5, ups=0.09, wpb=64798, bsz=128, num_updates=2588, lr=9.99873e-05, gnorm=2.411, loss_scale=2, train_wall=11, gb_free=2.8, wall=28881
2021-06-19 02:40:18 | INFO | train_inner | epoch 001: 2609 / 3002 loss=2.969, ppl=7.83, wps=5818.5, ups=0.09, wpb=64831, bsz=128, num_updates=2589, lr=9.99873e-05, gnorm=3.398, loss_scale=2, train_wall=11, gb_free=2.8, wall=28892
2021-06-19 02:40:29 | INFO | train_inner | epoch 001: 2610 / 3002 loss=3.008, ppl=8.05, wps=5958.3, ups=0.09, wpb=64889, bsz=128, num_updates=2590, lr=9.99873e-05, gnorm=2.834, loss_scale=2, train_wall=10, gb_free=2.8, wall=28903
2021-06-19 02:40:39 | INFO | train_inner | epoch 001: 2611 / 3002 loss=2.893, ppl=7.43, wps=6002, ups=0.09, wpb=64808, bsz=128, num_updates=2591, lr=9.99873e-05, gnorm=4.431, loss_scale=2, train_wall=10, gb_free=2.8, wall=28914
2021-06-19 02:40:50 | INFO | train_inner | epoch 001: 2612 / 3002 loss=2.739, ppl=6.68, wps=6020, ups=0.09, wpb=64903, bsz=128, num_updates=2592, lr=9.99873e-05, gnorm=2.485, loss_scale=2, train_wall=10, gb_free=2.8, wall=28924
2021-06-19 02:41:01 | INFO | train_inner | epoch 001: 2613 / 3002 loss=2.846, ppl=7.19, wps=5834, ups=0.09, wpb=64863, bsz=128, num_updates=2593, lr=9.99873e-05, gnorm=2.459, loss_scale=2, train_wall=11, gb_free=2.8, wall=28936
2021-06-19 02:41:12 | INFO | train_inner | epoch 001: 2614 / 3002 loss=2.938, ppl=7.66, wps=5835.4, ups=0.09, wpb=64847, bsz=128, num_updates=2594, lr=9.99872e-05, gnorm=3.098, loss_scale=2, train_wall=11, gb_free=2.8, wall=28947
2021-06-19 02:41:23 | INFO | train_inner | epoch 001: 2615 / 3002 loss=2.806, ppl=6.99, wps=5895.4, ups=0.09, wpb=64872, bsz=128, num_updates=2595, lr=9.99872e-05, gnorm=2.44, loss_scale=2, train_wall=11, gb_free=2.8, wall=28958
2021-06-19 02:41:34 | INFO | train_inner | epoch 001: 2616 / 3002 loss=3.054, ppl=8.3, wps=5851.9, ups=0.09, wpb=64761, bsz=128, num_updates=2596, lr=9.99872e-05, gnorm=2.655, loss_scale=2, train_wall=11, gb_free=2.8, wall=28969
2021-06-19 02:41:45 | INFO | train_inner | epoch 001: 2617 / 3002 loss=2.941, ppl=7.68, wps=5848.6, ups=0.09, wpb=64852, bsz=128, num_updates=2597, lr=9.99872e-05, gnorm=2.408, loss_scale=2, train_wall=11, gb_free=2.8, wall=28980
2021-06-19 02:41:56 | INFO | train_inner | epoch 001: 2618 / 3002 loss=2.847, ppl=7.2, wps=5946.2, ups=0.09, wpb=64951, bsz=128, num_updates=2598, lr=9.99872e-05, gnorm=2.443, loss_scale=2, train_wall=10, gb_free=2.8, wall=28991
2021-06-19 02:42:08 | INFO | train_inner | epoch 001: 2619 / 3002 loss=2.827, ppl=7.1, wps=5780, ups=0.09, wpb=64824, bsz=128, num_updates=2599, lr=9.99872e-05, gnorm=7.084, loss_scale=2, train_wall=11, gb_free=2.8, wall=29002
2021-06-19 02:42:19 | INFO | train_inner | epoch 001: 2620 / 3002 loss=2.844, ppl=7.18, wps=5820, ups=0.09, wpb=64796, bsz=128, num_updates=2600, lr=9.99872e-05, gnorm=2.564, loss_scale=2, train_wall=11, gb_free=2.8, wall=29013
2021-06-19 02:42:30 | INFO | train_inner | epoch 001: 2621 / 3002 loss=3.009, ppl=8.05, wps=5872.6, ups=0.09, wpb=64788, bsz=128, num_updates=2601, lr=9.99872e-05, gnorm=2.762, loss_scale=2, train_wall=11, gb_free=2.8, wall=29024
2021-06-19 02:42:41 | INFO | train_inner | epoch 001: 2622 / 3002 loss=3.011, ppl=8.06, wps=5886.6, ups=0.09, wpb=64825, bsz=128, num_updates=2602, lr=9.99872e-05, gnorm=2.451, loss_scale=2, train_wall=11, gb_free=2.8, wall=29035
2021-06-19 02:42:52 | INFO | train_inner | epoch 001: 2623 / 3002 loss=3.038, ppl=8.22, wps=5944.4, ups=0.09, wpb=64922, bsz=128, num_updates=2603, lr=9.99872e-05, gnorm=2.436, loss_scale=2, train_wall=10, gb_free=2.8, wall=29046
2021-06-19 02:43:03 | INFO | train_inner | epoch 001: 2624 / 3002 loss=2.883, ppl=7.38, wps=5865.3, ups=0.09, wpb=64771, bsz=128, num_updates=2604, lr=9.99872e-05, gnorm=2.419, loss_scale=2, train_wall=11, gb_free=2.8, wall=29057
2021-06-19 02:43:14 | INFO | train_inner | epoch 001: 2625 / 3002 loss=2.945, ppl=7.7, wps=5985.5, ups=0.09, wpb=64856, bsz=128, num_updates=2605, lr=9.99872e-05, gnorm=2.645, loss_scale=2, train_wall=10, gb_free=2.8, wall=29068
2021-06-19 02:43:25 | INFO | train_inner | epoch 001: 2626 / 3002 loss=2.984, ppl=7.91, wps=5925.5, ups=0.09, wpb=64938, bsz=128, num_updates=2606, lr=9.99872e-05, gnorm=3.794, loss_scale=2, train_wall=11, gb_free=2.8, wall=29079
2021-06-19 02:43:36 | INFO | train_inner | epoch 001: 2627 / 3002 loss=3.073, ppl=8.42, wps=5791.9, ups=0.09, wpb=64828, bsz=128, num_updates=2607, lr=9.99871e-05, gnorm=2.517, loss_scale=2, train_wall=11, gb_free=2.8, wall=29090
2021-06-19 02:43:47 | INFO | train_inner | epoch 001: 2628 / 3002 loss=2.769, ppl=6.81, wps=5808.7, ups=0.09, wpb=64807, bsz=128, num_updates=2608, lr=9.99871e-05, gnorm=2.385, loss_scale=2, train_wall=11, gb_free=2.8, wall=29101
2021-06-19 02:43:58 | INFO | train_inner | epoch 001: 2629 / 3002 loss=2.81, ppl=7.01, wps=5948.9, ups=0.09, wpb=64902, bsz=128, num_updates=2609, lr=9.99871e-05, gnorm=2.413, loss_scale=2, train_wall=10, gb_free=2.8, wall=29112
2021-06-19 02:44:09 | INFO | train_inner | epoch 001: 2630 / 3002 loss=2.832, ppl=7.12, wps=5760.2, ups=0.09, wpb=64801, bsz=128, num_updates=2610, lr=9.99871e-05, gnorm=2.433, loss_scale=2, train_wall=11, gb_free=2.8, wall=29123
2021-06-19 02:44:20 | INFO | train_inner | epoch 001: 2631 / 3002 loss=2.782, ppl=6.88, wps=5838.8, ups=0.09, wpb=64917, bsz=128, num_updates=2611, lr=9.99871e-05, gnorm=2.598, loss_scale=2, train_wall=11, gb_free=2.8, wall=29135
2021-06-19 02:44:31 | INFO | train_inner | epoch 001: 2632 / 3002 loss=2.806, ppl=6.99, wps=5855.4, ups=0.09, wpb=64958, bsz=128, num_updates=2612, lr=9.99871e-05, gnorm=2.45, loss_scale=2, train_wall=11, gb_free=2.8, wall=29146
2021-06-19 02:44:42 | INFO | train_inner | epoch 001: 2633 / 3002 loss=2.841, ppl=7.17, wps=5854.5, ups=0.09, wpb=64831, bsz=128, num_updates=2613, lr=9.99871e-05, gnorm=2.377, loss_scale=2, train_wall=11, gb_free=2.8, wall=29157
2021-06-19 02:44:53 | INFO | train_inner | epoch 001: 2634 / 3002 loss=2.814, ppl=7.03, wps=5851, ups=0.09, wpb=64807, bsz=128, num_updates=2614, lr=9.99871e-05, gnorm=2.449, loss_scale=2, train_wall=11, gb_free=2.8, wall=29168
2021-06-19 02:45:05 | INFO | train_inner | epoch 001: 2635 / 3002 loss=2.945, ppl=7.7, wps=5803.3, ups=0.09, wpb=64764, bsz=128, num_updates=2615, lr=9.99871e-05, gnorm=2.438, loss_scale=2, train_wall=11, gb_free=2.8, wall=29179
2021-06-19 02:45:16 | INFO | train_inner | epoch 001: 2636 / 3002 loss=3.058, ppl=8.33, wps=5859.7, ups=0.09, wpb=64835, bsz=128, num_updates=2616, lr=9.99871e-05, gnorm=2.46, loss_scale=2, train_wall=11, gb_free=2.8, wall=29190
2021-06-19 02:45:27 | INFO | train_inner | epoch 001: 2637 / 3002 loss=2.893, ppl=7.43, wps=5819.7, ups=0.09, wpb=64812, bsz=128, num_updates=2617, lr=9.99871e-05, gnorm=2.533, loss_scale=2, train_wall=11, gb_free=2.8, wall=29201
2021-06-19 02:45:38 | INFO | train_inner | epoch 001: 2638 / 3002 loss=3.038, ppl=8.22, wps=5933.6, ups=0.09, wpb=64809, bsz=128, num_updates=2618, lr=9.99871e-05, gnorm=2.318, loss_scale=2, train_wall=10, gb_free=2.8, wall=29212
2021-06-19 02:45:49 | INFO | train_inner | epoch 001: 2639 / 3002 loss=2.854, ppl=7.23, wps=5986.5, ups=0.09, wpb=64907, bsz=128, num_updates=2619, lr=9.9987e-05, gnorm=2.405, loss_scale=2, train_wall=10, gb_free=2.8, wall=29223
2021-06-19 02:46:00 | INFO | train_inner | epoch 001: 2640 / 3002 loss=2.807, ppl=7, wps=5896.1, ups=0.09, wpb=64833, bsz=128, num_updates=2620, lr=9.9987e-05, gnorm=2.371, loss_scale=2, train_wall=11, gb_free=2.8, wall=29234
2021-06-19 02:46:11 | INFO | train_inner | epoch 001: 2641 / 3002 loss=2.902, ppl=7.47, wps=5821.5, ups=0.09, wpb=64812, bsz=128, num_updates=2621, lr=9.9987e-05, gnorm=2.406, loss_scale=2, train_wall=11, gb_free=2.8, wall=29245
2021-06-19 02:46:21 | INFO | train_inner | epoch 001: 2642 / 3002 loss=3.054, ppl=8.31, wps=6009.7, ups=0.09, wpb=64888, bsz=128, num_updates=2622, lr=9.9987e-05, gnorm=2.422, loss_scale=2, train_wall=10, gb_free=2.8, wall=29256
2021-06-19 02:46:32 | INFO | train_inner | epoch 001: 2643 / 3002 loss=2.916, ppl=7.55, wps=5892.6, ups=0.09, wpb=64804, bsz=128, num_updates=2623, lr=9.9987e-05, gnorm=2.495, loss_scale=2, train_wall=11, gb_free=2.8, wall=29267
2021-06-19 02:46:44 | INFO | train_inner | epoch 001: 2644 / 3002 loss=3.161, ppl=8.95, wps=5793.4, ups=0.09, wpb=64829, bsz=128, num_updates=2624, lr=9.9987e-05, gnorm=2.495, loss_scale=2, train_wall=11, gb_free=2.8, wall=29278
2021-06-19 02:46:55 | INFO | train_inner | epoch 001: 2645 / 3002 loss=3.124, ppl=8.72, wps=5888.7, ups=0.09, wpb=64780, bsz=128, num_updates=2625, lr=9.9987e-05, gnorm=2.483, loss_scale=2, train_wall=11, gb_free=2.8, wall=29289
2021-06-19 02:47:06 | INFO | train_inner | epoch 001: 2646 / 3002 loss=2.92, ppl=7.57, wps=5774.6, ups=0.09, wpb=64721, bsz=128, num_updates=2626, lr=9.9987e-05, gnorm=2.748, loss_scale=2, train_wall=11, gb_free=2.8, wall=29300
2021-06-19 02:47:17 | INFO | train_inner | epoch 001: 2647 / 3002 loss=2.775, ppl=6.85, wps=5898.8, ups=0.09, wpb=64903, bsz=128, num_updates=2627, lr=9.9987e-05, gnorm=2.348, loss_scale=2, train_wall=11, gb_free=2.8, wall=29311
2021-06-19 02:47:28 | INFO | train_inner | epoch 001: 2648 / 3002 loss=2.847, ppl=7.19, wps=6029.8, ups=0.09, wpb=64826, bsz=128, num_updates=2628, lr=9.9987e-05, gnorm=2.398, loss_scale=2, train_wall=10, gb_free=2.8, wall=29322
2021-06-19 02:47:39 | INFO | train_inner | epoch 001: 2649 / 3002 loss=2.885, ppl=7.39, wps=5888.1, ups=0.09, wpb=64830, bsz=128, num_updates=2629, lr=9.9987e-05, gnorm=2.709, loss_scale=2, train_wall=11, gb_free=2.8, wall=29333
2021-06-19 02:47:50 | INFO | train_inner | epoch 001: 2650 / 3002 loss=3.018, ppl=8.1, wps=5847.4, ups=0.09, wpb=64791, bsz=128, num_updates=2630, lr=9.9987e-05, gnorm=2.399, loss_scale=2, train_wall=11, gb_free=2.8, wall=29344
2021-06-19 02:48:01 | INFO | train_inner | epoch 001: 2651 / 3002 loss=2.853, ppl=7.23, wps=5925.1, ups=0.09, wpb=64825, bsz=128, num_updates=2631, lr=9.9987e-05, gnorm=2.468, loss_scale=2, train_wall=10, gb_free=2.8, wall=29355
2021-06-19 02:48:12 | INFO | train_inner | epoch 001: 2652 / 3002 loss=2.977, ppl=7.87, wps=5903.2, ups=0.09, wpb=64830, bsz=128, num_updates=2632, lr=9.99869e-05, gnorm=2.625, loss_scale=2, train_wall=11, gb_free=2.8, wall=29366
2021-06-19 02:48:23 | INFO | train_inner | epoch 001: 2653 / 3002 loss=2.911, ppl=7.52, wps=5805, ups=0.09, wpb=64850, bsz=128, num_updates=2633, lr=9.99869e-05, gnorm=2.76, loss_scale=2, train_wall=11, gb_free=2.8, wall=29377
2021-06-19 02:48:34 | INFO | train_inner | epoch 001: 2654 / 3002 loss=3.016, ppl=8.09, wps=5934.2, ups=0.09, wpb=64863, bsz=128, num_updates=2634, lr=9.99869e-05, gnorm=2.377, loss_scale=2, train_wall=10, gb_free=2.8, wall=29388
2021-06-19 02:48:45 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-19 02:48:56 | INFO | train_inner | epoch 001: 2656 / 3002 loss=2.949, ppl=7.72, wps=2944.6, ups=0.05, wpb=64860, bsz=128, num_updates=2635, lr=9.99869e-05, gnorm=2.36, loss_scale=1, train_wall=21, gb_free=2.8, wall=29410
2021-06-19 02:49:07 | INFO | train_inner | epoch 001: 2657 / 3002 loss=2.742, ppl=6.69, wps=5867, ups=0.09, wpb=64893, bsz=128, num_updates=2636, lr=9.99869e-05, gnorm=2.404, loss_scale=1, train_wall=11, gb_free=2.8, wall=29421
2021-06-19 02:49:18 | INFO | train_inner | epoch 001: 2658 / 3002 loss=2.903, ppl=7.48, wps=5741.5, ups=0.09, wpb=64868, bsz=128, num_updates=2637, lr=9.99869e-05, gnorm=2.871, loss_scale=1, train_wall=11, gb_free=2.8, wall=29432
2021-06-19 02:49:29 | INFO | train_inner | epoch 001: 2659 / 3002 loss=2.903, ppl=7.48, wps=5766.9, ups=0.09, wpb=64814, bsz=128, num_updates=2638, lr=9.99869e-05, gnorm=6.687, loss_scale=1, train_wall=11, gb_free=2.8, wall=29444
2021-06-19 02:49:40 | INFO | train_inner | epoch 001: 2660 / 3002 loss=2.896, ppl=7.44, wps=5893.2, ups=0.09, wpb=64906, bsz=128, num_updates=2639, lr=9.99869e-05, gnorm=2.519, loss_scale=1, train_wall=11, gb_free=2.8, wall=29455
2021-06-19 02:49:51 | INFO | train_inner | epoch 001: 2661 / 3002 loss=2.717, ppl=6.57, wps=5963.9, ups=0.09, wpb=64894, bsz=128, num_updates=2640, lr=9.99869e-05, gnorm=2.388, loss_scale=1, train_wall=10, gb_free=2.8, wall=29466
2021-06-19 02:50:02 | INFO | train_inner | epoch 001: 2662 / 3002 loss=2.909, ppl=7.51, wps=5889.4, ups=0.09, wpb=64836, bsz=128, num_updates=2641, lr=9.99869e-05, gnorm=2.646, loss_scale=1, train_wall=11, gb_free=2.8, wall=29477
2021-06-19 02:50:13 | INFO | train_inner | epoch 001: 2663 / 3002 loss=2.748, ppl=6.72, wps=5839.7, ups=0.09, wpb=64868, bsz=128, num_updates=2642, lr=9.99869e-05, gnorm=2.329, loss_scale=1, train_wall=11, gb_free=2.8, wall=29488
2021-06-19 02:50:25 | INFO | train_inner | epoch 001: 2664 / 3002 loss=2.764, ppl=6.79, wps=5825.8, ups=0.09, wpb=64787, bsz=128, num_updates=2643, lr=9.99869e-05, gnorm=2.434, loss_scale=1, train_wall=11, gb_free=2.8, wall=29499
2021-06-19 02:50:36 | INFO | train_inner | epoch 001: 2665 / 3002 loss=2.989, ppl=7.94, wps=5767.3, ups=0.09, wpb=64813, bsz=128, num_updates=2644, lr=9.99868e-05, gnorm=2.533, loss_scale=1, train_wall=11, gb_free=2.8, wall=29510
2021-06-19 02:50:47 | INFO | train_inner | epoch 001: 2666 / 3002 loss=3.142, ppl=8.83, wps=5826.4, ups=0.09, wpb=64876, bsz=128, num_updates=2645, lr=9.99868e-05, gnorm=2.373, loss_scale=1, train_wall=11, gb_free=2.8, wall=29521
2021-06-19 02:50:58 | INFO | train_inner | epoch 001: 2667 / 3002 loss=2.813, ppl=7.03, wps=5884.3, ups=0.09, wpb=64855, bsz=128, num_updates=2646, lr=9.99868e-05, gnorm=2.456, loss_scale=1, train_wall=11, gb_free=2.8, wall=29532
2021-06-19 02:51:09 | INFO | train_inner | epoch 001: 2668 / 3002 loss=3.074, ppl=8.42, wps=5806.5, ups=0.09, wpb=64807, bsz=128, num_updates=2647, lr=9.99868e-05, gnorm=2.389, loss_scale=1, train_wall=11, gb_free=2.8, wall=29543
2021-06-19 02:51:20 | INFO | train_inner | epoch 001: 2669 / 3002 loss=2.923, ppl=7.59, wps=6038.8, ups=0.09, wpb=64832, bsz=128, num_updates=2648, lr=9.99868e-05, gnorm=2.718, loss_scale=1, train_wall=10, gb_free=2.8, wall=29554
2021-06-19 02:51:31 | INFO | train_inner | epoch 001: 2670 / 3002 loss=2.858, ppl=7.25, wps=5951.1, ups=0.09, wpb=64810, bsz=128, num_updates=2649, lr=9.99868e-05, gnorm=4.241, loss_scale=1, train_wall=10, gb_free=2.8, wall=29565
2021-06-19 02:51:42 | INFO | train_inner | epoch 001: 2671 / 3002 loss=2.94, ppl=7.67, wps=5840.1, ups=0.09, wpb=64765, bsz=128, num_updates=2650, lr=9.99868e-05, gnorm=2.36, loss_scale=1, train_wall=11, gb_free=2.8, wall=29576
2021-06-19 02:51:53 | INFO | train_inner | epoch 001: 2672 / 3002 loss=2.849, ppl=7.2, wps=5814.2, ups=0.09, wpb=64882, bsz=128, num_updates=2651, lr=9.99868e-05, gnorm=2.423, loss_scale=1, train_wall=11, gb_free=2.8, wall=29587
2021-06-19 02:52:04 | INFO | train_inner | epoch 001: 2673 / 3002 loss=2.716, ppl=6.57, wps=5930.5, ups=0.09, wpb=64927, bsz=128, num_updates=2652, lr=9.99868e-05, gnorm=2.26, loss_scale=1, train_wall=11, gb_free=2.8, wall=29598
2021-06-19 02:52:15 | INFO | train_inner | epoch 001: 2674 / 3002 loss=2.954, ppl=7.75, wps=5895.6, ups=0.09, wpb=64857, bsz=128, num_updates=2653, lr=9.99868e-05, gnorm=2.378, loss_scale=1, train_wall=11, gb_free=2.8, wall=29609
2021-06-19 02:52:26 | INFO | train_inner | epoch 001: 2675 / 3002 loss=3.003, ppl=8.02, wps=5912.6, ups=0.09, wpb=64792, bsz=128, num_updates=2654, lr=9.99868e-05, gnorm=3.632, loss_scale=1, train_wall=11, gb_free=2.8, wall=29620
2021-06-19 02:52:37 | INFO | train_inner | epoch 001: 2676 / 3002 loss=2.886, ppl=7.39, wps=5916.6, ups=0.09, wpb=64850, bsz=128, num_updates=2655, lr=9.99868e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=29631
2021-06-19 02:52:48 | INFO | train_inner | epoch 001: 2677 / 3002 loss=3.069, ppl=8.39, wps=5788.5, ups=0.09, wpb=64736, bsz=128, num_updates=2656, lr=9.99868e-05, gnorm=2.319, loss_scale=1, train_wall=11, gb_free=2.8, wall=29642
2021-06-19 02:52:59 | INFO | train_inner | epoch 001: 2678 / 3002 loss=2.939, ppl=7.67, wps=6112.7, ups=0.09, wpb=64904, bsz=128, num_updates=2657, lr=9.99867e-05, gnorm=2.464, loss_scale=1, train_wall=10, gb_free=2.8, wall=29653
2021-06-19 02:53:10 | INFO | train_inner | epoch 001: 2679 / 3002 loss=2.788, ppl=6.91, wps=5826.2, ups=0.09, wpb=64829, bsz=128, num_updates=2658, lr=9.99867e-05, gnorm=2.382, loss_scale=1, train_wall=11, gb_free=2.8, wall=29664
2021-06-19 02:53:21 | INFO | train_inner | epoch 001: 2680 / 3002 loss=2.875, ppl=7.33, wps=5834, ups=0.09, wpb=64884, bsz=128, num_updates=2659, lr=9.99867e-05, gnorm=2.502, loss_scale=1, train_wall=11, gb_free=2.8, wall=29675
2021-06-19 02:53:32 | INFO | train_inner | epoch 001: 2681 / 3002 loss=2.812, ppl=7.02, wps=5883.7, ups=0.09, wpb=64879, bsz=128, num_updates=2660, lr=9.99867e-05, gnorm=2.339, loss_scale=1, train_wall=11, gb_free=2.8, wall=29686
2021-06-19 02:53:43 | INFO | train_inner | epoch 001: 2682 / 3002 loss=2.945, ppl=7.7, wps=5901.1, ups=0.09, wpb=64821, bsz=128, num_updates=2661, lr=9.99867e-05, gnorm=2.525, loss_scale=1, train_wall=11, gb_free=2.8, wall=29697
2021-06-19 02:53:54 | INFO | train_inner | epoch 001: 2683 / 3002 loss=2.98, ppl=7.89, wps=5842.5, ups=0.09, wpb=64900, bsz=128, num_updates=2662, lr=9.99867e-05, gnorm=2.36, loss_scale=1, train_wall=11, gb_free=2.8, wall=29708
2021-06-19 02:54:05 | INFO | train_inner | epoch 001: 2684 / 3002 loss=2.833, ppl=7.12, wps=5878.5, ups=0.09, wpb=64820, bsz=128, num_updates=2663, lr=9.99867e-05, gnorm=2.569, loss_scale=1, train_wall=11, gb_free=2.8, wall=29719
2021-06-19 02:54:16 | INFO | train_inner | epoch 001: 2685 / 3002 loss=2.873, ppl=7.32, wps=5884.1, ups=0.09, wpb=64828, bsz=128, num_updates=2664, lr=9.99867e-05, gnorm=2.341, loss_scale=1, train_wall=11, gb_free=2.8, wall=29730
2021-06-19 02:54:27 | INFO | train_inner | epoch 001: 2686 / 3002 loss=3.003, ppl=8.01, wps=5753.7, ups=0.09, wpb=64733, bsz=128, num_updates=2665, lr=9.99867e-05, gnorm=2.336, loss_scale=1, train_wall=11, gb_free=2.8, wall=29742
2021-06-19 02:54:38 | INFO | train_inner | epoch 001: 2687 / 3002 loss=2.848, ppl=7.2, wps=5857.9, ups=0.09, wpb=64852, bsz=128, num_updates=2666, lr=9.99867e-05, gnorm=2.403, loss_scale=1, train_wall=11, gb_free=2.8, wall=29753
2021-06-19 02:54:49 | INFO | train_inner | epoch 001: 2688 / 3002 loss=2.899, ppl=7.46, wps=5832.8, ups=0.09, wpb=64909, bsz=128, num_updates=2667, lr=9.99867e-05, gnorm=2.331, loss_scale=1, train_wall=11, gb_free=2.8, wall=29764
2021-06-19 02:55:01 | INFO | train_inner | epoch 001: 2689 / 3002 loss=2.984, ppl=7.91, wps=5841.4, ups=0.09, wpb=64851, bsz=128, num_updates=2668, lr=9.99867e-05, gnorm=2.687, loss_scale=1, train_wall=11, gb_free=2.8, wall=29775
2021-06-19 02:55:12 | INFO | train_inner | epoch 001: 2690 / 3002 loss=2.699, ppl=6.49, wps=5826.7, ups=0.09, wpb=64900, bsz=128, num_updates=2669, lr=9.99866e-05, gnorm=2.253, loss_scale=1, train_wall=11, gb_free=2.8, wall=29786
2021-06-19 02:55:23 | INFO | train_inner | epoch 001: 2691 / 3002 loss=2.806, ppl=6.99, wps=5866.6, ups=0.09, wpb=64851, bsz=128, num_updates=2670, lr=9.99866e-05, gnorm=2.375, loss_scale=1, train_wall=11, gb_free=2.8, wall=29797
2021-06-19 02:55:34 | INFO | train_inner | epoch 001: 2692 / 3002 loss=2.907, ppl=7.5, wps=5878.6, ups=0.09, wpb=64778, bsz=128, num_updates=2671, lr=9.99866e-05, gnorm=2.36, loss_scale=1, train_wall=11, gb_free=2.8, wall=29808
2021-06-19 02:55:45 | INFO | train_inner | epoch 001: 2693 / 3002 loss=2.809, ppl=7.01, wps=5926.4, ups=0.09, wpb=64806, bsz=128, num_updates=2672, lr=9.99866e-05, gnorm=2.428, loss_scale=1, train_wall=11, gb_free=2.8, wall=29819
2021-06-19 02:55:56 | INFO | train_inner | epoch 001: 2694 / 3002 loss=3.041, ppl=8.23, wps=5979.6, ups=0.09, wpb=64872, bsz=128, num_updates=2673, lr=9.99866e-05, gnorm=2.511, loss_scale=1, train_wall=10, gb_free=2.8, wall=29830
2021-06-19 02:56:07 | INFO | train_inner | epoch 001: 2695 / 3002 loss=2.698, ppl=6.49, wps=5709.7, ups=0.09, wpb=64843, bsz=128, num_updates=2674, lr=9.99866e-05, gnorm=2.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=29841
2021-06-19 02:56:18 | INFO | train_inner | epoch 001: 2696 / 3002 loss=2.892, ppl=7.42, wps=5948, ups=0.09, wpb=64842, bsz=128, num_updates=2675, lr=9.99866e-05, gnorm=2.418, loss_scale=1, train_wall=10, gb_free=2.8, wall=29852
2021-06-19 02:56:29 | INFO | train_inner | epoch 001: 2697 / 3002 loss=2.976, ppl=7.87, wps=5805, ups=0.09, wpb=64841, bsz=128, num_updates=2676, lr=9.99866e-05, gnorm=2.438, loss_scale=1, train_wall=11, gb_free=2.8, wall=29863
2021-06-19 02:56:40 | INFO | train_inner | epoch 001: 2698 / 3002 loss=3.005, ppl=8.03, wps=5869.8, ups=0.09, wpb=64923, bsz=128, num_updates=2677, lr=9.99866e-05, gnorm=2.539, loss_scale=1, train_wall=11, gb_free=2.8, wall=29874
2021-06-19 02:56:51 | INFO | train_inner | epoch 001: 2699 / 3002 loss=2.866, ppl=7.29, wps=5891.5, ups=0.09, wpb=64819, bsz=128, num_updates=2678, lr=9.99866e-05, gnorm=5.749, loss_scale=1, train_wall=11, gb_free=2.8, wall=29885
2021-06-19 02:57:02 | INFO | train_inner | epoch 001: 2700 / 3002 loss=2.933, ppl=7.64, wps=5877.5, ups=0.09, wpb=64813, bsz=128, num_updates=2679, lr=9.99866e-05, gnorm=2.353, loss_scale=1, train_wall=11, gb_free=2.8, wall=29896
2021-06-19 02:57:13 | INFO | train_inner | epoch 001: 2701 / 3002 loss=2.899, ppl=7.46, wps=5843.1, ups=0.09, wpb=64844, bsz=128, num_updates=2680, lr=9.99866e-05, gnorm=2.341, loss_scale=1, train_wall=11, gb_free=2.8, wall=29908
2021-06-19 02:57:24 | INFO | train_inner | epoch 001: 2702 / 3002 loss=3.027, ppl=8.15, wps=5751.6, ups=0.09, wpb=64858, bsz=128, num_updates=2681, lr=9.99866e-05, gnorm=2.388, loss_scale=1, train_wall=11, gb_free=2.8, wall=29919
2021-06-19 02:57:36 | INFO | train_inner | epoch 001: 2703 / 3002 loss=2.794, ppl=6.93, wps=5824.7, ups=0.09, wpb=64856, bsz=128, num_updates=2682, lr=9.99865e-05, gnorm=2.415, loss_scale=1, train_wall=11, gb_free=2.8, wall=29930
2021-06-19 02:57:47 | INFO | train_inner | epoch 001: 2704 / 3002 loss=2.734, ppl=6.65, wps=5857.8, ups=0.09, wpb=64765, bsz=128, num_updates=2683, lr=9.99865e-05, gnorm=2.379, loss_scale=1, train_wall=11, gb_free=2.8, wall=29941
2021-06-19 02:57:58 | INFO | train_inner | epoch 001: 2705 / 3002 loss=2.917, ppl=7.55, wps=5883.7, ups=0.09, wpb=64912, bsz=128, num_updates=2684, lr=9.99865e-05, gnorm=2.54, loss_scale=1, train_wall=11, gb_free=2.8, wall=29952
2021-06-19 02:58:09 | INFO | train_inner | epoch 001: 2706 / 3002 loss=2.832, ppl=7.12, wps=5808.4, ups=0.09, wpb=64745, bsz=128, num_updates=2685, lr=9.99865e-05, gnorm=2.351, loss_scale=1, train_wall=11, gb_free=2.8, wall=29963
2021-06-19 02:58:20 | INFO | train_inner | epoch 001: 2707 / 3002 loss=2.929, ppl=7.62, wps=5900.7, ups=0.09, wpb=64870, bsz=128, num_updates=2686, lr=9.99865e-05, gnorm=2.337, loss_scale=1, train_wall=11, gb_free=2.8, wall=29974
2021-06-19 02:58:31 | INFO | train_inner | epoch 001: 2708 / 3002 loss=2.989, ppl=7.94, wps=5885.5, ups=0.09, wpb=64867, bsz=128, num_updates=2687, lr=9.99865e-05, gnorm=2.396, loss_scale=1, train_wall=11, gb_free=2.8, wall=29985
2021-06-19 02:58:42 | INFO | train_inner | epoch 001: 2709 / 3002 loss=2.772, ppl=6.83, wps=5797.9, ups=0.09, wpb=64776, bsz=128, num_updates=2688, lr=9.99865e-05, gnorm=2.684, loss_scale=1, train_wall=11, gb_free=2.8, wall=29996
2021-06-19 02:58:53 | INFO | train_inner | epoch 001: 2710 / 3002 loss=2.652, ppl=6.29, wps=5837.7, ups=0.09, wpb=64863, bsz=128, num_updates=2689, lr=9.99865e-05, gnorm=7.233, loss_scale=1, train_wall=11, gb_free=2.8, wall=30007
2021-06-19 02:59:04 | INFO | train_inner | epoch 001: 2711 / 3002 loss=2.733, ppl=6.65, wps=5876.7, ups=0.09, wpb=64845, bsz=128, num_updates=2690, lr=9.99865e-05, gnorm=2.355, loss_scale=1, train_wall=11, gb_free=2.8, wall=30019
2021-06-19 02:59:15 | INFO | train_inner | epoch 001: 2712 / 3002 loss=3.009, ppl=8.05, wps=5892.2, ups=0.09, wpb=64820, bsz=128, num_updates=2691, lr=9.99865e-05, gnorm=2.61, loss_scale=1, train_wall=11, gb_free=2.8, wall=30030
2021-06-19 02:59:26 | INFO | train_inner | epoch 001: 2713 / 3002 loss=2.786, ppl=6.9, wps=5895.4, ups=0.09, wpb=64814, bsz=128, num_updates=2692, lr=9.99865e-05, gnorm=7.02, loss_scale=1, train_wall=11, gb_free=2.8, wall=30041
2021-06-19 02:59:37 | INFO | train_inner | epoch 001: 2714 / 3002 loss=2.635, ppl=6.21, wps=5897.9, ups=0.09, wpb=64884, bsz=128, num_updates=2693, lr=9.99865e-05, gnorm=2.778, loss_scale=1, train_wall=11, gb_free=2.8, wall=30052
2021-06-19 02:59:48 | INFO | train_inner | epoch 001: 2715 / 3002 loss=2.917, ppl=7.55, wps=5795.4, ups=0.09, wpb=64845, bsz=128, num_updates=2694, lr=9.99864e-05, gnorm=5.651, loss_scale=1, train_wall=11, gb_free=2.8, wall=30063
2021-06-19 03:00:00 | INFO | train_inner | epoch 001: 2716 / 3002 loss=2.88, ppl=7.36, wps=5796.6, ups=0.09, wpb=64822, bsz=128, num_updates=2695, lr=9.99864e-05, gnorm=2.78, loss_scale=1, train_wall=11, gb_free=2.8, wall=30074
2021-06-19 03:00:11 | INFO | train_inner | epoch 001: 2717 / 3002 loss=2.806, ppl=6.99, wps=5732.8, ups=0.09, wpb=64753, bsz=128, num_updates=2696, lr=9.99864e-05, gnorm=2.317, loss_scale=1, train_wall=11, gb_free=2.8, wall=30085
2021-06-19 03:00:22 | INFO | train_inner | epoch 001: 2718 / 3002 loss=2.773, ppl=6.84, wps=5868.8, ups=0.09, wpb=64821, bsz=128, num_updates=2697, lr=9.99864e-05, gnorm=2.424, loss_scale=1, train_wall=11, gb_free=2.8, wall=30096
2021-06-19 03:00:33 | INFO | train_inner | epoch 001: 2719 / 3002 loss=3.072, ppl=8.41, wps=5923.8, ups=0.09, wpb=64771, bsz=128, num_updates=2698, lr=9.99864e-05, gnorm=2.723, loss_scale=1, train_wall=11, gb_free=2.8, wall=30107
2021-06-19 03:00:44 | INFO | train_inner | epoch 001: 2720 / 3002 loss=2.982, ppl=7.9, wps=5830.4, ups=0.09, wpb=64735, bsz=128, num_updates=2699, lr=9.99864e-05, gnorm=2.319, loss_scale=1, train_wall=11, gb_free=2.8, wall=30118
2021-06-19 03:00:55 | INFO | train_inner | epoch 001: 2721 / 3002 loss=2.858, ppl=7.25, wps=5806.7, ups=0.09, wpb=64904, bsz=128, num_updates=2700, lr=9.99864e-05, gnorm=2.429, loss_scale=1, train_wall=11, gb_free=2.8, wall=30129
2021-06-19 03:01:06 | INFO | train_inner | epoch 001: 2722 / 3002 loss=2.943, ppl=7.69, wps=5822.6, ups=0.09, wpb=64806, bsz=128, num_updates=2701, lr=9.99864e-05, gnorm=2.331, loss_scale=1, train_wall=11, gb_free=2.8, wall=30141
2021-06-19 03:01:17 | INFO | train_inner | epoch 001: 2723 / 3002 loss=2.865, ppl=7.29, wps=5898.3, ups=0.09, wpb=64841, bsz=128, num_updates=2702, lr=9.99864e-05, gnorm=2.729, loss_scale=1, train_wall=11, gb_free=2.8, wall=30152
2021-06-19 03:01:28 | INFO | train_inner | epoch 001: 2724 / 3002 loss=2.871, ppl=7.32, wps=5913.3, ups=0.09, wpb=64846, bsz=128, num_updates=2703, lr=9.99864e-05, gnorm=2.544, loss_scale=1, train_wall=11, gb_free=2.8, wall=30163
2021-06-19 03:01:39 | INFO | train_inner | epoch 001: 2725 / 3002 loss=2.884, ppl=7.38, wps=5893.3, ups=0.09, wpb=64868, bsz=128, num_updates=2704, lr=9.99864e-05, gnorm=2.564, loss_scale=1, train_wall=11, gb_free=2.8, wall=30174
2021-06-19 03:01:50 | INFO | train_inner | epoch 001: 2726 / 3002 loss=2.827, ppl=7.1, wps=5780.6, ups=0.09, wpb=64829, bsz=128, num_updates=2705, lr=9.99864e-05, gnorm=2.416, loss_scale=1, train_wall=11, gb_free=2.8, wall=30185
2021-06-19 03:02:01 | INFO | train_inner | epoch 001: 2727 / 3002 loss=2.932, ppl=7.63, wps=5851.7, ups=0.09, wpb=64824, bsz=128, num_updates=2706, lr=9.99864e-05, gnorm=2.475, loss_scale=1, train_wall=11, gb_free=2.8, wall=30196
2021-06-19 03:02:13 | INFO | train_inner | epoch 001: 2728 / 3002 loss=3.027, ppl=8.15, wps=5778.7, ups=0.09, wpb=64756, bsz=128, num_updates=2707, lr=9.99863e-05, gnorm=2.459, loss_scale=1, train_wall=11, gb_free=2.8, wall=30207
2021-06-19 03:02:24 | INFO | train_inner | epoch 001: 2729 / 3002 loss=2.816, ppl=7.04, wps=5907.8, ups=0.09, wpb=64882, bsz=128, num_updates=2708, lr=9.99863e-05, gnorm=2.477, loss_scale=1, train_wall=11, gb_free=2.8, wall=30218
2021-06-19 03:02:35 | INFO | train_inner | epoch 001: 2730 / 3002 loss=2.854, ppl=7.23, wps=5844.9, ups=0.09, wpb=64813, bsz=128, num_updates=2709, lr=9.99863e-05, gnorm=2.542, loss_scale=1, train_wall=11, gb_free=2.8, wall=30229
2021-06-19 03:02:46 | INFO | train_inner | epoch 001: 2731 / 3002 loss=2.859, ppl=7.25, wps=5867.8, ups=0.09, wpb=64831, bsz=128, num_updates=2710, lr=9.99863e-05, gnorm=2.623, loss_scale=1, train_wall=11, gb_free=2.8, wall=30240
2021-06-19 03:02:57 | INFO | train_inner | epoch 001: 2732 / 3002 loss=2.985, ppl=7.92, wps=5919.5, ups=0.09, wpb=64827, bsz=128, num_updates=2711, lr=9.99863e-05, gnorm=3.929, loss_scale=1, train_wall=10, gb_free=2.8, wall=30251
2021-06-19 03:03:08 | INFO | train_inner | epoch 001: 2733 / 3002 loss=2.922, ppl=7.58, wps=5825.5, ups=0.09, wpb=64814, bsz=128, num_updates=2712, lr=9.99863e-05, gnorm=2.682, loss_scale=1, train_wall=11, gb_free=2.8, wall=30262
2021-06-19 03:03:19 | INFO | train_inner | epoch 001: 2734 / 3002 loss=2.87, ppl=7.31, wps=5820.6, ups=0.09, wpb=64839, bsz=128, num_updates=2713, lr=9.99863e-05, gnorm=2.345, loss_scale=1, train_wall=11, gb_free=2.8, wall=30273
2021-06-19 03:03:30 | INFO | train_inner | epoch 001: 2735 / 3002 loss=2.874, ppl=7.33, wps=5779.6, ups=0.09, wpb=64874, bsz=128, num_updates=2714, lr=9.99863e-05, gnorm=2.408, loss_scale=1, train_wall=11, gb_free=2.8, wall=30285
2021-06-19 03:03:41 | INFO | train_inner | epoch 001: 2736 / 3002 loss=2.702, ppl=6.51, wps=5894.1, ups=0.09, wpb=64787, bsz=128, num_updates=2715, lr=9.99863e-05, gnorm=2.393, loss_scale=1, train_wall=11, gb_free=2.8, wall=30296
2021-06-19 03:03:52 | INFO | train_inner | epoch 001: 2737 / 3002 loss=2.758, ppl=6.76, wps=5929.9, ups=0.09, wpb=64922, bsz=128, num_updates=2716, lr=9.99863e-05, gnorm=2.322, loss_scale=1, train_wall=10, gb_free=2.8, wall=30307
2021-06-19 03:04:03 | INFO | train_inner | epoch 001: 2738 / 3002 loss=2.924, ppl=7.59, wps=5770.7, ups=0.09, wpb=64755, bsz=128, num_updates=2717, lr=9.99863e-05, gnorm=2.337, loss_scale=1, train_wall=11, gb_free=2.8, wall=30318
2021-06-19 03:04:15 | INFO | train_inner | epoch 001: 2739 / 3002 loss=2.832, ppl=7.12, wps=5739.7, ups=0.09, wpb=64803, bsz=128, num_updates=2718, lr=9.99863e-05, gnorm=2.48, loss_scale=1, train_wall=11, gb_free=2.8, wall=30329
2021-06-19 03:04:26 | INFO | train_inner | epoch 001: 2740 / 3002 loss=2.929, ppl=7.62, wps=5844.7, ups=0.09, wpb=64778, bsz=128, num_updates=2719, lr=9.99862e-05, gnorm=2.342, loss_scale=1, train_wall=11, gb_free=2.8, wall=30340
2021-06-19 03:04:37 | INFO | train_inner | epoch 001: 2741 / 3002 loss=2.929, ppl=7.62, wps=5930.8, ups=0.09, wpb=64845, bsz=128, num_updates=2720, lr=9.99862e-05, gnorm=2.453, loss_scale=1, train_wall=11, gb_free=2.8, wall=30351
2021-06-19 03:04:48 | INFO | train_inner | epoch 001: 2742 / 3002 loss=2.805, ppl=6.99, wps=5851.7, ups=0.09, wpb=64928, bsz=128, num_updates=2721, lr=9.99862e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=30362
2021-06-19 03:04:59 | INFO | train_inner | epoch 001: 2743 / 3002 loss=2.732, ppl=6.64, wps=5871.2, ups=0.09, wpb=64798, bsz=128, num_updates=2722, lr=9.99862e-05, gnorm=2.453, loss_scale=1, train_wall=11, gb_free=2.8, wall=30373
2021-06-19 03:05:10 | INFO | train_inner | epoch 001: 2744 / 3002 loss=2.833, ppl=7.13, wps=5768.7, ups=0.09, wpb=64777, bsz=128, num_updates=2723, lr=9.99862e-05, gnorm=2.411, loss_scale=1, train_wall=11, gb_free=2.8, wall=30384
2021-06-19 03:05:21 | INFO | train_inner | epoch 001: 2745 / 3002 loss=3.021, ppl=8.12, wps=5935.7, ups=0.09, wpb=64752, bsz=128, num_updates=2724, lr=9.99862e-05, gnorm=2.38, loss_scale=1, train_wall=10, gb_free=2.8, wall=30395
2021-06-19 03:05:32 | INFO | train_inner | epoch 001: 2746 / 3002 loss=2.805, ppl=6.99, wps=5816.5, ups=0.09, wpb=64788, bsz=128, num_updates=2725, lr=9.99862e-05, gnorm=3.495, loss_scale=1, train_wall=11, gb_free=2.8, wall=30406
2021-06-19 03:05:43 | INFO | train_inner | epoch 001: 2747 / 3002 loss=2.965, ppl=7.81, wps=5781.8, ups=0.09, wpb=64821, bsz=128, num_updates=2726, lr=9.99862e-05, gnorm=10.288, loss_scale=1, train_wall=11, gb_free=2.8, wall=30418
2021-06-19 03:05:54 | INFO | train_inner | epoch 001: 2748 / 3002 loss=2.744, ppl=6.7, wps=5857.1, ups=0.09, wpb=64897, bsz=128, num_updates=2727, lr=9.99862e-05, gnorm=2.26, loss_scale=1, train_wall=11, gb_free=2.8, wall=30429
2021-06-19 03:06:05 | INFO | train_inner | epoch 001: 2749 / 3002 loss=2.953, ppl=7.74, wps=5932.7, ups=0.09, wpb=64799, bsz=128, num_updates=2728, lr=9.99862e-05, gnorm=2.393, loss_scale=1, train_wall=10, gb_free=2.8, wall=30440
2021-06-19 03:06:16 | INFO | train_inner | epoch 001: 2750 / 3002 loss=2.83, ppl=7.11, wps=5853.5, ups=0.09, wpb=64835, bsz=128, num_updates=2729, lr=9.99862e-05, gnorm=2.886, loss_scale=1, train_wall=11, gb_free=2.8, wall=30451
2021-06-19 03:06:27 | INFO | train_inner | epoch 001: 2751 / 3002 loss=2.937, ppl=7.66, wps=5880.4, ups=0.09, wpb=64719, bsz=128, num_updates=2730, lr=9.99862e-05, gnorm=2.303, loss_scale=1, train_wall=11, gb_free=2.8, wall=30462
2021-06-19 03:06:38 | INFO | train_inner | epoch 001: 2752 / 3002 loss=3.078, ppl=8.44, wps=5860.5, ups=0.09, wpb=64775, bsz=128, num_updates=2731, lr=9.99862e-05, gnorm=2.379, loss_scale=1, train_wall=11, gb_free=2.8, wall=30473
2021-06-19 03:06:49 | INFO | train_inner | epoch 001: 2753 / 3002 loss=2.89, ppl=7.41, wps=5981.7, ups=0.09, wpb=64865, bsz=128, num_updates=2732, lr=9.99861e-05, gnorm=4.006, loss_scale=1, train_wall=10, gb_free=2.8, wall=30484
2021-06-19 03:07:00 | INFO | train_inner | epoch 001: 2754 / 3002 loss=2.827, ppl=7.1, wps=5807.1, ups=0.09, wpb=64771, bsz=128, num_updates=2733, lr=9.99861e-05, gnorm=2.408, loss_scale=1, train_wall=11, gb_free=2.8, wall=30495
2021-06-19 03:07:12 | INFO | train_inner | epoch 001: 2755 / 3002 loss=2.88, ppl=7.36, wps=5747.8, ups=0.09, wpb=64790, bsz=128, num_updates=2734, lr=9.99861e-05, gnorm=2.34, loss_scale=1, train_wall=11, gb_free=2.8, wall=30506
2021-06-19 03:07:23 | INFO | train_inner | epoch 001: 2756 / 3002 loss=2.822, ppl=7.07, wps=5949.1, ups=0.09, wpb=64860, bsz=128, num_updates=2735, lr=9.99861e-05, gnorm=2.405, loss_scale=1, train_wall=10, gb_free=2.8, wall=30517
2021-06-19 03:07:34 | INFO | train_inner | epoch 001: 2757 / 3002 loss=2.959, ppl=7.77, wps=5745.9, ups=0.09, wpb=64771, bsz=128, num_updates=2736, lr=9.99861e-05, gnorm=2.61, loss_scale=1, train_wall=11, gb_free=2.8, wall=30528
2021-06-19 03:07:45 | INFO | train_inner | epoch 001: 2758 / 3002 loss=2.891, ppl=7.42, wps=5858.6, ups=0.09, wpb=64848, bsz=128, num_updates=2737, lr=9.99861e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=30539
2021-06-19 03:07:56 | INFO | train_inner | epoch 001: 2759 / 3002 loss=2.831, ppl=7.12, wps=5884.3, ups=0.09, wpb=64837, bsz=128, num_updates=2738, lr=9.99861e-05, gnorm=2.436, loss_scale=1, train_wall=11, gb_free=2.8, wall=30550
2021-06-19 03:08:07 | INFO | train_inner | epoch 001: 2760 / 3002 loss=2.908, ppl=7.51, wps=5881, ups=0.09, wpb=64818, bsz=128, num_updates=2739, lr=9.99861e-05, gnorm=2.648, loss_scale=1, train_wall=11, gb_free=2.8, wall=30561
2021-06-19 03:08:18 | INFO | train_inner | epoch 001: 2761 / 3002 loss=2.918, ppl=7.56, wps=5836.4, ups=0.09, wpb=64786, bsz=128, num_updates=2740, lr=9.99861e-05, gnorm=2.363, loss_scale=1, train_wall=11, gb_free=2.8, wall=30572
2021-06-19 03:08:29 | INFO | train_inner | epoch 001: 2762 / 3002 loss=2.86, ppl=7.26, wps=5909.6, ups=0.09, wpb=64869, bsz=128, num_updates=2741, lr=9.99861e-05, gnorm=2.409, loss_scale=1, train_wall=11, gb_free=2.8, wall=30583
2021-06-19 03:08:40 | INFO | train_inner | epoch 001: 2763 / 3002 loss=2.998, ppl=7.99, wps=5810.1, ups=0.09, wpb=64874, bsz=128, num_updates=2742, lr=9.99861e-05, gnorm=2.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=30595
2021-06-19 03:08:51 | INFO | train_inner | epoch 001: 2764 / 3002 loss=2.857, ppl=7.25, wps=5974.7, ups=0.09, wpb=64915, bsz=128, num_updates=2743, lr=9.99861e-05, gnorm=2.442, loss_scale=1, train_wall=10, gb_free=2.8, wall=30605
2021-06-19 03:09:02 | INFO | train_inner | epoch 001: 2765 / 3002 loss=2.785, ppl=6.89, wps=5954.2, ups=0.09, wpb=64880, bsz=128, num_updates=2744, lr=9.9986e-05, gnorm=2.379, loss_scale=1, train_wall=10, gb_free=2.8, wall=30616
2021-06-19 03:09:13 | INFO | train_inner | epoch 001: 2766 / 3002 loss=2.896, ppl=7.44, wps=5823.5, ups=0.09, wpb=64707, bsz=128, num_updates=2745, lr=9.9986e-05, gnorm=2.382, loss_scale=1, train_wall=11, gb_free=2.8, wall=30628
2021-06-19 03:09:24 | INFO | train_inner | epoch 001: 2767 / 3002 loss=2.904, ppl=7.48, wps=5904.8, ups=0.09, wpb=64797, bsz=128, num_updates=2746, lr=9.9986e-05, gnorm=2.446, loss_scale=1, train_wall=11, gb_free=2.8, wall=30638
2021-06-19 03:09:35 | INFO | train_inner | epoch 001: 2768 / 3002 loss=2.788, ppl=6.91, wps=5790.2, ups=0.09, wpb=64735, bsz=128, num_updates=2747, lr=9.9986e-05, gnorm=2.343, loss_scale=1, train_wall=11, gb_free=2.8, wall=30650
2021-06-19 03:09:47 | INFO | train_inner | epoch 001: 2769 / 3002 loss=3.09, ppl=8.51, wps=5781.1, ups=0.09, wpb=64825, bsz=128, num_updates=2748, lr=9.9986e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=30661
2021-06-19 03:09:58 | INFO | train_inner | epoch 001: 2770 / 3002 loss=2.817, ppl=7.05, wps=5812.8, ups=0.09, wpb=64745, bsz=128, num_updates=2749, lr=9.9986e-05, gnorm=2.725, loss_scale=1, train_wall=11, gb_free=2.8, wall=30672
2021-06-19 03:10:09 | INFO | train_inner | epoch 001: 2771 / 3002 loss=2.782, ppl=6.88, wps=5724.1, ups=0.09, wpb=64840, bsz=128, num_updates=2750, lr=9.9986e-05, gnorm=2.387, loss_scale=1, train_wall=11, gb_free=2.8, wall=30683
2021-06-19 03:10:20 | INFO | train_inner | epoch 001: 2772 / 3002 loss=2.825, ppl=7.09, wps=5821.9, ups=0.09, wpb=64822, bsz=128, num_updates=2751, lr=9.9986e-05, gnorm=2.358, loss_scale=1, train_wall=11, gb_free=2.8, wall=30694
2021-06-19 03:10:31 | INFO | train_inner | epoch 001: 2773 / 3002 loss=2.926, ppl=7.6, wps=5843.2, ups=0.09, wpb=64819, bsz=128, num_updates=2752, lr=9.9986e-05, gnorm=2.602, loss_scale=1, train_wall=11, gb_free=2.8, wall=30706
2021-06-19 03:10:42 | INFO | train_inner | epoch 001: 2774 / 3002 loss=2.768, ppl=6.81, wps=5883.7, ups=0.09, wpb=64832, bsz=128, num_updates=2753, lr=9.9986e-05, gnorm=2.384, loss_scale=1, train_wall=11, gb_free=2.8, wall=30717
2021-06-19 03:10:53 | INFO | train_inner | epoch 001: 2775 / 3002 loss=2.788, ppl=6.91, wps=5836.4, ups=0.09, wpb=64877, bsz=128, num_updates=2754, lr=9.9986e-05, gnorm=2.39, loss_scale=1, train_wall=11, gb_free=2.8, wall=30728
2021-06-19 03:11:04 | INFO | train_inner | epoch 001: 2776 / 3002 loss=3.032, ppl=8.18, wps=5894.9, ups=0.09, wpb=64804, bsz=128, num_updates=2755, lr=9.9986e-05, gnorm=3.253, loss_scale=1, train_wall=11, gb_free=2.8, wall=30739
2021-06-19 03:11:16 | INFO | train_inner | epoch 001: 2777 / 3002 loss=2.962, ppl=7.79, wps=5731.2, ups=0.09, wpb=64786, bsz=128, num_updates=2756, lr=9.9986e-05, gnorm=2.475, loss_scale=1, train_wall=11, gb_free=2.8, wall=30750
2021-06-19 03:11:27 | INFO | train_inner | epoch 001: 2778 / 3002 loss=2.936, ppl=7.65, wps=5813.1, ups=0.09, wpb=64797, bsz=128, num_updates=2757, lr=9.99859e-05, gnorm=2.453, loss_scale=1, train_wall=11, gb_free=2.8, wall=30761
2021-06-19 03:11:38 | INFO | train_inner | epoch 001: 2779 / 3002 loss=2.882, ppl=7.37, wps=6018.3, ups=0.09, wpb=64866, bsz=128, num_updates=2758, lr=9.99859e-05, gnorm=3.048, loss_scale=1, train_wall=10, gb_free=2.8, wall=30772
2021-06-19 03:11:49 | INFO | train_inner | epoch 001: 2780 / 3002 loss=2.928, ppl=7.61, wps=5785.7, ups=0.09, wpb=64812, bsz=128, num_updates=2759, lr=9.99859e-05, gnorm=2.541, loss_scale=1, train_wall=11, gb_free=2.8, wall=30783
2021-06-19 03:12:00 | INFO | train_inner | epoch 001: 2781 / 3002 loss=2.944, ppl=7.7, wps=5989.1, ups=0.09, wpb=64879, bsz=128, num_updates=2760, lr=9.99859e-05, gnorm=2.29, loss_scale=1, train_wall=10, gb_free=2.8, wall=30794
2021-06-19 03:12:11 | INFO | train_inner | epoch 001: 2782 / 3002 loss=2.899, ppl=7.46, wps=5871.7, ups=0.09, wpb=64876, bsz=128, num_updates=2761, lr=9.99859e-05, gnorm=2.235, loss_scale=1, train_wall=11, gb_free=2.8, wall=30805
2021-06-19 03:12:22 | INFO | train_inner | epoch 001: 2783 / 3002 loss=2.874, ppl=7.33, wps=5835.3, ups=0.09, wpb=64835, bsz=128, num_updates=2762, lr=9.99859e-05, gnorm=2.319, loss_scale=2, train_wall=11, gb_free=2.8, wall=30816
2021-06-19 03:12:33 | INFO | train_inner | epoch 001: 2784 / 3002 loss=2.97, ppl=7.84, wps=5819.3, ups=0.09, wpb=64837, bsz=128, num_updates=2763, lr=9.99859e-05, gnorm=2.581, loss_scale=2, train_wall=11, gb_free=2.8, wall=30827
2021-06-19 03:12:44 | INFO | train_inner | epoch 001: 2785 / 3002 loss=3.036, ppl=8.2, wps=5886.1, ups=0.09, wpb=64939, bsz=128, num_updates=2764, lr=9.99859e-05, gnorm=2.449, loss_scale=2, train_wall=11, gb_free=2.8, wall=30838
2021-06-19 03:12:55 | INFO | train_inner | epoch 001: 2786 / 3002 loss=2.774, ppl=6.84, wps=5945, ups=0.09, wpb=64860, bsz=128, num_updates=2765, lr=9.99859e-05, gnorm=2.296, loss_scale=2, train_wall=10, gb_free=2.8, wall=30849
2021-06-19 03:13:06 | INFO | train_inner | epoch 001: 2787 / 3002 loss=2.8, ppl=6.97, wps=5802.1, ups=0.09, wpb=64860, bsz=128, num_updates=2766, lr=9.99859e-05, gnorm=2.276, loss_scale=2, train_wall=11, gb_free=2.8, wall=30860
2021-06-19 03:13:17 | INFO | train_inner | epoch 001: 2788 / 3002 loss=2.724, ppl=6.61, wps=5788.5, ups=0.09, wpb=64843, bsz=128, num_updates=2767, lr=9.99859e-05, gnorm=3.501, loss_scale=2, train_wall=11, gb_free=2.8, wall=30872
2021-06-19 03:13:28 | INFO | train_inner | epoch 001: 2789 / 3002 loss=2.832, ppl=7.12, wps=5984.8, ups=0.09, wpb=64879, bsz=128, num_updates=2768, lr=9.99859e-05, gnorm=2.317, loss_scale=2, train_wall=10, gb_free=2.8, wall=30882
2021-06-19 03:13:39 | INFO | train_inner | epoch 001: 2790 / 3002 loss=2.749, ppl=6.72, wps=5898.7, ups=0.09, wpb=64852, bsz=128, num_updates=2769, lr=9.99858e-05, gnorm=2.371, loss_scale=2, train_wall=11, gb_free=2.8, wall=30893
2021-06-19 03:13:50 | INFO | train_inner | epoch 001: 2791 / 3002 loss=3.067, ppl=8.38, wps=5927.1, ups=0.09, wpb=64868, bsz=128, num_updates=2770, lr=9.99858e-05, gnorm=2.454, loss_scale=2, train_wall=11, gb_free=2.8, wall=30904
2021-06-19 03:14:01 | INFO | train_inner | epoch 001: 2792 / 3002 loss=2.86, ppl=7.26, wps=5905.1, ups=0.09, wpb=64845, bsz=128, num_updates=2771, lr=9.99858e-05, gnorm=2.239, loss_scale=2, train_wall=11, gb_free=2.8, wall=30915
2021-06-19 03:14:12 | INFO | train_inner | epoch 001: 2793 / 3002 loss=2.824, ppl=7.08, wps=5899.3, ups=0.09, wpb=64827, bsz=128, num_updates=2772, lr=9.99858e-05, gnorm=2.384, loss_scale=2, train_wall=11, gb_free=2.8, wall=30926
2021-06-19 03:14:23 | INFO | train_inner | epoch 001: 2794 / 3002 loss=2.817, ppl=7.05, wps=5881, ups=0.09, wpb=64826, bsz=128, num_updates=2773, lr=9.99858e-05, gnorm=2.283, loss_scale=2, train_wall=11, gb_free=2.8, wall=30937
2021-06-19 03:14:34 | INFO | train_inner | epoch 001: 2795 / 3002 loss=2.863, ppl=7.28, wps=5962.9, ups=0.09, wpb=64874, bsz=128, num_updates=2774, lr=9.99858e-05, gnorm=2.252, loss_scale=2, train_wall=10, gb_free=2.8, wall=30948
2021-06-19 03:14:45 | INFO | train_inner | epoch 001: 2796 / 3002 loss=2.874, ppl=7.33, wps=5791.7, ups=0.09, wpb=64834, bsz=128, num_updates=2775, lr=9.99858e-05, gnorm=2.817, loss_scale=2, train_wall=11, gb_free=2.8, wall=30959
2021-06-19 03:14:56 | INFO | train_inner | epoch 001: 2797 / 3002 loss=2.928, ppl=7.61, wps=5903.9, ups=0.09, wpb=64830, bsz=128, num_updates=2776, lr=9.99858e-05, gnorm=3.619, loss_scale=2, train_wall=11, gb_free=2.8, wall=30970
2021-06-19 03:15:07 | INFO | train_inner | epoch 001: 2798 / 3002 loss=2.711, ppl=6.55, wps=5820, ups=0.09, wpb=64872, bsz=128, num_updates=2777, lr=9.99858e-05, gnorm=7.553, loss_scale=2, train_wall=11, gb_free=2.8, wall=30982
2021-06-19 03:15:18 | INFO | train_inner | epoch 001: 2799 / 3002 loss=2.863, ppl=7.27, wps=5924.6, ups=0.09, wpb=64729, bsz=128, num_updates=2778, lr=9.99858e-05, gnorm=2.299, loss_scale=2, train_wall=10, gb_free=2.8, wall=30992
2021-06-19 03:15:29 | INFO | train_inner | epoch 001: 2800 / 3002 loss=3.063, ppl=8.36, wps=5860.8, ups=0.09, wpb=64807, bsz=128, num_updates=2779, lr=9.99858e-05, gnorm=2.434, loss_scale=2, train_wall=11, gb_free=2.8, wall=31004
2021-06-19 03:15:40 | INFO | train_inner | epoch 001: 2801 / 3002 loss=2.844, ppl=7.18, wps=5808.6, ups=0.09, wpb=64669, bsz=128, num_updates=2780, lr=9.99858e-05, gnorm=2.287, loss_scale=2, train_wall=11, gb_free=2.8, wall=31015
2021-06-19 03:15:51 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-19 03:16:03 | INFO | train_inner | epoch 001: 2803 / 3002 loss=2.881, ppl=7.37, wps=2914.7, ups=0.04, wpb=64832, bsz=128, num_updates=2781, lr=9.99858e-05, gnorm=2.316, loss_scale=1, train_wall=21, gb_free=2.8, wall=31037
2021-06-19 03:16:14 | INFO | train_inner | epoch 001: 2804 / 3002 loss=2.768, ppl=6.81, wps=5931.1, ups=0.09, wpb=64863, bsz=128, num_updates=2782, lr=9.99857e-05, gnorm=2.374, loss_scale=1, train_wall=10, gb_free=2.8, wall=31048
2021-06-19 03:16:25 | INFO | train_inner | epoch 001: 2805 / 3002 loss=2.874, ppl=7.33, wps=5808.6, ups=0.09, wpb=64914, bsz=128, num_updates=2783, lr=9.99857e-05, gnorm=4.365, loss_scale=1, train_wall=11, gb_free=2.8, wall=31059
2021-06-19 03:16:36 | INFO | train_inner | epoch 001: 2806 / 3002 loss=2.963, ppl=7.8, wps=5841.5, ups=0.09, wpb=64787, bsz=128, num_updates=2784, lr=9.99857e-05, gnorm=2.556, loss_scale=1, train_wall=11, gb_free=2.8, wall=31070
2021-06-19 03:16:47 | INFO | train_inner | epoch 001: 2807 / 3002 loss=2.802, ppl=6.98, wps=5921.2, ups=0.09, wpb=64801, bsz=128, num_updates=2785, lr=9.99857e-05, gnorm=2.409, loss_scale=1, train_wall=10, gb_free=2.8, wall=31081
2021-06-19 03:16:58 | INFO | train_inner | epoch 001: 2808 / 3002 loss=2.751, ppl=6.73, wps=5849.1, ups=0.09, wpb=64893, bsz=128, num_updates=2786, lr=9.99857e-05, gnorm=2.383, loss_scale=1, train_wall=11, gb_free=2.8, wall=31092
2021-06-19 03:17:09 | INFO | train_inner | epoch 001: 2809 / 3002 loss=2.89, ppl=7.41, wps=5900.6, ups=0.09, wpb=64895, bsz=128, num_updates=2787, lr=9.99857e-05, gnorm=2.446, loss_scale=1, train_wall=11, gb_free=2.8, wall=31103
2021-06-19 03:17:20 | INFO | train_inner | epoch 001: 2810 / 3002 loss=2.966, ppl=7.82, wps=5950.6, ups=0.09, wpb=64875, bsz=128, num_updates=2788, lr=9.99857e-05, gnorm=2.778, loss_scale=1, train_wall=10, gb_free=2.8, wall=31114
2021-06-19 03:17:31 | INFO | train_inner | epoch 001: 2811 / 3002 loss=2.789, ppl=6.91, wps=5873.4, ups=0.09, wpb=64798, bsz=128, num_updates=2789, lr=9.99857e-05, gnorm=2.391, loss_scale=1, train_wall=11, gb_free=2.8, wall=31125
2021-06-19 03:17:42 | INFO | train_inner | epoch 001: 2812 / 3002 loss=2.753, ppl=6.74, wps=5826.1, ups=0.09, wpb=64843, bsz=128, num_updates=2790, lr=9.99857e-05, gnorm=2.279, loss_scale=1, train_wall=11, gb_free=2.8, wall=31136
2021-06-19 03:17:53 | INFO | train_inner | epoch 001: 2813 / 3002 loss=3.016, ppl=8.09, wps=5723.8, ups=0.09, wpb=64777, bsz=128, num_updates=2791, lr=9.99857e-05, gnorm=7.411, loss_scale=1, train_wall=11, gb_free=2.8, wall=31148
2021-06-19 03:18:04 | INFO | train_inner | epoch 001: 2814 / 3002 loss=2.929, ppl=7.62, wps=5855.2, ups=0.09, wpb=64827, bsz=128, num_updates=2792, lr=9.99857e-05, gnorm=2.551, loss_scale=1, train_wall=11, gb_free=2.8, wall=31159
2021-06-19 03:18:15 | INFO | train_inner | epoch 001: 2815 / 3002 loss=2.98, ppl=7.89, wps=5885.7, ups=0.09, wpb=64843, bsz=128, num_updates=2793, lr=9.99857e-05, gnorm=6.066, loss_scale=1, train_wall=11, gb_free=2.8, wall=31170
2021-06-19 03:18:26 | INFO | train_inner | epoch 001: 2816 / 3002 loss=2.817, ppl=7.05, wps=5927.3, ups=0.09, wpb=64837, bsz=128, num_updates=2794, lr=9.99856e-05, gnorm=2.499, loss_scale=1, train_wall=11, gb_free=2.8, wall=31181
2021-06-19 03:18:37 | INFO | train_inner | epoch 001: 2817 / 3002 loss=2.715, ppl=6.57, wps=5840.5, ups=0.09, wpb=64867, bsz=128, num_updates=2795, lr=9.99856e-05, gnorm=3.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=31192
2021-06-19 03:18:49 | INFO | train_inner | epoch 001: 2818 / 3002 loss=2.866, ppl=7.29, wps=5807.7, ups=0.09, wpb=64814, bsz=128, num_updates=2796, lr=9.99856e-05, gnorm=2.385, loss_scale=1, train_wall=11, gb_free=2.8, wall=31203
2021-06-19 03:19:00 | INFO | train_inner | epoch 001: 2819 / 3002 loss=2.905, ppl=7.49, wps=5883, ups=0.09, wpb=64810, bsz=128, num_updates=2797, lr=9.99856e-05, gnorm=4.789, loss_scale=1, train_wall=11, gb_free=2.8, wall=31214
2021-06-19 03:19:11 | INFO | train_inner | epoch 001: 2820 / 3002 loss=2.916, ppl=7.55, wps=5852.8, ups=0.09, wpb=64704, bsz=128, num_updates=2798, lr=9.99856e-05, gnorm=2.539, loss_scale=1, train_wall=11, gb_free=2.8, wall=31225
2021-06-19 03:19:22 | INFO | train_inner | epoch 001: 2821 / 3002 loss=2.853, ppl=7.23, wps=5897.1, ups=0.09, wpb=64889, bsz=128, num_updates=2799, lr=9.99856e-05, gnorm=2.401, loss_scale=1, train_wall=11, gb_free=2.8, wall=31236
2021-06-19 03:19:33 | INFO | train_inner | epoch 001: 2822 / 3002 loss=2.79, ppl=6.92, wps=5847.6, ups=0.09, wpb=64806, bsz=128, num_updates=2800, lr=9.99856e-05, gnorm=2.302, loss_scale=1, train_wall=11, gb_free=2.8, wall=31247
2021-06-19 03:19:44 | INFO | train_inner | epoch 001: 2823 / 3002 loss=2.91, ppl=7.52, wps=5930.9, ups=0.09, wpb=64866, bsz=128, num_updates=2801, lr=9.99856e-05, gnorm=2.438, loss_scale=1, train_wall=10, gb_free=2.8, wall=31258
2021-06-19 03:19:55 | INFO | train_inner | epoch 001: 2824 / 3002 loss=2.805, ppl=6.99, wps=5898, ups=0.09, wpb=64855, bsz=128, num_updates=2802, lr=9.99856e-05, gnorm=9.287, loss_scale=1, train_wall=11, gb_free=2.8, wall=31269
2021-06-19 03:20:06 | INFO | train_inner | epoch 001: 2825 / 3002 loss=2.863, ppl=7.28, wps=5836.8, ups=0.09, wpb=64779, bsz=128, num_updates=2803, lr=9.99856e-05, gnorm=2.579, loss_scale=1, train_wall=11, gb_free=2.8, wall=31280
2021-06-19 03:20:17 | INFO | train_inner | epoch 001: 2826 / 3002 loss=2.886, ppl=7.39, wps=5842.5, ups=0.09, wpb=64848, bsz=128, num_updates=2804, lr=9.99856e-05, gnorm=2.399, loss_scale=1, train_wall=11, gb_free=2.8, wall=31291
2021-06-19 03:20:28 | INFO | train_inner | epoch 001: 2827 / 3002 loss=2.646, ppl=6.26, wps=5908.2, ups=0.09, wpb=64814, bsz=128, num_updates=2805, lr=9.99856e-05, gnorm=2.626, loss_scale=1, train_wall=11, gb_free=2.8, wall=31302
2021-06-19 03:20:39 | INFO | train_inner | epoch 001: 2828 / 3002 loss=2.986, ppl=7.92, wps=5877.2, ups=0.09, wpb=64790, bsz=128, num_updates=2806, lr=9.99856e-05, gnorm=2.457, loss_scale=1, train_wall=11, gb_free=2.8, wall=31313
2021-06-19 03:20:50 | INFO | train_inner | epoch 001: 2829 / 3002 loss=2.84, ppl=7.16, wps=5874.7, ups=0.09, wpb=64804, bsz=128, num_updates=2807, lr=9.99855e-05, gnorm=2.656, loss_scale=1, train_wall=11, gb_free=2.8, wall=31324
2021-06-19 03:21:01 | INFO | train_inner | epoch 001: 2830 / 3002 loss=2.841, ppl=7.17, wps=5872.9, ups=0.09, wpb=64862, bsz=128, num_updates=2808, lr=9.99855e-05, gnorm=2.486, loss_scale=1, train_wall=11, gb_free=2.8, wall=31335
2021-06-19 03:21:12 | INFO | train_inner | epoch 001: 2831 / 3002 loss=2.992, ppl=7.95, wps=5861.6, ups=0.09, wpb=64764, bsz=128, num_updates=2809, lr=9.99855e-05, gnorm=3.391, loss_scale=1, train_wall=11, gb_free=2.8, wall=31346
2021-06-19 03:21:23 | INFO | train_inner | epoch 001: 2832 / 3002 loss=3.055, ppl=8.31, wps=5767.3, ups=0.09, wpb=64875, bsz=128, num_updates=2810, lr=9.99855e-05, gnorm=9.738, loss_scale=1, train_wall=11, gb_free=2.8, wall=31358
2021-06-19 03:21:34 | INFO | train_inner | epoch 001: 2833 / 3002 loss=2.787, ppl=6.9, wps=5840.9, ups=0.09, wpb=64869, bsz=128, num_updates=2811, lr=9.99855e-05, gnorm=17.12, loss_scale=1, train_wall=11, gb_free=2.8, wall=31369
2021-06-19 03:21:45 | INFO | train_inner | epoch 001: 2834 / 3002 loss=3.018, ppl=8.1, wps=5827.8, ups=0.09, wpb=64841, bsz=128, num_updates=2812, lr=9.99855e-05, gnorm=10.946, loss_scale=1, train_wall=11, gb_free=2.8, wall=31380
2021-06-19 03:21:56 | INFO | train_inner | epoch 001: 2835 / 3002 loss=2.883, ppl=7.38, wps=5903.5, ups=0.09, wpb=64837, bsz=128, num_updates=2813, lr=9.99855e-05, gnorm=4.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=31391
2021-06-19 03:22:07 | INFO | train_inner | epoch 001: 2836 / 3002 loss=2.792, ppl=6.93, wps=5839.9, ups=0.09, wpb=64833, bsz=128, num_updates=2814, lr=9.99855e-05, gnorm=9.498, loss_scale=1, train_wall=11, gb_free=2.8, wall=31402
2021-06-19 03:22:19 | INFO | train_inner | epoch 001: 2837 / 3002 loss=2.774, ppl=6.84, wps=5831.1, ups=0.09, wpb=64967, bsz=128, num_updates=2815, lr=9.99855e-05, gnorm=2.905, loss_scale=1, train_wall=11, gb_free=2.8, wall=31413
2021-06-19 03:22:30 | INFO | train_inner | epoch 001: 2838 / 3002 loss=3.016, ppl=8.09, wps=5842.8, ups=0.09, wpb=64794, bsz=128, num_updates=2816, lr=9.99855e-05, gnorm=4.564, loss_scale=1, train_wall=11, gb_free=2.8, wall=31424
2021-06-19 03:22:41 | INFO | train_inner | epoch 001: 2839 / 3002 loss=3.004, ppl=8.02, wps=5796, ups=0.09, wpb=64822, bsz=128, num_updates=2817, lr=9.99855e-05, gnorm=3.534, loss_scale=1, train_wall=11, gb_free=2.8, wall=31435
2021-06-19 03:22:52 | INFO | train_inner | epoch 001: 2840 / 3002 loss=2.86, ppl=7.26, wps=5838.6, ups=0.09, wpb=64859, bsz=128, num_updates=2818, lr=9.99855e-05, gnorm=2.5, loss_scale=1, train_wall=11, gb_free=2.8, wall=31446
2021-06-19 03:23:03 | INFO | train_inner | epoch 001: 2841 / 3002 loss=2.795, ppl=6.94, wps=5866.4, ups=0.09, wpb=64766, bsz=128, num_updates=2819, lr=9.99854e-05, gnorm=2.52, loss_scale=1, train_wall=11, gb_free=2.8, wall=31457
2021-06-19 03:23:14 | INFO | train_inner | epoch 001: 2842 / 3002 loss=2.764, ppl=6.79, wps=5811.3, ups=0.09, wpb=64813, bsz=128, num_updates=2820, lr=9.99854e-05, gnorm=2.573, loss_scale=1, train_wall=11, gb_free=2.8, wall=31469
2021-06-19 03:23:25 | INFO | train_inner | epoch 001: 2843 / 3002 loss=2.875, ppl=7.34, wps=5898.3, ups=0.09, wpb=64887, bsz=128, num_updates=2821, lr=9.99854e-05, gnorm=2.756, loss_scale=1, train_wall=11, gb_free=2.8, wall=31480
2021-06-19 03:23:36 | INFO | train_inner | epoch 001: 2844 / 3002 loss=2.802, ppl=6.97, wps=5983, ups=0.09, wpb=64774, bsz=128, num_updates=2822, lr=9.99854e-05, gnorm=2.397, loss_scale=1, train_wall=10, gb_free=2.8, wall=31490
2021-06-19 03:23:47 | INFO | train_inner | epoch 001: 2845 / 3002 loss=2.908, ppl=7.51, wps=5858.9, ups=0.09, wpb=64765, bsz=128, num_updates=2823, lr=9.99854e-05, gnorm=2.456, loss_scale=1, train_wall=11, gb_free=2.8, wall=31501
2021-06-19 03:23:58 | INFO | train_inner | epoch 001: 2846 / 3002 loss=2.988, ppl=7.93, wps=5876, ups=0.09, wpb=64850, bsz=128, num_updates=2824, lr=9.99854e-05, gnorm=2.536, loss_scale=1, train_wall=11, gb_free=2.8, wall=31512
2021-06-19 03:24:09 | INFO | train_inner | epoch 001: 2847 / 3002 loss=2.963, ppl=7.8, wps=5895.5, ups=0.09, wpb=64827, bsz=128, num_updates=2825, lr=9.99854e-05, gnorm=2.463, loss_scale=1, train_wall=11, gb_free=2.8, wall=31523
2021-06-19 03:24:20 | INFO | train_inner | epoch 001: 2848 / 3002 loss=3.048, ppl=8.27, wps=5910.3, ups=0.09, wpb=64890, bsz=128, num_updates=2826, lr=9.99854e-05, gnorm=3.473, loss_scale=1, train_wall=10, gb_free=2.8, wall=31534
2021-06-19 03:24:31 | INFO | train_inner | epoch 001: 2849 / 3002 loss=2.733, ppl=6.65, wps=5868.5, ups=0.09, wpb=64874, bsz=128, num_updates=2827, lr=9.99854e-05, gnorm=2.323, loss_scale=1, train_wall=11, gb_free=2.8, wall=31545
2021-06-19 03:24:42 | INFO | train_inner | epoch 001: 2850 / 3002 loss=2.985, ppl=7.92, wps=5919.5, ups=0.09, wpb=64879, bsz=128, num_updates=2828, lr=9.99854e-05, gnorm=2.557, loss_scale=1, train_wall=11, gb_free=2.8, wall=31556
2021-06-19 03:24:53 | INFO | train_inner | epoch 001: 2851 / 3002 loss=3.176, ppl=9.04, wps=5850.6, ups=0.09, wpb=64791, bsz=128, num_updates=2829, lr=9.99854e-05, gnorm=2.536, loss_scale=1, train_wall=11, gb_free=2.8, wall=31568
2021-06-19 03:25:04 | INFO | train_inner | epoch 001: 2852 / 3002 loss=2.918, ppl=7.56, wps=5848.7, ups=0.09, wpb=64817, bsz=128, num_updates=2830, lr=9.99854e-05, gnorm=2.567, loss_scale=1, train_wall=11, gb_free=2.8, wall=31579
2021-06-19 03:25:16 | INFO | train_inner | epoch 001: 2853 / 3002 loss=2.956, ppl=7.76, wps=5767.5, ups=0.09, wpb=64817, bsz=128, num_updates=2831, lr=9.99854e-05, gnorm=2.482, loss_scale=1, train_wall=11, gb_free=2.8, wall=31590
2021-06-19 03:25:26 | INFO | train_inner | epoch 001: 2854 / 3002 loss=2.844, ppl=7.18, wps=5978, ups=0.09, wpb=64917, bsz=128, num_updates=2832, lr=9.99853e-05, gnorm=2.466, loss_scale=1, train_wall=10, gb_free=2.8, wall=31601
2021-06-19 03:25:37 | INFO | train_inner | epoch 001: 2855 / 3002 loss=2.912, ppl=7.52, wps=5880.2, ups=0.09, wpb=64758, bsz=128, num_updates=2833, lr=9.99853e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=31612
2021-06-19 03:25:49 | INFO | train_inner | epoch 001: 2856 / 3002 loss=2.913, ppl=7.53, wps=5737.3, ups=0.09, wpb=64832, bsz=128, num_updates=2834, lr=9.99853e-05, gnorm=2.408, loss_scale=1, train_wall=11, gb_free=2.8, wall=31623
2021-06-19 03:26:00 | INFO | train_inner | epoch 001: 2857 / 3002 loss=2.938, ppl=7.67, wps=5824, ups=0.09, wpb=64778, bsz=128, num_updates=2835, lr=9.99853e-05, gnorm=2.448, loss_scale=1, train_wall=11, gb_free=2.8, wall=31634
2021-06-19 03:26:11 | INFO | train_inner | epoch 001: 2858 / 3002 loss=2.869, ppl=7.3, wps=5808.9, ups=0.09, wpb=64810, bsz=128, num_updates=2836, lr=9.99853e-05, gnorm=2.343, loss_scale=1, train_wall=11, gb_free=2.8, wall=31645
2021-06-19 03:26:22 | INFO | train_inner | epoch 001: 2859 / 3002 loss=2.986, ppl=7.92, wps=5877.5, ups=0.09, wpb=64827, bsz=128, num_updates=2837, lr=9.99853e-05, gnorm=2.298, loss_scale=1, train_wall=11, gb_free=2.8, wall=31656
2021-06-19 03:26:33 | INFO | train_inner | epoch 001: 2860 / 3002 loss=2.867, ppl=7.3, wps=5833.9, ups=0.09, wpb=64840, bsz=128, num_updates=2838, lr=9.99853e-05, gnorm=2.438, loss_scale=1, train_wall=11, gb_free=2.8, wall=31667
2021-06-19 03:26:44 | INFO | train_inner | epoch 001: 2861 / 3002 loss=2.838, ppl=7.15, wps=5817.1, ups=0.09, wpb=64783, bsz=128, num_updates=2839, lr=9.99853e-05, gnorm=2.366, loss_scale=1, train_wall=11, gb_free=2.8, wall=31679
2021-06-19 03:26:55 | INFO | train_inner | epoch 001: 2862 / 3002 loss=3.039, ppl=8.22, wps=5863.4, ups=0.09, wpb=64768, bsz=128, num_updates=2840, lr=9.99853e-05, gnorm=3.058, loss_scale=1, train_wall=11, gb_free=2.8, wall=31690
2021-06-19 03:27:07 | INFO | train_inner | epoch 001: 2863 / 3002 loss=2.799, ppl=6.96, wps=5708.8, ups=0.09, wpb=64825, bsz=128, num_updates=2841, lr=9.99853e-05, gnorm=2.554, loss_scale=1, train_wall=11, gb_free=2.8, wall=31701
2021-06-19 03:27:18 | INFO | train_inner | epoch 001: 2864 / 3002 loss=2.957, ppl=7.77, wps=5890.2, ups=0.09, wpb=64837, bsz=128, num_updates=2842, lr=9.99853e-05, gnorm=3.069, loss_scale=1, train_wall=11, gb_free=2.8, wall=31712
2021-06-19 03:27:29 | INFO | train_inner | epoch 001: 2865 / 3002 loss=2.831, ppl=7.12, wps=5882.6, ups=0.09, wpb=64801, bsz=128, num_updates=2843, lr=9.99853e-05, gnorm=2.493, loss_scale=1, train_wall=11, gb_free=2.8, wall=31723
2021-06-19 03:27:40 | INFO | train_inner | epoch 001: 2866 / 3002 loss=2.884, ppl=7.38, wps=5901.8, ups=0.09, wpb=64786, bsz=128, num_updates=2844, lr=9.99852e-05, gnorm=2.326, loss_scale=1, train_wall=11, gb_free=2.8, wall=31734
2021-06-19 03:27:51 | INFO | train_inner | epoch 001: 2867 / 3002 loss=2.914, ppl=7.54, wps=5908.5, ups=0.09, wpb=64887, bsz=128, num_updates=2845, lr=9.99852e-05, gnorm=2.333, loss_scale=1, train_wall=11, gb_free=2.8, wall=31745
2021-06-19 03:28:02 | INFO | train_inner | epoch 001: 2868 / 3002 loss=2.926, ppl=7.6, wps=5826.4, ups=0.09, wpb=64884, bsz=128, num_updates=2846, lr=9.99852e-05, gnorm=2.393, loss_scale=1, train_wall=11, gb_free=2.8, wall=31756
2021-06-19 03:28:13 | INFO | train_inner | epoch 001: 2869 / 3002 loss=2.961, ppl=7.79, wps=5829.7, ups=0.09, wpb=64815, bsz=128, num_updates=2847, lr=9.99852e-05, gnorm=2.773, loss_scale=1, train_wall=11, gb_free=2.8, wall=31767
2021-06-19 03:28:24 | INFO | train_inner | epoch 001: 2870 / 3002 loss=2.896, ppl=7.44, wps=5711.8, ups=0.09, wpb=64875, bsz=128, num_updates=2848, lr=9.99852e-05, gnorm=2.611, loss_scale=1, train_wall=11, gb_free=2.8, wall=31779
2021-06-19 03:28:35 | INFO | train_inner | epoch 001: 2871 / 3002 loss=2.952, ppl=7.74, wps=5911.8, ups=0.09, wpb=64874, bsz=128, num_updates=2849, lr=9.99852e-05, gnorm=2.401, loss_scale=1, train_wall=10, gb_free=2.8, wall=31790
2021-06-19 03:28:46 | INFO | train_inner | epoch 001: 2872 / 3002 loss=2.908, ppl=7.51, wps=5901.8, ups=0.09, wpb=64930, bsz=128, num_updates=2850, lr=9.99852e-05, gnorm=2.333, loss_scale=1, train_wall=11, gb_free=2.8, wall=31801
2021-06-19 03:28:57 | INFO | train_inner | epoch 001: 2873 / 3002 loss=2.845, ppl=7.19, wps=5855.7, ups=0.09, wpb=64854, bsz=128, num_updates=2851, lr=9.99852e-05, gnorm=2.257, loss_scale=1, train_wall=11, gb_free=2.8, wall=31812
2021-06-19 03:29:08 | INFO | train_inner | epoch 001: 2874 / 3002 loss=3.032, ppl=8.18, wps=5809, ups=0.09, wpb=64856, bsz=128, num_updates=2852, lr=9.99852e-05, gnorm=3.107, loss_scale=1, train_wall=11, gb_free=2.8, wall=31823
2021-06-19 03:29:20 | INFO | train_inner | epoch 001: 2875 / 3002 loss=2.813, ppl=7.03, wps=5743.1, ups=0.09, wpb=64820, bsz=128, num_updates=2853, lr=9.99852e-05, gnorm=2.859, loss_scale=1, train_wall=11, gb_free=2.8, wall=31834
2021-06-19 03:29:31 | INFO | train_inner | epoch 001: 2876 / 3002 loss=3.02, ppl=8.11, wps=5745.2, ups=0.09, wpb=64720, bsz=128, num_updates=2854, lr=9.99852e-05, gnorm=2.513, loss_scale=1, train_wall=11, gb_free=2.8, wall=31845
2021-06-19 03:29:42 | INFO | train_inner | epoch 001: 2877 / 3002 loss=2.792, ppl=6.92, wps=5848.3, ups=0.09, wpb=64819, bsz=128, num_updates=2855, lr=9.99852e-05, gnorm=2.33, loss_scale=1, train_wall=11, gb_free=2.8, wall=31856
2021-06-19 03:29:53 | INFO | train_inner | epoch 001: 2878 / 3002 loss=2.809, ppl=7.01, wps=5907.1, ups=0.09, wpb=64847, bsz=128, num_updates=2856, lr=9.99852e-05, gnorm=2.365, loss_scale=1, train_wall=11, gb_free=2.8, wall=31867
2021-06-19 03:30:04 | INFO | train_inner | epoch 001: 2879 / 3002 loss=2.669, ppl=6.36, wps=5847.3, ups=0.09, wpb=64861, bsz=128, num_updates=2857, lr=9.99851e-05, gnorm=2.447, loss_scale=1, train_wall=11, gb_free=2.8, wall=31879
2021-06-19 03:30:15 | INFO | train_inner | epoch 001: 2880 / 3002 loss=2.864, ppl=7.28, wps=5775.6, ups=0.09, wpb=64856, bsz=128, num_updates=2858, lr=9.99851e-05, gnorm=2.322, loss_scale=1, train_wall=11, gb_free=2.8, wall=31890
2021-06-19 03:30:27 | INFO | train_inner | epoch 001: 2881 / 3002 loss=2.791, ppl=6.92, wps=5745.4, ups=0.09, wpb=64839, bsz=128, num_updates=2859, lr=9.99851e-05, gnorm=4.43, loss_scale=1, train_wall=11, gb_free=2.8, wall=31901
2021-06-19 03:30:38 | INFO | train_inner | epoch 001: 2882 / 3002 loss=2.922, ppl=7.58, wps=5829.9, ups=0.09, wpb=64731, bsz=128, num_updates=2860, lr=9.99851e-05, gnorm=2.334, loss_scale=1, train_wall=11, gb_free=2.8, wall=31912
2021-06-19 03:30:49 | INFO | train_inner | epoch 001: 2883 / 3002 loss=2.797, ppl=6.95, wps=5980.4, ups=0.09, wpb=64837, bsz=128, num_updates=2861, lr=9.99851e-05, gnorm=2.351, loss_scale=1, train_wall=10, gb_free=2.8, wall=31923
2021-06-19 03:31:00 | INFO | train_inner | epoch 001: 2884 / 3002 loss=2.852, ppl=7.22, wps=5846.1, ups=0.09, wpb=64795, bsz=128, num_updates=2862, lr=9.99851e-05, gnorm=2.869, loss_scale=1, train_wall=11, gb_free=2.8, wall=31934
2021-06-19 03:31:11 | INFO | train_inner | epoch 001: 2885 / 3002 loss=2.907, ppl=7.5, wps=5886.4, ups=0.09, wpb=64799, bsz=128, num_updates=2863, lr=9.99851e-05, gnorm=2.38, loss_scale=1, train_wall=11, gb_free=2.8, wall=31945
2021-06-19 03:31:22 | INFO | train_inner | epoch 001: 2886 / 3002 loss=2.84, ppl=7.16, wps=5884.8, ups=0.09, wpb=64823, bsz=128, num_updates=2864, lr=9.99851e-05, gnorm=2.338, loss_scale=1, train_wall=11, gb_free=2.8, wall=31956
2021-06-19 03:31:33 | INFO | train_inner | epoch 001: 2887 / 3002 loss=2.879, ppl=7.36, wps=5906.9, ups=0.09, wpb=64779, bsz=128, num_updates=2865, lr=9.99851e-05, gnorm=6.771, loss_scale=1, train_wall=11, gb_free=2.8, wall=31967
2021-06-19 03:31:44 | INFO | train_inner | epoch 001: 2888 / 3002 loss=2.926, ppl=7.6, wps=5893.6, ups=0.09, wpb=64819, bsz=128, num_updates=2866, lr=9.99851e-05, gnorm=2.376, loss_scale=1, train_wall=11, gb_free=2.8, wall=31978
2021-06-19 03:31:55 | INFO | train_inner | epoch 001: 2889 / 3002 loss=2.944, ppl=7.7, wps=5896.3, ups=0.09, wpb=64750, bsz=128, num_updates=2867, lr=9.99851e-05, gnorm=2.352, loss_scale=1, train_wall=11, gb_free=2.8, wall=31989
2021-06-19 03:32:06 | INFO | train_inner | epoch 001: 2890 / 3002 loss=3.051, ppl=8.29, wps=5849.7, ups=0.09, wpb=64783, bsz=128, num_updates=2868, lr=9.99851e-05, gnorm=2.469, loss_scale=1, train_wall=11, gb_free=2.8, wall=32000
2021-06-19 03:32:17 | INFO | train_inner | epoch 001: 2891 / 3002 loss=2.845, ppl=7.18, wps=5840.8, ups=0.09, wpb=64901, bsz=128, num_updates=2869, lr=9.9985e-05, gnorm=2.177, loss_scale=1, train_wall=11, gb_free=2.8, wall=32011
2021-06-19 03:32:28 | INFO | train_inner | epoch 001: 2892 / 3002 loss=2.928, ppl=7.61, wps=5783.3, ups=0.09, wpb=64782, bsz=128, num_updates=2870, lr=9.9985e-05, gnorm=4.007, loss_scale=1, train_wall=11, gb_free=2.8, wall=32022
2021-06-19 03:32:39 | INFO | train_inner | epoch 001: 2893 / 3002 loss=2.907, ppl=7.5, wps=5919.9, ups=0.09, wpb=64912, bsz=128, num_updates=2871, lr=9.9985e-05, gnorm=2.456, loss_scale=1, train_wall=11, gb_free=2.8, wall=32033
2021-06-19 03:32:50 | INFO | train_inner | epoch 001: 2894 / 3002 loss=2.906, ppl=7.5, wps=5764.3, ups=0.09, wpb=64885, bsz=128, num_updates=2872, lr=9.9985e-05, gnorm=2.346, loss_scale=1, train_wall=11, gb_free=2.8, wall=32045
2021-06-19 03:33:01 | INFO | train_inner | epoch 001: 2895 / 3002 loss=2.839, ppl=7.15, wps=5965.3, ups=0.09, wpb=64832, bsz=128, num_updates=2873, lr=9.9985e-05, gnorm=2.671, loss_scale=1, train_wall=10, gb_free=2.8, wall=32056
2021-06-19 03:33:12 | INFO | train_inner | epoch 001: 2896 / 3002 loss=3.044, ppl=8.24, wps=5798.4, ups=0.09, wpb=64737, bsz=128, num_updates=2874, lr=9.9985e-05, gnorm=6.355, loss_scale=1, train_wall=11, gb_free=2.8, wall=32067
2021-06-19 03:33:24 | INFO | train_inner | epoch 001: 2897 / 3002 loss=2.711, ppl=6.55, wps=5723, ups=0.09, wpb=64820, bsz=128, num_updates=2875, lr=9.9985e-05, gnorm=2.596, loss_scale=1, train_wall=11, gb_free=2.8, wall=32078
2021-06-19 03:33:35 | INFO | train_inner | epoch 001: 2898 / 3002 loss=2.74, ppl=6.68, wps=5764.6, ups=0.09, wpb=64829, bsz=128, num_updates=2876, lr=9.9985e-05, gnorm=2.559, loss_scale=1, train_wall=11, gb_free=2.8, wall=32089
2021-06-19 03:33:46 | INFO | train_inner | epoch 001: 2899 / 3002 loss=2.817, ppl=7.05, wps=5884.8, ups=0.09, wpb=64730, bsz=128, num_updates=2877, lr=9.9985e-05, gnorm=2.696, loss_scale=1, train_wall=11, gb_free=2.8, wall=32100
2021-06-19 03:33:57 | INFO | train_inner | epoch 001: 2900 / 3002 loss=2.96, ppl=7.78, wps=5818.3, ups=0.09, wpb=64733, bsz=128, num_updates=2878, lr=9.9985e-05, gnorm=2.221, loss_scale=1, train_wall=11, gb_free=2.8, wall=32111
2021-06-19 03:34:08 | INFO | train_inner | epoch 001: 2901 / 3002 loss=2.803, ppl=6.98, wps=5743.3, ups=0.09, wpb=64806, bsz=128, num_updates=2879, lr=9.9985e-05, gnorm=2.324, loss_scale=1, train_wall=11, gb_free=2.8, wall=32123
2021-06-19 03:34:19 | INFO | train_inner | epoch 001: 2902 / 3002 loss=2.862, ppl=7.27, wps=5911.5, ups=0.09, wpb=64856, bsz=128, num_updates=2880, lr=9.9985e-05, gnorm=2.456, loss_scale=1, train_wall=11, gb_free=2.8, wall=32134
2021-06-19 03:34:31 | INFO | train_inner | epoch 001: 2903 / 3002 loss=2.934, ppl=7.64, wps=5777.8, ups=0.09, wpb=64824, bsz=128, num_updates=2881, lr=9.9985e-05, gnorm=2.415, loss_scale=1, train_wall=11, gb_free=2.8, wall=32145
2021-06-19 03:34:42 | INFO | train_inner | epoch 001: 2904 / 3002 loss=2.807, ppl=7, wps=5817.8, ups=0.09, wpb=64833, bsz=128, num_updates=2882, lr=9.99849e-05, gnorm=3.174, loss_scale=1, train_wall=11, gb_free=2.8, wall=32156
2021-06-19 03:34:53 | INFO | train_inner | epoch 001: 2905 / 3002 loss=2.744, ppl=6.7, wps=5893.6, ups=0.09, wpb=64868, bsz=128, num_updates=2883, lr=9.99849e-05, gnorm=2.315, loss_scale=1, train_wall=11, gb_free=2.8, wall=32167
2021-06-19 03:35:04 | INFO | train_inner | epoch 001: 2906 / 3002 loss=2.998, ppl=7.99, wps=5919, ups=0.09, wpb=64812, bsz=128, num_updates=2884, lr=9.99849e-05, gnorm=2.368, loss_scale=1, train_wall=10, gb_free=2.8, wall=32178
2021-06-19 03:35:15 | INFO | train_inner | epoch 001: 2907 / 3002 loss=2.941, ppl=7.68, wps=5678.1, ups=0.09, wpb=64874, bsz=128, num_updates=2885, lr=9.99849e-05, gnorm=2.346, loss_scale=1, train_wall=11, gb_free=2.8, wall=32189
2021-06-19 03:35:26 | INFO | train_inner | epoch 001: 2908 / 3002 loss=2.851, ppl=7.22, wps=5884.7, ups=0.09, wpb=64861, bsz=128, num_updates=2886, lr=9.99849e-05, gnorm=2.482, loss_scale=1, train_wall=11, gb_free=2.8, wall=32200
2021-06-19 03:35:37 | INFO | train_inner | epoch 001: 2909 / 3002 loss=2.88, ppl=7.36, wps=5970.5, ups=0.09, wpb=64827, bsz=128, num_updates=2887, lr=9.99849e-05, gnorm=2.411, loss_scale=1, train_wall=10, gb_free=2.8, wall=32211
2021-06-19 03:35:48 | INFO | train_inner | epoch 001: 2910 / 3002 loss=2.941, ppl=7.68, wps=5886.2, ups=0.09, wpb=64759, bsz=128, num_updates=2888, lr=9.99849e-05, gnorm=2.505, loss_scale=1, train_wall=11, gb_free=2.8, wall=32222
2021-06-19 03:35:59 | INFO | train_inner | epoch 001: 2911 / 3002 loss=2.924, ppl=7.59, wps=5927.9, ups=0.09, wpb=64810, bsz=128, num_updates=2889, lr=9.99849e-05, gnorm=2.326, loss_scale=1, train_wall=10, gb_free=2.8, wall=32233
2021-06-19 03:36:10 | INFO | train_inner | epoch 001: 2912 / 3002 loss=2.773, ppl=6.84, wps=5950.1, ups=0.09, wpb=64788, bsz=128, num_updates=2890, lr=9.99849e-05, gnorm=2.323, loss_scale=1, train_wall=10, gb_free=2.8, wall=32244
2021-06-19 03:36:21 | INFO | train_inner | epoch 001: 2913 / 3002 loss=2.84, ppl=7.16, wps=5933.5, ups=0.09, wpb=64894, bsz=128, num_updates=2891, lr=9.99849e-05, gnorm=2.414, loss_scale=1, train_wall=10, gb_free=2.8, wall=32255
2021-06-19 03:36:31 | INFO | train_inner | epoch 001: 2914 / 3002 loss=2.774, ppl=6.84, wps=6011.8, ups=0.09, wpb=64862, bsz=128, num_updates=2892, lr=9.99849e-05, gnorm=2.268, loss_scale=1, train_wall=10, gb_free=2.8, wall=32266
2021-06-19 03:36:43 | INFO | train_inner | epoch 001: 2915 / 3002 loss=3.009, ppl=8.05, wps=5833, ups=0.09, wpb=64859, bsz=128, num_updates=2893, lr=9.99849e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=32277
2021-06-19 03:36:54 | INFO | train_inner | epoch 001: 2916 / 3002 loss=2.94, ppl=7.68, wps=5751, ups=0.09, wpb=64951, bsz=128, num_updates=2894, lr=9.99848e-05, gnorm=2.373, loss_scale=1, train_wall=11, gb_free=2.8, wall=32288
2021-06-19 03:37:05 | INFO | train_inner | epoch 001: 2917 / 3002 loss=2.721, ppl=6.59, wps=5902.2, ups=0.09, wpb=64875, bsz=128, num_updates=2895, lr=9.99848e-05, gnorm=2.395, loss_scale=1, train_wall=11, gb_free=2.8, wall=32299
2021-06-19 03:37:16 | INFO | train_inner | epoch 001: 2918 / 3002 loss=2.979, ppl=7.88, wps=5711.7, ups=0.09, wpb=64873, bsz=128, num_updates=2896, lr=9.99848e-05, gnorm=2.409, loss_scale=1, train_wall=11, gb_free=2.8, wall=32311
2021-06-19 03:37:28 | INFO | train_inner | epoch 001: 2919 / 3002 loss=2.968, ppl=7.83, wps=5644.5, ups=0.09, wpb=64809, bsz=128, num_updates=2897, lr=9.99848e-05, gnorm=2.402, loss_scale=1, train_wall=11, gb_free=2.8, wall=32322
2021-06-19 03:37:39 | INFO | train_inner | epoch 001: 2920 / 3002 loss=2.657, ppl=6.31, wps=5891.5, ups=0.09, wpb=64783, bsz=128, num_updates=2898, lr=9.99848e-05, gnorm=72.113, loss_scale=1, train_wall=11, gb_free=2.8, wall=32333
2021-06-19 03:37:50 | INFO | train_inner | epoch 001: 2921 / 3002 loss=2.816, ppl=7.04, wps=5830.2, ups=0.09, wpb=64836, bsz=128, num_updates=2899, lr=9.99848e-05, gnorm=2.413, loss_scale=1, train_wall=11, gb_free=2.8, wall=32344
2021-06-19 03:38:01 | INFO | train_inner | epoch 001: 2922 / 3002 loss=2.917, ppl=7.55, wps=5860.7, ups=0.09, wpb=64794, bsz=128, num_updates=2900, lr=9.99848e-05, gnorm=2.673, loss_scale=1, train_wall=11, gb_free=2.8, wall=32355
2021-06-19 03:38:12 | INFO | train_inner | epoch 001: 2923 / 3002 loss=2.873, ppl=7.32, wps=5895.6, ups=0.09, wpb=64807, bsz=128, num_updates=2901, lr=9.99848e-05, gnorm=2.363, loss_scale=1, train_wall=11, gb_free=2.8, wall=32366
2021-06-19 03:38:23 | INFO | train_inner | epoch 001: 2924 / 3002 loss=3.037, ppl=8.21, wps=5924.1, ups=0.09, wpb=64820, bsz=128, num_updates=2902, lr=9.99848e-05, gnorm=2.54, loss_scale=1, train_wall=10, gb_free=2.8, wall=32377
2021-06-19 03:38:34 | INFO | train_inner | epoch 001: 2925 / 3002 loss=2.869, ppl=7.31, wps=5824.9, ups=0.09, wpb=64825, bsz=128, num_updates=2903, lr=9.99848e-05, gnorm=2.508, loss_scale=1, train_wall=11, gb_free=2.8, wall=32388
2021-06-19 03:38:45 | INFO | train_inner | epoch 001: 2926 / 3002 loss=2.797, ppl=6.95, wps=5884.1, ups=0.09, wpb=64745, bsz=128, num_updates=2904, lr=9.99848e-05, gnorm=2.569, loss_scale=1, train_wall=11, gb_free=2.8, wall=32399
2021-06-19 03:38:56 | INFO | train_inner | epoch 001: 2927 / 3002 loss=2.919, ppl=7.57, wps=5793.3, ups=0.09, wpb=64822, bsz=128, num_updates=2905, lr=9.99848e-05, gnorm=2.478, loss_scale=1, train_wall=11, gb_free=2.8, wall=32410
2021-06-19 03:39:07 | INFO | train_inner | epoch 001: 2928 / 3002 loss=2.799, ppl=6.96, wps=5723.2, ups=0.09, wpb=64826, bsz=128, num_updates=2906, lr=9.99848e-05, gnorm=2.918, loss_scale=1, train_wall=11, gb_free=2.8, wall=32422
2021-06-19 03:39:18 | INFO | train_inner | epoch 001: 2929 / 3002 loss=2.784, ppl=6.89, wps=5895, ups=0.09, wpb=64829, bsz=128, num_updates=2907, lr=9.99847e-05, gnorm=2.442, loss_scale=1, train_wall=11, gb_free=2.8, wall=32433
2021-06-19 03:39:30 | INFO | train_inner | epoch 001: 2930 / 3002 loss=2.778, ppl=6.86, wps=5721.2, ups=0.09, wpb=64868, bsz=128, num_updates=2908, lr=9.99847e-05, gnorm=2.571, loss_scale=2, train_wall=11, gb_free=2.8, wall=32444
2021-06-19 03:39:41 | INFO | train_inner | epoch 001: 2931 / 3002 loss=2.934, ppl=7.64, wps=5863.4, ups=0.09, wpb=64800, bsz=128, num_updates=2909, lr=9.99847e-05, gnorm=2.625, loss_scale=2, train_wall=11, gb_free=2.8, wall=32455
2021-06-19 03:39:52 | INFO | train_inner | epoch 001: 2932 / 3002 loss=2.674, ppl=6.38, wps=5891.5, ups=0.09, wpb=64864, bsz=128, num_updates=2910, lr=9.99847e-05, gnorm=2.698, loss_scale=2, train_wall=11, gb_free=2.8, wall=32466
2021-06-19 03:40:03 | INFO | train_inner | epoch 001: 2933 / 3002 loss=2.9, ppl=7.47, wps=5768.8, ups=0.09, wpb=64890, bsz=128, num_updates=2911, lr=9.99847e-05, gnorm=2.477, loss_scale=2, train_wall=11, gb_free=2.8, wall=32477
2021-06-19 03:40:14 | INFO | train_inner | epoch 001: 2934 / 3002 loss=2.779, ppl=6.87, wps=5784, ups=0.09, wpb=64758, bsz=128, num_updates=2912, lr=9.99847e-05, gnorm=2.477, loss_scale=2, train_wall=11, gb_free=2.8, wall=32489
2021-06-19 03:40:25 | INFO | train_inner | epoch 001: 2935 / 3002 loss=2.934, ppl=7.64, wps=5988.3, ups=0.09, wpb=64896, bsz=128, num_updates=2913, lr=9.99847e-05, gnorm=2.536, loss_scale=2, train_wall=10, gb_free=2.8, wall=32499
2021-06-19 03:40:36 | INFO | train_inner | epoch 001: 2936 / 3002 loss=2.929, ppl=7.62, wps=5774, ups=0.09, wpb=64814, bsz=128, num_updates=2914, lr=9.99847e-05, gnorm=2.544, loss_scale=2, train_wall=11, gb_free=2.8, wall=32511
2021-06-19 03:40:48 | INFO | train_inner | epoch 001: 2937 / 3002 loss=2.843, ppl=7.17, wps=5792.6, ups=0.09, wpb=64804, bsz=128, num_updates=2915, lr=9.99847e-05, gnorm=16.926, loss_scale=2, train_wall=11, gb_free=2.8, wall=32522
2021-06-19 03:40:59 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-19 03:41:10 | INFO | train_inner | epoch 001: 2939 / 3002 loss=2.918, ppl=7.56, wps=2941.8, ups=0.05, wpb=64782, bsz=128, num_updates=2916, lr=9.99847e-05, gnorm=2.486, loss_scale=1, train_wall=21, gb_free=2.8, wall=32544
2021-06-19 03:41:20 | INFO | train_inner | epoch 001: 2940 / 3002 loss=2.853, ppl=7.22, wps=5931.8, ups=0.09, wpb=64739, bsz=128, num_updates=2917, lr=9.99847e-05, gnorm=2.538, loss_scale=1, train_wall=10, gb_free=2.8, wall=32555
2021-06-19 03:41:32 | INFO | train_inner | epoch 001: 2941 / 3002 loss=2.982, ppl=7.9, wps=5804, ups=0.09, wpb=64812, bsz=128, num_updates=2918, lr=9.99847e-05, gnorm=2.475, loss_scale=1, train_wall=11, gb_free=2.8, wall=32566
2021-06-19 03:41:43 | INFO | train_inner | epoch 001: 2942 / 3002 loss=2.902, ppl=7.47, wps=5828.6, ups=0.09, wpb=64826, bsz=128, num_updates=2919, lr=9.99846e-05, gnorm=2.618, loss_scale=1, train_wall=11, gb_free=2.8, wall=32577
2021-06-19 03:41:54 | INFO | train_inner | epoch 001: 2943 / 3002 loss=2.954, ppl=7.75, wps=5943.7, ups=0.09, wpb=64781, bsz=128, num_updates=2920, lr=9.99846e-05, gnorm=2.444, loss_scale=1, train_wall=10, gb_free=2.8, wall=32588
2021-06-19 03:42:04 | INFO | train_inner | epoch 001: 2944 / 3002 loss=2.673, ppl=6.38, wps=6022.1, ups=0.09, wpb=64821, bsz=128, num_updates=2921, lr=9.99846e-05, gnorm=2.834, loss_scale=1, train_wall=10, gb_free=2.8, wall=32599
2021-06-19 03:42:16 | INFO | train_inner | epoch 001: 2945 / 3002 loss=2.967, ppl=7.82, wps=5793.1, ups=0.09, wpb=64837, bsz=128, num_updates=2922, lr=9.99846e-05, gnorm=2.562, loss_scale=1, train_wall=11, gb_free=2.8, wall=32610
2021-06-19 03:42:27 | INFO | train_inner | epoch 001: 2946 / 3002 loss=2.997, ppl=7.98, wps=5822.1, ups=0.09, wpb=64761, bsz=128, num_updates=2923, lr=9.99846e-05, gnorm=2.345, loss_scale=1, train_wall=11, gb_free=2.8, wall=32621
2021-06-19 03:42:38 | INFO | train_inner | epoch 001: 2947 / 3002 loss=2.851, ppl=7.21, wps=5914.2, ups=0.09, wpb=64825, bsz=128, num_updates=2924, lr=9.99846e-05, gnorm=2.524, loss_scale=1, train_wall=10, gb_free=2.8, wall=32632
2021-06-19 03:42:49 | INFO | train_inner | epoch 001: 2948 / 3002 loss=2.897, ppl=7.45, wps=5865.1, ups=0.09, wpb=64834, bsz=128, num_updates=2925, lr=9.99846e-05, gnorm=2.363, loss_scale=1, train_wall=11, gb_free=2.8, wall=32643
2021-06-19 03:43:00 | INFO | train_inner | epoch 001: 2949 / 3002 loss=2.893, ppl=7.43, wps=5980.5, ups=0.09, wpb=64822, bsz=128, num_updates=2926, lr=9.99846e-05, gnorm=2.503, loss_scale=1, train_wall=10, gb_free=2.8, wall=32654
2021-06-19 03:43:11 | INFO | train_inner | epoch 001: 2950 / 3002 loss=2.838, ppl=7.15, wps=5902.7, ups=0.09, wpb=64887, bsz=128, num_updates=2927, lr=9.99846e-05, gnorm=2.59, loss_scale=1, train_wall=11, gb_free=2.8, wall=32665
2021-06-19 03:43:22 | INFO | train_inner | epoch 001: 2951 / 3002 loss=2.857, ppl=7.25, wps=5809.1, ups=0.09, wpb=64863, bsz=128, num_updates=2928, lr=9.99846e-05, gnorm=2.463, loss_scale=1, train_wall=11, gb_free=2.8, wall=32676
2021-06-19 03:43:33 | INFO | train_inner | epoch 001: 2952 / 3002 loss=3.059, ppl=8.33, wps=5953, ups=0.09, wpb=64887, bsz=128, num_updates=2929, lr=9.99846e-05, gnorm=2.456, loss_scale=1, train_wall=10, gb_free=2.8, wall=32687
2021-06-19 03:43:44 | INFO | train_inner | epoch 001: 2953 / 3002 loss=2.827, ppl=7.09, wps=5723.8, ups=0.09, wpb=64748, bsz=128, num_updates=2930, lr=9.99846e-05, gnorm=2.378, loss_scale=1, train_wall=11, gb_free=2.8, wall=32698
2021-06-19 03:43:55 | INFO | train_inner | epoch 001: 2954 / 3002 loss=2.786, ppl=6.89, wps=5856.8, ups=0.09, wpb=64872, bsz=128, num_updates=2931, lr=9.99846e-05, gnorm=2.285, loss_scale=1, train_wall=11, gb_free=2.8, wall=32709
2021-06-19 03:44:06 | INFO | train_inner | epoch 001: 2955 / 3002 loss=2.974, ppl=7.86, wps=5802.2, ups=0.09, wpb=64827, bsz=128, num_updates=2932, lr=9.99845e-05, gnorm=2.412, loss_scale=1, train_wall=11, gb_free=2.8, wall=32721
2021-06-19 03:44:17 | INFO | train_inner | epoch 001: 2956 / 3002 loss=2.845, ppl=7.19, wps=5935, ups=0.09, wpb=64907, bsz=128, num_updates=2933, lr=9.99845e-05, gnorm=2.268, loss_scale=1, train_wall=10, gb_free=2.8, wall=32732
2021-06-19 03:44:28 | INFO | train_inner | epoch 001: 2957 / 3002 loss=2.817, ppl=7.05, wps=5869.2, ups=0.09, wpb=64844, bsz=128, num_updates=2934, lr=9.99845e-05, gnorm=2.403, loss_scale=1, train_wall=11, gb_free=2.8, wall=32743
2021-06-19 03:44:39 | INFO | train_inner | epoch 001: 2958 / 3002 loss=2.937, ppl=7.66, wps=5797.3, ups=0.09, wpb=64880, bsz=128, num_updates=2935, lr=9.99845e-05, gnorm=2.382, loss_scale=1, train_wall=11, gb_free=2.8, wall=32754
2021-06-19 03:44:50 | INFO | train_inner | epoch 001: 2959 / 3002 loss=2.991, ppl=7.95, wps=5876.2, ups=0.09, wpb=64822, bsz=128, num_updates=2936, lr=9.99845e-05, gnorm=2.426, loss_scale=1, train_wall=11, gb_free=2.8, wall=32765
2021-06-19 03:45:01 | INFO | train_inner | epoch 001: 2960 / 3002 loss=2.864, ppl=7.28, wps=6013.8, ups=0.09, wpb=64902, bsz=128, num_updates=2937, lr=9.99845e-05, gnorm=2.593, loss_scale=1, train_wall=10, gb_free=2.8, wall=32776
2021-06-19 03:45:12 | INFO | train_inner | epoch 001: 2961 / 3002 loss=2.729, ppl=6.63, wps=5929.6, ups=0.09, wpb=64764, bsz=128, num_updates=2938, lr=9.99845e-05, gnorm=2.336, loss_scale=1, train_wall=10, gb_free=2.8, wall=32787
2021-06-19 03:45:23 | INFO | train_inner | epoch 001: 2962 / 3002 loss=2.901, ppl=7.47, wps=5808.9, ups=0.09, wpb=64792, bsz=128, num_updates=2939, lr=9.99845e-05, gnorm=2.264, loss_scale=1, train_wall=11, gb_free=2.8, wall=32798
2021-06-19 03:45:34 | INFO | train_inner | epoch 001: 2963 / 3002 loss=2.887, ppl=7.4, wps=5931.4, ups=0.09, wpb=64800, bsz=128, num_updates=2940, lr=9.99845e-05, gnorm=2.421, loss_scale=1, train_wall=10, gb_free=2.8, wall=32809
2021-06-19 03:45:45 | INFO | train_inner | epoch 001: 2964 / 3002 loss=2.92, ppl=7.57, wps=5786.2, ups=0.09, wpb=64795, bsz=128, num_updates=2941, lr=9.99845e-05, gnorm=2.702, loss_scale=1, train_wall=11, gb_free=2.8, wall=32820
2021-06-19 03:45:57 | INFO | train_inner | epoch 001: 2965 / 3002 loss=2.846, ppl=7.19, wps=5817.3, ups=0.09, wpb=64875, bsz=128, num_updates=2942, lr=9.99845e-05, gnorm=2.408, loss_scale=1, train_wall=11, gb_free=2.8, wall=32831
2021-06-19 03:46:08 | INFO | train_inner | epoch 001: 2966 / 3002 loss=2.634, ppl=6.21, wps=5790.8, ups=0.09, wpb=64871, bsz=128, num_updates=2943, lr=9.99845e-05, gnorm=2.305, loss_scale=1, train_wall=11, gb_free=2.8, wall=32842
2021-06-19 03:46:19 | INFO | train_inner | epoch 001: 2967 / 3002 loss=2.798, ppl=6.96, wps=5885.4, ups=0.09, wpb=64799, bsz=128, num_updates=2944, lr=9.99844e-05, gnorm=2.48, loss_scale=1, train_wall=11, gb_free=2.8, wall=32853
2021-06-19 03:46:30 | INFO | train_inner | epoch 001: 2968 / 3002 loss=2.776, ppl=6.85, wps=5739.3, ups=0.09, wpb=64815, bsz=128, num_updates=2945, lr=9.99844e-05, gnorm=2.453, loss_scale=1, train_wall=11, gb_free=2.8, wall=32864
2021-06-19 03:46:41 | INFO | train_inner | epoch 001: 2969 / 3002 loss=2.838, ppl=7.15, wps=5894.2, ups=0.09, wpb=64818, bsz=128, num_updates=2946, lr=9.99844e-05, gnorm=2.449, loss_scale=1, train_wall=11, gb_free=2.8, wall=32875
2021-06-19 03:46:52 | INFO | train_inner | epoch 001: 2970 / 3002 loss=2.83, ppl=7.11, wps=5842.5, ups=0.09, wpb=64803, bsz=128, num_updates=2947, lr=9.99844e-05, gnorm=2.364, loss_scale=1, train_wall=11, gb_free=2.8, wall=32887
2021-06-19 03:47:03 | INFO | train_inner | epoch 001: 2971 / 3002 loss=2.869, ppl=7.31, wps=5767.9, ups=0.09, wpb=64787, bsz=128, num_updates=2948, lr=9.99844e-05, gnorm=2.441, loss_scale=1, train_wall=11, gb_free=2.8, wall=32898
2021-06-19 03:47:15 | INFO | train_inner | epoch 001: 2972 / 3002 loss=2.831, ppl=7.11, wps=5758.5, ups=0.09, wpb=64841, bsz=128, num_updates=2949, lr=9.99844e-05, gnorm=2.464, loss_scale=1, train_wall=11, gb_free=2.8, wall=32909
2021-06-19 03:47:26 | INFO | train_inner | epoch 001: 2973 / 3002 loss=2.779, ppl=6.86, wps=5887.9, ups=0.09, wpb=64805, bsz=128, num_updates=2950, lr=9.99844e-05, gnorm=2.622, loss_scale=1, train_wall=11, gb_free=2.8, wall=32920
2021-06-19 03:47:37 | INFO | train_inner | epoch 001: 2974 / 3002 loss=2.779, ppl=6.86, wps=5941.9, ups=0.09, wpb=64809, bsz=128, num_updates=2951, lr=9.99844e-05, gnorm=3.213, loss_scale=1, train_wall=10, gb_free=2.8, wall=32931
2021-06-19 03:47:48 | INFO | train_inner | epoch 001: 2975 / 3002 loss=2.923, ppl=7.59, wps=5787.2, ups=0.09, wpb=64767, bsz=128, num_updates=2952, lr=9.99844e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=32942
2021-06-19 03:47:59 | INFO | train_inner | epoch 001: 2976 / 3002 loss=2.874, ppl=7.33, wps=5746.1, ups=0.09, wpb=64807, bsz=128, num_updates=2953, lr=9.99844e-05, gnorm=2.457, loss_scale=1, train_wall=11, gb_free=2.8, wall=32953
2021-06-19 03:48:10 | INFO | train_inner | epoch 001: 2977 / 3002 loss=2.891, ppl=7.42, wps=5778.3, ups=0.09, wpb=64783, bsz=128, num_updates=2954, lr=9.99844e-05, gnorm=2.336, loss_scale=1, train_wall=11, gb_free=2.8, wall=32965
2021-06-19 03:48:21 | INFO | train_inner | epoch 001: 2978 / 3002 loss=2.82, ppl=7.06, wps=5878.4, ups=0.09, wpb=64909, bsz=128, num_updates=2955, lr=9.99844e-05, gnorm=2.261, loss_scale=1, train_wall=11, gb_free=2.8, wall=32976
2021-06-19 03:48:32 | INFO | train_inner | epoch 001: 2979 / 3002 loss=2.928, ppl=7.61, wps=5851.9, ups=0.09, wpb=64846, bsz=128, num_updates=2956, lr=9.99844e-05, gnorm=2.406, loss_scale=1, train_wall=11, gb_free=2.8, wall=32987
2021-06-19 03:48:44 | INFO | train_inner | epoch 001: 2980 / 3002 loss=2.754, ppl=6.75, wps=5783.8, ups=0.09, wpb=64819, bsz=128, num_updates=2957, lr=9.99843e-05, gnorm=2.319, loss_scale=1, train_wall=11, gb_free=2.8, wall=32998
2021-06-19 03:48:55 | INFO | train_inner | epoch 001: 2981 / 3002 loss=2.847, ppl=7.2, wps=5846.3, ups=0.09, wpb=64850, bsz=128, num_updates=2958, lr=9.99843e-05, gnorm=2.381, loss_scale=1, train_wall=11, gb_free=2.8, wall=33009
2021-06-19 03:49:05 | INFO | train_inner | epoch 001: 2982 / 3002 loss=2.977, ppl=7.87, wps=6053.9, ups=0.09, wpb=64902, bsz=128, num_updates=2959, lr=9.99843e-05, gnorm=2.363, loss_scale=1, train_wall=10, gb_free=2.8, wall=33020
2021-06-19 03:49:16 | INFO | train_inner | epoch 001: 2983 / 3002 loss=2.799, ppl=6.96, wps=5961.9, ups=0.09, wpb=64865, bsz=128, num_updates=2960, lr=9.99843e-05, gnorm=2.392, loss_scale=1, train_wall=10, gb_free=2.8, wall=33031
2021-06-19 03:49:27 | INFO | train_inner | epoch 001: 2984 / 3002 loss=2.757, ppl=6.76, wps=5909.7, ups=0.09, wpb=64855, bsz=128, num_updates=2961, lr=9.99843e-05, gnorm=2.509, loss_scale=1, train_wall=11, gb_free=2.8, wall=33042
2021-06-19 03:49:38 | INFO | train_inner | epoch 001: 2985 / 3002 loss=2.591, ppl=6.02, wps=5822.8, ups=0.09, wpb=64848, bsz=128, num_updates=2962, lr=9.99843e-05, gnorm=2.27, loss_scale=1, train_wall=11, gb_free=2.8, wall=33053
2021-06-19 03:49:50 | INFO | train_inner | epoch 001: 2986 / 3002 loss=2.863, ppl=7.27, wps=5798.3, ups=0.09, wpb=64825, bsz=128, num_updates=2963, lr=9.99843e-05, gnorm=2.418, loss_scale=1, train_wall=11, gb_free=2.8, wall=33064
2021-06-19 03:50:01 | INFO | train_inner | epoch 001: 2987 / 3002 loss=2.879, ppl=7.36, wps=5862, ups=0.09, wpb=64834, bsz=128, num_updates=2964, lr=9.99843e-05, gnorm=4.748, loss_scale=1, train_wall=11, gb_free=2.8, wall=33075
2021-06-19 03:50:12 | INFO | train_inner | epoch 001: 2988 / 3002 loss=2.954, ppl=7.75, wps=5823.7, ups=0.09, wpb=64811, bsz=128, num_updates=2965, lr=9.99843e-05, gnorm=2.289, loss_scale=1, train_wall=11, gb_free=2.8, wall=33086
2021-06-19 03:50:23 | INFO | train_inner | epoch 001: 2989 / 3002 loss=2.858, ppl=7.25, wps=5810.8, ups=0.09, wpb=64744, bsz=128, num_updates=2966, lr=9.99843e-05, gnorm=2.329, loss_scale=1, train_wall=11, gb_free=2.8, wall=33097
2021-06-19 03:50:34 | INFO | train_inner | epoch 001: 2990 / 3002 loss=2.823, ppl=7.08, wps=5859.6, ups=0.09, wpb=64768, bsz=128, num_updates=2967, lr=9.99843e-05, gnorm=2.525, loss_scale=1, train_wall=11, gb_free=2.8, wall=33108
2021-06-19 03:50:45 | INFO | train_inner | epoch 001: 2991 / 3002 loss=2.895, ppl=7.44, wps=5903.9, ups=0.09, wpb=64837, bsz=128, num_updates=2968, lr=9.99843e-05, gnorm=2.299, loss_scale=1, train_wall=11, gb_free=2.8, wall=33119
2021-06-19 03:50:56 | INFO | train_inner | epoch 001: 2992 / 3002 loss=3.091, ppl=8.52, wps=5767.4, ups=0.09, wpb=64819, bsz=128, num_updates=2969, lr=9.99842e-05, gnorm=2.315, loss_scale=1, train_wall=11, gb_free=2.8, wall=33131
2021-06-19 03:51:07 | INFO | train_inner | epoch 001: 2993 / 3002 loss=2.952, ppl=7.74, wps=5902.2, ups=0.09, wpb=64854, bsz=128, num_updates=2970, lr=9.99842e-05, gnorm=2.414, loss_scale=1, train_wall=11, gb_free=2.8, wall=33142
2021-06-19 03:51:18 | INFO | train_inner | epoch 001: 2994 / 3002 loss=2.83, ppl=7.11, wps=5825, ups=0.09, wpb=64895, bsz=128, num_updates=2971, lr=9.99842e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=33153
2021-06-19 03:51:29 | INFO | train_inner | epoch 001: 2995 / 3002 loss=2.963, ppl=7.8, wps=5852.6, ups=0.09, wpb=64777, bsz=128, num_updates=2972, lr=9.99842e-05, gnorm=2.438, loss_scale=1, train_wall=11, gb_free=2.8, wall=33164
2021-06-19 03:51:40 | INFO | train_inner | epoch 001: 2996 / 3002 loss=2.981, ppl=7.9, wps=5882.9, ups=0.09, wpb=64877, bsz=128, num_updates=2973, lr=9.99842e-05, gnorm=2.382, loss_scale=1, train_wall=11, gb_free=2.8, wall=33175
2021-06-19 03:51:51 | INFO | train_inner | epoch 001: 2997 / 3002 loss=2.887, ppl=7.4, wps=5870.5, ups=0.09, wpb=64867, bsz=128, num_updates=2974, lr=9.99842e-05, gnorm=2.235, loss_scale=1, train_wall=11, gb_free=2.8, wall=33186
2021-06-19 03:52:02 | INFO | train_inner | epoch 001: 2998 / 3002 loss=2.845, ppl=7.18, wps=5931.2, ups=0.09, wpb=64865, bsz=128, num_updates=2975, lr=9.99842e-05, gnorm=2.266, loss_scale=1, train_wall=10, gb_free=2.8, wall=33197
2021-06-19 03:52:14 | INFO | train_inner | epoch 001: 2999 / 3002 loss=2.772, ppl=6.83, wps=5746.4, ups=0.09, wpb=64811, bsz=128, num_updates=2976, lr=9.99842e-05, gnorm=2.27, loss_scale=1, train_wall=11, gb_free=2.8, wall=33208
2021-06-19 03:52:25 | INFO | train_inner | epoch 001: 3000 / 3002 loss=2.727, ppl=6.62, wps=5921.5, ups=0.09, wpb=64831, bsz=128, num_updates=2977, lr=9.99842e-05, gnorm=2.179, loss_scale=1, train_wall=10, gb_free=2.8, wall=33219
2021-06-19 03:52:36 | INFO | train_inner | epoch 001: 3001 / 3002 loss=2.723, ppl=6.6, wps=5817.7, ups=0.09, wpb=64783, bsz=128, num_updates=2978, lr=9.99842e-05, gnorm=2.285, loss_scale=1, train_wall=11, gb_free=2.8, wall=33230
2021-06-19 03:52:42 | INFO | train_inner | epoch 001: 3002 / 3002 loss=2.979, ppl=7.89, wps=5835.1, ups=0.16, wpb=36447, bsz=72, num_updates=2979, lr=9.99842e-05, gnorm=3.173, loss_scale=1, train_wall=6, gb_free=2.8, wall=33236
2021-06-19 03:52:42 | INFO | fairseq_cli.train | begin validation on "valid" subset
2021-06-19 04:07:33 | INFO | valid | epoch 001 | valid on 'valid' subset | loss 2.7 | ppl 6.5 | wps 19818.9 | wpb 506.5 | bsz 1 | num_updates 2979
2021-06-19 04:07:33 | INFO | fairseq.checkpoint_utils | Preparing to save checkpoint for epoch 1 @ 2979 updates
2021-06-19 04:07:33 | INFO | fairseq.trainer | Saving checkpoint to checkpoints/checkpoint1.pt
2021-06-19 04:07:47 | INFO | fairseq.trainer | Finished saving checkpoint to checkpoints/checkpoint1.pt
2021-06-19 04:10:24 | INFO | fairseq.checkpoint_utils | Saved checkpoint checkpoints/checkpoint1.pt (epoch 1 @ 2979 updates, score 2.7) (writing took 171.51024247800524 seconds)
2021-06-19 04:10:24 | INFO | fairseq_cli.train | end of epoch 1 (average epoch stats below)
2021-06-19 04:10:24 | INFO | train | epoch 001 | loss 3.084 | ppl 8.48 | wps 5640.6 | ups 0.09 | wpb 64819.4 | bsz 128 | num_updates 2979 | lr 9.99842e-05 | gnorm 3.456 | loss_scale 1 | train_wall 31865 | gb_free 2.8 | wall 34298
2021-06-19 04:10:24 | INFO | fairseq.trainer | begin training epoch 2
2021-06-19 04:10:24 | INFO | fairseq_cli.train | Start iterating over samples
2021-06-19 04:10:35 | INFO | train_inner | epoch 002: 1 / 3002 loss=2.719, ppl=6.59, wps=60.3, ups=0, wpb=64738, bsz=128, num_updates=2980, lr=9.99842e-05, gnorm=2.404, loss_scale=1, train_wall=10, gb_free=2.8, wall=34309
2021-06-19 04:10:46 | INFO | train_inner | epoch 002: 2 / 3002 loss=2.681, ppl=6.41, wps=5934.7, ups=0.09, wpb=64845, bsz=128, num_updates=2981, lr=9.99842e-05, gnorm=2.33, loss_scale=1, train_wall=10, gb_free=2.8, wall=34320
2021-06-19 04:10:57 | INFO | train_inner | epoch 002: 3 / 3002 loss=2.763, ppl=6.79, wps=5966.7, ups=0.09, wpb=64892, bsz=128, num_updates=2982, lr=9.99841e-05, gnorm=2.417, loss_scale=1, train_wall=10, gb_free=2.8, wall=34331
2021-06-19 04:11:08 | INFO | train_inner | epoch 002: 4 / 3002 loss=2.893, ppl=7.43, wps=6039.8, ups=0.09, wpb=64824, bsz=128, num_updates=2983, lr=9.99841e-05, gnorm=2.373, loss_scale=1, train_wall=10, gb_free=2.8, wall=34342
2021-06-19 04:11:19 | INFO | train_inner | epoch 002: 5 / 3002 loss=2.813, ppl=7.03, wps=5902.8, ups=0.09, wpb=64913, bsz=128, num_updates=2984, lr=9.99841e-05, gnorm=2.482, loss_scale=1, train_wall=10, gb_free=2.8, wall=34353
2021-06-19 04:11:29 | INFO | train_inner | epoch 002: 6 / 3002 loss=2.763, ppl=6.79, wps=6029.1, ups=0.09, wpb=64879, bsz=128, num_updates=2985, lr=9.99841e-05, gnorm=2.247, loss_scale=1, train_wall=10, gb_free=2.8, wall=34364
2021-06-19 04:11:40 | INFO | train_inner | epoch 002: 7 / 3002 loss=2.792, ppl=6.92, wps=6039.6, ups=0.09, wpb=64804, bsz=128, num_updates=2986, lr=9.99841e-05, gnorm=2.369, loss_scale=1, train_wall=10, gb_free=2.8, wall=34375
2021-06-19 04:11:51 | INFO | train_inner | epoch 002: 8 / 3002 loss=2.77, ppl=6.82, wps=5991.4, ups=0.09, wpb=64764, bsz=128, num_updates=2987, lr=9.99841e-05, gnorm=2.568, loss_scale=1, train_wall=10, gb_free=2.8, wall=34385
2021-06-19 04:12:02 | INFO | train_inner | epoch 002: 9 / 3002 loss=2.822, ppl=7.07, wps=6013.2, ups=0.09, wpb=64792, bsz=128, num_updates=2988, lr=9.99841e-05, gnorm=2.42, loss_scale=1, train_wall=10, gb_free=2.8, wall=34396
2021-06-19 04:12:13 | INFO | train_inner | epoch 002: 10 / 3002 loss=2.905, ppl=7.49, wps=5978.8, ups=0.09, wpb=64763, bsz=128, num_updates=2989, lr=9.99841e-05, gnorm=2.263, loss_scale=1, train_wall=10, gb_free=2.8, wall=34407
2021-06-19 04:12:24 | INFO | train_inner | epoch 002: 11 / 3002 loss=2.929, ppl=7.62, wps=5796.3, ups=0.09, wpb=64776, bsz=128, num_updates=2990, lr=9.99841e-05, gnorm=2.487, loss_scale=1, train_wall=11, gb_free=2.8, wall=34418
2021-06-19 04:12:35 | INFO | train_inner | epoch 002: 12 / 3002 loss=2.876, ppl=7.34, wps=5848.8, ups=0.09, wpb=64880, bsz=128, num_updates=2991, lr=9.99841e-05, gnorm=2.283, loss_scale=1, train_wall=11, gb_free=2.8, wall=34429
2021-06-19 04:12:46 | INFO | train_inner | epoch 002: 13 / 3002 loss=2.759, ppl=6.77, wps=6036.2, ups=0.09, wpb=64829, bsz=128, num_updates=2992, lr=9.99841e-05, gnorm=2.194, loss_scale=1, train_wall=10, gb_free=2.8, wall=34440
2021-06-19 04:12:57 | INFO | train_inner | epoch 002: 14 / 3002 loss=2.946, ppl=7.71, wps=5802.9, ups=0.09, wpb=64808, bsz=128, num_updates=2993, lr=9.99841e-05, gnorm=7.363, loss_scale=1, train_wall=11, gb_free=2.8, wall=34451
2021-06-19 04:13:08 | INFO | train_inner | epoch 002: 15 / 3002 loss=2.838, ppl=7.15, wps=5908.3, ups=0.09, wpb=64788, bsz=128, num_updates=2994, lr=9.9984e-05, gnorm=2.432, loss_scale=1, train_wall=11, gb_free=2.8, wall=34462
2021-06-19 04:13:19 | INFO | train_inner | epoch 002: 16 / 3002 loss=2.784, ppl=6.89, wps=5916, ups=0.09, wpb=64840, bsz=128, num_updates=2995, lr=9.9984e-05, gnorm=2.535, loss_scale=1, train_wall=11, gb_free=2.8, wall=34473
2021-06-19 04:13:30 | INFO | train_inner | epoch 002: 17 / 3002 loss=2.77, ppl=6.82, wps=5874.5, ups=0.09, wpb=64828, bsz=128, num_updates=2996, lr=9.9984e-05, gnorm=2.27, loss_scale=1, train_wall=11, gb_free=2.8, wall=34484
2021-06-19 04:13:41 | INFO | train_inner | epoch 002: 18 / 3002 loss=3.046, ppl=8.26, wps=5787.6, ups=0.09, wpb=64788, bsz=128, num_updates=2997, lr=9.9984e-05, gnorm=2.402, loss_scale=1, train_wall=11, gb_free=2.8, wall=34495
2021-06-19 04:13:52 | INFO | train_inner | epoch 002: 19 / 3002 loss=2.898, ppl=7.46, wps=5709.7, ups=0.09, wpb=64742, bsz=128, num_updates=2998, lr=9.9984e-05, gnorm=2.434, loss_scale=1, train_wall=11, gb_free=2.8, wall=34507
2021-06-19 04:14:03 | INFO | train_inner | epoch 002: 20 / 3002 loss=3.067, ppl=8.38, wps=5869.2, ups=0.09, wpb=64851, bsz=128, num_updates=2999, lr=9.9984e-05, gnorm=2.886, loss_scale=1, train_wall=11, gb_free=2.8, wall=34518
2021-06-19 04:14:14 | INFO | train_inner | epoch 002: 21 / 3002 loss=2.846, ppl=7.19, wps=5803.9, ups=0.09, wpb=64800, bsz=128, num_updates=3000, lr=9.9984e-05, gnorm=2.3, loss_scale=1, train_wall=11, gb_free=2.8, wall=34529
2021-06-19 04:14:26 | INFO | train_inner | epoch 002: 22 / 3002 loss=2.83, ppl=7.11, wps=5757.6, ups=0.09, wpb=64869, bsz=128, num_updates=3001, lr=9.9984e-05, gnorm=32.828, loss_scale=1, train_wall=11, gb_free=2.8, wall=34540
2021-06-19 04:14:37 | INFO | train_inner | epoch 002: 23 / 3002 loss=2.892, ppl=7.42, wps=5818.4, ups=0.09, wpb=64760, bsz=128, num_updates=3002, lr=9.9984e-05, gnorm=2.287, loss_scale=1, train_wall=11, gb_free=2.8, wall=34551
2021-06-19 04:14:48 | INFO | train_inner | epoch 002: 24 / 3002 loss=2.897, ppl=7.45, wps=5864.3, ups=0.09, wpb=64847, bsz=128, num_updates=3003, lr=9.9984e-05, gnorm=4.044, loss_scale=1, train_wall=11, gb_free=2.8, wall=34562
2021-06-19 04:14:59 | INFO | train_inner | epoch 002: 25 / 3002 loss=2.962, ppl=7.79, wps=5861.1, ups=0.09, wpb=64792, bsz=128, num_updates=3004, lr=9.9984e-05, gnorm=4.734, loss_scale=1, train_wall=11, gb_free=2.8, wall=34573
2021-06-19 04:15:10 | INFO | train_inner | epoch 002: 26 / 3002 loss=2.901, ppl=7.47, wps=5859.2, ups=0.09, wpb=64804, bsz=128, num_updates=3005, lr=9.9984e-05, gnorm=2.297, loss_scale=1, train_wall=11, gb_free=2.8, wall=34584
2021-06-19 04:15:21 | INFO | train_inner | epoch 002: 27 / 3002 loss=2.915, ppl=7.54, wps=5789.7, ups=0.09, wpb=64828, bsz=128, num_updates=3006, lr=9.9984e-05, gnorm=2.304, loss_scale=1, train_wall=11, gb_free=2.8, wall=34596
2021-06-19 04:15:32 | INFO | train_inner | epoch 002: 28 / 3002 loss=3.03, ppl=8.17, wps=5823.7, ups=0.09, wpb=64750, bsz=128, num_updates=3007, lr=9.99839e-05, gnorm=2.33, loss_scale=1, train_wall=11, gb_free=2.8, wall=34607
2021-06-19 04:15:43 | INFO | train_inner | epoch 002: 29 / 3002 loss=2.887, ppl=7.4, wps=5911.2, ups=0.09, wpb=64806, bsz=128, num_updates=3008, lr=9.99839e-05, gnorm=2.325, loss_scale=1, train_wall=11, gb_free=2.8, wall=34618
2021-06-19 04:15:55 | INFO | train_inner | epoch 002: 30 / 3002 loss=2.975, ppl=7.86, wps=5789.1, ups=0.09, wpb=64753, bsz=128, num_updates=3009, lr=9.99839e-05, gnorm=2.356, loss_scale=1, train_wall=11, gb_free=2.8, wall=34629
2021-06-19 04:16:06 | INFO | train_inner | epoch 002: 31 / 3002 loss=2.812, ppl=7.02, wps=5857.4, ups=0.09, wpb=64810, bsz=128, num_updates=3010, lr=9.99839e-05, gnorm=2.481, loss_scale=1, train_wall=11, gb_free=2.8, wall=34640
2021-06-19 04:16:17 | INFO | train_inner | epoch 002: 32 / 3002 loss=2.901, ppl=7.47, wps=5828.2, ups=0.09, wpb=64815, bsz=128, num_updates=3011, lr=9.99839e-05, gnorm=2.336, loss_scale=1, train_wall=11, gb_free=2.8, wall=34651
2021-06-19 04:16:28 | INFO | train_inner | epoch 002: 33 / 3002 loss=2.885, ppl=7.39, wps=5887.3, ups=0.09, wpb=64822, bsz=128, num_updates=3012, lr=9.99839e-05, gnorm=2.508, loss_scale=1, train_wall=11, gb_free=2.8, wall=34662
2021-06-19 04:16:39 | INFO | train_inner | epoch 002: 34 / 3002 loss=2.938, ppl=7.66, wps=5900.6, ups=0.09, wpb=64917, bsz=128, num_updates=3013, lr=9.99839e-05, gnorm=2.502, loss_scale=1, train_wall=11, gb_free=2.8, wall=34673
2021-06-19 04:16:50 | INFO | train_inner | epoch 002: 35 / 3002 loss=2.894, ppl=7.44, wps=5849.6, ups=0.09, wpb=64767, bsz=128, num_updates=3014, lr=9.99839e-05, gnorm=3.59, loss_scale=1, train_wall=11, gb_free=2.8, wall=34684
2021-06-19 04:17:01 | INFO | train_inner | epoch 002: 36 / 3002 loss=2.933, ppl=7.64, wps=5695.8, ups=0.09, wpb=64744, bsz=128, num_updates=3015, lr=9.99839e-05, gnorm=2.459, loss_scale=1, train_wall=11, gb_free=2.8, wall=34696
2021-06-19 04:17:12 | INFO | train_inner | epoch 002: 37 / 3002 loss=2.918, ppl=7.56, wps=5783.9, ups=0.09, wpb=64744, bsz=128, num_updates=3016, lr=9.99839e-05, gnorm=2.448, loss_scale=1, train_wall=11, gb_free=2.8, wall=34707
2021-06-19 04:17:24 | INFO | train_inner | epoch 002: 38 / 3002 loss=2.896, ppl=7.44, wps=5809.3, ups=0.09, wpb=64851, bsz=128, num_updates=3017, lr=9.99839e-05, gnorm=2.537, loss_scale=1, train_wall=11, gb_free=2.8, wall=34718
2021-06-19 04:17:35 | INFO | train_inner | epoch 002: 39 / 3002 loss=2.852, ppl=7.22, wps=5836.9, ups=0.09, wpb=64783, bsz=128, num_updates=3018, lr=9.99839e-05, gnorm=2.313, loss_scale=1, train_wall=11, gb_free=2.8, wall=34729
2021-06-19 04:17:46 | INFO | train_inner | epoch 002: 40 / 3002 loss=2.931, ppl=7.63, wps=5899.1, ups=0.09, wpb=64836, bsz=128, num_updates=3019, lr=9.99838e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=34740
2021-06-19 04:17:57 | INFO | train_inner | epoch 002: 41 / 3002 loss=2.902, ppl=7.48, wps=5879.1, ups=0.09, wpb=64854, bsz=128, num_updates=3020, lr=9.99838e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=34751
2021-06-19 04:18:08 | INFO | train_inner | epoch 002: 42 / 3002 loss=2.725, ppl=6.61, wps=5834.3, ups=0.09, wpb=64857, bsz=128, num_updates=3021, lr=9.99838e-05, gnorm=2.333, loss_scale=1, train_wall=11, gb_free=2.8, wall=34762
2021-06-19 04:18:19 | INFO | train_inner | epoch 002: 43 / 3002 loss=2.984, ppl=7.91, wps=5947.6, ups=0.09, wpb=64827, bsz=128, num_updates=3022, lr=9.99838e-05, gnorm=2.425, loss_scale=1, train_wall=10, gb_free=2.8, wall=34773
2021-06-19 04:18:30 | INFO | train_inner | epoch 002: 44 / 3002 loss=3.075, ppl=8.43, wps=5823.8, ups=0.09, wpb=64785, bsz=128, num_updates=3023, lr=9.99838e-05, gnorm=2.49, loss_scale=1, train_wall=11, gb_free=2.8, wall=34784
2021-06-19 04:18:41 | INFO | train_inner | epoch 002: 45 / 3002 loss=2.653, ppl=6.29, wps=5908.6, ups=0.09, wpb=64889, bsz=128, num_updates=3024, lr=9.99838e-05, gnorm=2.271, loss_scale=1, train_wall=11, gb_free=2.8, wall=34795
2021-06-19 04:18:52 | INFO | train_inner | epoch 002: 46 / 3002 loss=2.902, ppl=7.47, wps=5815, ups=0.09, wpb=64841, bsz=128, num_updates=3025, lr=9.99838e-05, gnorm=2.697, loss_scale=1, train_wall=11, gb_free=2.8, wall=34806
2021-06-19 04:19:03 | INFO | train_inner | epoch 002: 47 / 3002 loss=2.884, ppl=7.38, wps=5796.6, ups=0.09, wpb=64800, bsz=128, num_updates=3026, lr=9.99838e-05, gnorm=2.298, loss_scale=1, train_wall=11, gb_free=2.8, wall=34817
2021-06-19 04:19:14 | INFO | train_inner | epoch 002: 48 / 3002 loss=2.963, ppl=7.79, wps=5777.6, ups=0.09, wpb=64762, bsz=128, num_updates=3027, lr=9.99838e-05, gnorm=2.415, loss_scale=1, train_wall=11, gb_free=2.8, wall=34829
2021-06-19 04:19:26 | INFO | train_inner | epoch 002: 49 / 3002 loss=2.877, ppl=7.35, wps=5743, ups=0.09, wpb=64780, bsz=128, num_updates=3028, lr=9.99838e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=34840
2021-06-19 04:19:37 | INFO | train_inner | epoch 002: 50 / 3002 loss=2.845, ppl=7.19, wps=5847.3, ups=0.09, wpb=64822, bsz=128, num_updates=3029, lr=9.99838e-05, gnorm=2.356, loss_scale=1, train_wall=11, gb_free=2.8, wall=34851
2021-06-19 04:19:48 | INFO | train_inner | epoch 002: 51 / 3002 loss=2.971, ppl=7.84, wps=5972.2, ups=0.09, wpb=64844, bsz=128, num_updates=3030, lr=9.99838e-05, gnorm=2.366, loss_scale=1, train_wall=10, gb_free=2.8, wall=34862
2021-06-19 04:19:59 | INFO | train_inner | epoch 002: 52 / 3002 loss=2.732, ppl=6.64, wps=5732.3, ups=0.09, wpb=64823, bsz=128, num_updates=3031, lr=9.99838e-05, gnorm=2.368, loss_scale=1, train_wall=11, gb_free=2.8, wall=34873
2021-06-19 04:20:10 | INFO | train_inner | epoch 002: 53 / 3002 loss=2.791, ppl=6.92, wps=5869.4, ups=0.09, wpb=64823, bsz=128, num_updates=3032, lr=9.99837e-05, gnorm=2.431, loss_scale=1, train_wall=11, gb_free=2.8, wall=34884
2021-06-19 04:20:21 | INFO | train_inner | epoch 002: 54 / 3002 loss=2.836, ppl=7.14, wps=5992.6, ups=0.09, wpb=64839, bsz=128, num_updates=3033, lr=9.99837e-05, gnorm=2.355, loss_scale=1, train_wall=10, gb_free=2.8, wall=34895
2021-06-19 04:20:32 | INFO | train_inner | epoch 002: 55 / 3002 loss=2.993, ppl=7.96, wps=5830.3, ups=0.09, wpb=64845, bsz=128, num_updates=3034, lr=9.99837e-05, gnorm=2.448, loss_scale=1, train_wall=11, gb_free=2.8, wall=34906
2021-06-19 04:20:43 | INFO | train_inner | epoch 002: 56 / 3002 loss=2.694, ppl=6.47, wps=5901.9, ups=0.09, wpb=64885, bsz=128, num_updates=3035, lr=9.99837e-05, gnorm=2.324, loss_scale=1, train_wall=11, gb_free=2.8, wall=34917
2021-06-19 04:20:54 | INFO | train_inner | epoch 002: 57 / 3002 loss=2.998, ppl=7.99, wps=5861.6, ups=0.09, wpb=64872, bsz=128, num_updates=3036, lr=9.99837e-05, gnorm=2.514, loss_scale=1, train_wall=11, gb_free=2.8, wall=34928
2021-06-19 04:21:05 | INFO | train_inner | epoch 002: 58 / 3002 loss=2.893, ppl=7.43, wps=5799.9, ups=0.09, wpb=64827, bsz=128, num_updates=3037, lr=9.99837e-05, gnorm=2.374, loss_scale=1, train_wall=11, gb_free=2.8, wall=34939
2021-06-19 04:21:16 | INFO | train_inner | epoch 002: 59 / 3002 loss=2.894, ppl=7.44, wps=5916.1, ups=0.09, wpb=64880, bsz=128, num_updates=3038, lr=9.99837e-05, gnorm=2.426, loss_scale=1, train_wall=11, gb_free=2.8, wall=34950
2021-06-19 04:21:27 | INFO | train_inner | epoch 002: 60 / 3002 loss=2.891, ppl=7.42, wps=5771.6, ups=0.09, wpb=64805, bsz=128, num_updates=3039, lr=9.99837e-05, gnorm=5.77, loss_scale=1, train_wall=11, gb_free=2.8, wall=34962
2021-06-19 04:21:39 | INFO | train_inner | epoch 002: 61 / 3002 loss=2.796, ppl=6.95, wps=5711.5, ups=0.09, wpb=64788, bsz=128, num_updates=3040, lr=9.99837e-05, gnorm=2.794, loss_scale=1, train_wall=11, gb_free=2.8, wall=34973
2021-06-19 04:21:50 | INFO | train_inner | epoch 002: 62 / 3002 loss=2.837, ppl=7.14, wps=5877.8, ups=0.09, wpb=64909, bsz=128, num_updates=3041, lr=9.99837e-05, gnorm=2.339, loss_scale=1, train_wall=11, gb_free=2.8, wall=34984
2021-06-19 04:22:01 | INFO | train_inner | epoch 002: 63 / 3002 loss=2.846, ppl=7.19, wps=5895.1, ups=0.09, wpb=64803, bsz=128, num_updates=3042, lr=9.99837e-05, gnorm=2.389, loss_scale=1, train_wall=11, gb_free=2.8, wall=34995
2021-06-19 04:22:12 | INFO | train_inner | epoch 002: 64 / 3002 loss=2.945, ppl=7.7, wps=5789, ups=0.09, wpb=64783, bsz=128, num_updates=3043, lr=9.99837e-05, gnorm=3.268, loss_scale=2, train_wall=11, gb_free=2.8, wall=35006
2021-06-19 04:22:23 | INFO | train_inner | epoch 002: 65 / 3002 loss=2.871, ppl=7.32, wps=5769.2, ups=0.09, wpb=64703, bsz=128, num_updates=3044, lr=9.99836e-05, gnorm=2.356, loss_scale=2, train_wall=11, gb_free=2.8, wall=35017
2021-06-19 04:22:34 | INFO | train_inner | epoch 002: 66 / 3002 loss=2.829, ppl=7.11, wps=5842.8, ups=0.09, wpb=64804, bsz=128, num_updates=3045, lr=9.99836e-05, gnorm=3.084, loss_scale=2, train_wall=11, gb_free=2.8, wall=35028
2021-06-19 04:22:45 | INFO | train_inner | epoch 002: 67 / 3002 loss=2.825, ppl=7.09, wps=5887.7, ups=0.09, wpb=64792, bsz=128, num_updates=3046, lr=9.99836e-05, gnorm=2.379, loss_scale=2, train_wall=11, gb_free=2.8, wall=35039
2021-06-19 04:22:56 | INFO | train_inner | epoch 002: 68 / 3002 loss=2.964, ppl=7.8, wps=5955.7, ups=0.09, wpb=64749, bsz=128, num_updates=3047, lr=9.99836e-05, gnorm=2.602, loss_scale=2, train_wall=10, gb_free=2.8, wall=35050
2021-06-19 04:23:07 | INFO | train_inner | epoch 002: 69 / 3002 loss=2.925, ppl=7.59, wps=5856.6, ups=0.09, wpb=64819, bsz=128, num_updates=3048, lr=9.99836e-05, gnorm=5.751, loss_scale=2, train_wall=11, gb_free=2.8, wall=35061
2021-06-19 04:23:18 | INFO | train_inner | epoch 002: 70 / 3002 loss=3.018, ppl=8.1, wps=5762.9, ups=0.09, wpb=64788, bsz=128, num_updates=3049, lr=9.99836e-05, gnorm=2.759, loss_scale=2, train_wall=11, gb_free=2.8, wall=35073
2021-06-19 04:23:29 | INFO | train_inner | epoch 002: 71 / 3002 loss=2.928, ppl=7.61, wps=5882.5, ups=0.09, wpb=64822, bsz=128, num_updates=3050, lr=9.99836e-05, gnorm=2.616, loss_scale=2, train_wall=11, gb_free=2.8, wall=35084
2021-06-19 04:23:40 | INFO | train_inner | epoch 002: 72 / 3002 loss=2.883, ppl=7.38, wps=5939.2, ups=0.09, wpb=64876, bsz=128, num_updates=3051, lr=9.99836e-05, gnorm=4.896, loss_scale=2, train_wall=10, gb_free=2.8, wall=35095
2021-06-19 04:23:52 | INFO | train_inner | epoch 002: 73 / 3002 loss=2.696, ppl=6.48, wps=5752.3, ups=0.09, wpb=64814, bsz=128, num_updates=3052, lr=9.99836e-05, gnorm=2.467, loss_scale=2, train_wall=11, gb_free=2.8, wall=35106
2021-06-19 04:24:03 | INFO | train_inner | epoch 002: 74 / 3002 loss=2.884, ppl=7.38, wps=5804.6, ups=0.09, wpb=64821, bsz=128, num_updates=3053, lr=9.99836e-05, gnorm=6.645, loss_scale=2, train_wall=11, gb_free=2.8, wall=35117
2021-06-19 04:24:14 | INFO | train_inner | epoch 002: 75 / 3002 loss=2.726, ppl=6.62, wps=5825.6, ups=0.09, wpb=64924, bsz=128, num_updates=3054, lr=9.99836e-05, gnorm=2.44, loss_scale=2, train_wall=11, gb_free=2.8, wall=35128
2021-06-19 04:24:25 | INFO | train_inner | epoch 002: 76 / 3002 loss=3.016, ppl=8.09, wps=5951.8, ups=0.09, wpb=64816, bsz=128, num_updates=3055, lr=9.99836e-05, gnorm=2.549, loss_scale=2, train_wall=10, gb_free=2.8, wall=35139
2021-06-19 04:24:36 | INFO | train_inner | epoch 002: 77 / 3002 loss=2.964, ppl=7.8, wps=5903, ups=0.09, wpb=64850, bsz=128, num_updates=3056, lr=9.99836e-05, gnorm=2.657, loss_scale=2, train_wall=11, gb_free=2.8, wall=35150
2021-06-19 04:24:47 | INFO | train_inner | epoch 002: 78 / 3002 loss=2.891, ppl=7.42, wps=5831.5, ups=0.09, wpb=64829, bsz=128, num_updates=3057, lr=9.99835e-05, gnorm=2.482, loss_scale=2, train_wall=11, gb_free=2.8, wall=35161
2021-06-19 04:24:58 | INFO | train_inner | epoch 002: 79 / 3002 loss=2.819, ppl=7.06, wps=5805, ups=0.09, wpb=64855, bsz=128, num_updates=3058, lr=9.99835e-05, gnorm=2.457, loss_scale=2, train_wall=11, gb_free=2.8, wall=35172
2021-06-19 04:25:09 | INFO | train_inner | epoch 002: 80 / 3002 loss=2.873, ppl=7.32, wps=5899.7, ups=0.09, wpb=64835, bsz=128, num_updates=3059, lr=9.99835e-05, gnorm=2.746, loss_scale=2, train_wall=11, gb_free=2.8, wall=35183
2021-06-19 04:25:20 | INFO | train_inner | epoch 002: 81 / 3002 loss=2.79, ppl=6.91, wps=5916.4, ups=0.09, wpb=64825, bsz=128, num_updates=3060, lr=9.99835e-05, gnorm=2.541, loss_scale=2, train_wall=10, gb_free=2.8, wall=35194
2021-06-19 04:25:31 | INFO | train_inner | epoch 002: 82 / 3002 loss=2.872, ppl=7.32, wps=5959.6, ups=0.09, wpb=64894, bsz=128, num_updates=3061, lr=9.99835e-05, gnorm=2.623, loss_scale=2, train_wall=10, gb_free=2.8, wall=35205
2021-06-19 04:25:42 | INFO | train_inner | epoch 002: 83 / 3002 loss=2.834, ppl=7.13, wps=5866.8, ups=0.09, wpb=64848, bsz=128, num_updates=3062, lr=9.99835e-05, gnorm=2.704, loss_scale=2, train_wall=11, gb_free=2.8, wall=35216
2021-06-19 04:25:53 | INFO | train_inner | epoch 002: 84 / 3002 loss=2.75, ppl=6.73, wps=5820.6, ups=0.09, wpb=64784, bsz=128, num_updates=3063, lr=9.99835e-05, gnorm=2.419, loss_scale=2, train_wall=11, gb_free=2.8, wall=35227
2021-06-19 04:26:04 | INFO | train_inner | epoch 002: 85 / 3002 loss=2.86, ppl=7.26, wps=5776.8, ups=0.09, wpb=64818, bsz=128, num_updates=3064, lr=9.99835e-05, gnorm=2.323, loss_scale=2, train_wall=11, gb_free=2.8, wall=35239
2021-06-19 04:26:15 | INFO | train_inner | epoch 002: 86 / 3002 loss=2.72, ppl=6.59, wps=5895.5, ups=0.09, wpb=64901, bsz=128, num_updates=3065, lr=9.99835e-05, gnorm=2.484, loss_scale=2, train_wall=11, gb_free=2.8, wall=35250
2021-06-19 04:26:26 | INFO | train_inner | epoch 002: 87 / 3002 loss=2.996, ppl=7.98, wps=5839.5, ups=0.09, wpb=64836, bsz=128, num_updates=3066, lr=9.99835e-05, gnorm=2.548, loss_scale=2, train_wall=11, gb_free=2.8, wall=35261
2021-06-19 04:26:38 | INFO | train_inner | epoch 002: 88 / 3002 loss=2.862, ppl=7.27, wps=5821, ups=0.09, wpb=64806, bsz=128, num_updates=3067, lr=9.99835e-05, gnorm=2.428, loss_scale=2, train_wall=11, gb_free=2.8, wall=35272
2021-06-19 04:26:48 | INFO | train_inner | epoch 002: 89 / 3002 loss=2.808, ppl=7, wps=5948.9, ups=0.09, wpb=64821, bsz=128, num_updates=3068, lr=9.99835e-05, gnorm=2.386, loss_scale=2, train_wall=10, gb_free=2.8, wall=35283
2021-06-19 04:26:59 | INFO | train_inner | epoch 002: 90 / 3002 loss=2.842, ppl=7.17, wps=5867.3, ups=0.09, wpb=64822, bsz=128, num_updates=3069, lr=9.99834e-05, gnorm=2.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=35294
2021-06-19 04:27:10 | INFO | train_inner | epoch 002: 91 / 3002 loss=3.049, ppl=8.28, wps=5922, ups=0.09, wpb=64831, bsz=128, num_updates=3070, lr=9.99834e-05, gnorm=2.444, loss_scale=2, train_wall=10, gb_free=2.8, wall=35305
2021-06-19 04:27:21 | INFO | train_inner | epoch 002: 92 / 3002 loss=2.821, ppl=7.06, wps=5924.3, ups=0.09, wpb=64851, bsz=128, num_updates=3071, lr=9.99834e-05, gnorm=2.288, loss_scale=2, train_wall=10, gb_free=2.8, wall=35316
2021-06-19 04:27:32 | INFO | train_inner | epoch 002: 93 / 3002 loss=2.872, ppl=7.32, wps=5881.8, ups=0.09, wpb=64823, bsz=128, num_updates=3072, lr=9.99834e-05, gnorm=2.34, loss_scale=2, train_wall=11, gb_free=2.8, wall=35327
2021-06-19 04:27:43 | INFO | train_inner | epoch 002: 94 / 3002 loss=2.933, ppl=7.64, wps=5879.7, ups=0.09, wpb=64853, bsz=128, num_updates=3073, lr=9.99834e-05, gnorm=2.302, loss_scale=2, train_wall=11, gb_free=2.8, wall=35338
2021-06-19 04:27:54 | INFO | train_inner | epoch 002: 95 / 3002 loss=2.822, ppl=7.07, wps=5896.2, ups=0.09, wpb=64892, bsz=128, num_updates=3074, lr=9.99834e-05, gnorm=5.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=35349
2021-06-19 04:28:06 | INFO | train_inner | epoch 002: 96 / 3002 loss=2.836, ppl=7.14, wps=5833, ups=0.09, wpb=64832, bsz=128, num_updates=3075, lr=9.99834e-05, gnorm=2.35, loss_scale=2, train_wall=11, gb_free=2.8, wall=35360
2021-06-19 04:28:16 | INFO | train_inner | epoch 002: 97 / 3002 loss=2.845, ppl=7.19, wps=6038.4, ups=0.09, wpb=64836, bsz=128, num_updates=3076, lr=9.99834e-05, gnorm=6.876, loss_scale=2, train_wall=10, gb_free=2.8, wall=35371
2021-06-19 04:28:27 | INFO | train_inner | epoch 002: 98 / 3002 loss=2.816, ppl=7.04, wps=5963.8, ups=0.09, wpb=64876, bsz=128, num_updates=3077, lr=9.99834e-05, gnorm=2.397, loss_scale=2, train_wall=10, gb_free=2.8, wall=35381
2021-06-19 04:28:38 | INFO | train_inner | epoch 002: 99 / 3002 loss=2.885, ppl=7.39, wps=5736, ups=0.09, wpb=64769, bsz=128, num_updates=3078, lr=9.99834e-05, gnorm=2.284, loss_scale=2, train_wall=11, gb_free=2.8, wall=35393
2021-06-19 04:28:50 | INFO | train_inner | epoch 002: 100 / 3002 loss=3.075, ppl=8.43, wps=5843.9, ups=0.09, wpb=64795, bsz=128, num_updates=3079, lr=9.99834e-05, gnorm=2.499, loss_scale=2, train_wall=11, gb_free=2.8, wall=35404
2021-06-19 04:29:00 | INFO | train_inner | epoch 002: 101 / 3002 loss=2.93, ppl=7.62, wps=5966.9, ups=0.09, wpb=64787, bsz=128, num_updates=3080, lr=9.99834e-05, gnorm=2.469, loss_scale=2, train_wall=10, gb_free=2.8, wall=35415
2021-06-19 04:29:12 | INFO | train_inner | epoch 002: 102 / 3002 loss=2.745, ppl=6.7, wps=5772.6, ups=0.09, wpb=64801, bsz=128, num_updates=3081, lr=9.99834e-05, gnorm=2.432, loss_scale=2, train_wall=11, gb_free=2.8, wall=35426
2021-06-19 04:29:23 | INFO | train_inner | epoch 002: 103 / 3002 loss=2.877, ppl=7.35, wps=5784, ups=0.09, wpb=64887, bsz=128, num_updates=3082, lr=9.99833e-05, gnorm=2.481, loss_scale=2, train_wall=11, gb_free=2.8, wall=35437
2021-06-19 04:29:34 | INFO | train_inner | epoch 002: 104 / 3002 loss=2.792, ppl=6.92, wps=5788.6, ups=0.09, wpb=64851, bsz=128, num_updates=3083, lr=9.99833e-05, gnorm=2.329, loss_scale=2, train_wall=11, gb_free=2.8, wall=35448
2021-06-19 04:29:45 | INFO | train_inner | epoch 002: 105 / 3002 loss=2.889, ppl=7.41, wps=5817.2, ups=0.09, wpb=64793, bsz=128, num_updates=3084, lr=9.99833e-05, gnorm=2.307, loss_scale=2, train_wall=11, gb_free=2.8, wall=35459
2021-06-19 04:29:56 | INFO | train_inner | epoch 002: 106 / 3002 loss=2.655, ppl=6.3, wps=5800.8, ups=0.09, wpb=64913, bsz=128, num_updates=3085, lr=9.99833e-05, gnorm=2.383, loss_scale=2, train_wall=11, gb_free=2.8, wall=35471
2021-06-19 04:30:07 | INFO | train_inner | epoch 002: 107 / 3002 loss=2.902, ppl=7.47, wps=5903.6, ups=0.09, wpb=64803, bsz=128, num_updates=3086, lr=9.99833e-05, gnorm=2.557, loss_scale=2, train_wall=11, gb_free=2.8, wall=35482
2021-06-19 04:30:18 | INFO | train_inner | epoch 002: 108 / 3002 loss=2.768, ppl=6.81, wps=5927, ups=0.09, wpb=64847, bsz=128, num_updates=3087, lr=9.99833e-05, gnorm=2.379, loss_scale=2, train_wall=10, gb_free=2.8, wall=35493
2021-06-19 04:30:29 | INFO | train_inner | epoch 002: 109 / 3002 loss=2.814, ppl=7.03, wps=5840.3, ups=0.09, wpb=64818, bsz=128, num_updates=3088, lr=9.99833e-05, gnorm=2.298, loss_scale=2, train_wall=11, gb_free=2.8, wall=35504
2021-06-19 04:30:40 | INFO | train_inner | epoch 002: 110 / 3002 loss=2.856, ppl=7.24, wps=5886.6, ups=0.09, wpb=64823, bsz=128, num_updates=3089, lr=9.99833e-05, gnorm=2.358, loss_scale=2, train_wall=11, gb_free=2.8, wall=35515
2021-06-19 04:30:51 | INFO | train_inner | epoch 002: 111 / 3002 loss=2.936, ppl=7.65, wps=5828.8, ups=0.09, wpb=64776, bsz=128, num_updates=3090, lr=9.99833e-05, gnorm=2.328, loss_scale=2, train_wall=11, gb_free=2.8, wall=35526
2021-06-19 04:31:03 | INFO | train_inner | epoch 002: 112 / 3002 loss=3.037, ppl=8.21, wps=5855.8, ups=0.09, wpb=64797, bsz=128, num_updates=3091, lr=9.99833e-05, gnorm=2.365, loss_scale=2, train_wall=11, gb_free=2.8, wall=35537
2021-06-19 04:31:14 | INFO | train_inner | epoch 002: 113 / 3002 loss=2.786, ppl=6.9, wps=5880.8, ups=0.09, wpb=64794, bsz=128, num_updates=3092, lr=9.99833e-05, gnorm=17.447, loss_scale=2, train_wall=11, gb_free=2.8, wall=35548
2021-06-19 04:31:25 | INFO | train_inner | epoch 002: 114 / 3002 loss=2.833, ppl=7.12, wps=5767.7, ups=0.09, wpb=64716, bsz=128, num_updates=3093, lr=9.99833e-05, gnorm=2.661, loss_scale=2, train_wall=11, gb_free=2.8, wall=35559
2021-06-19 04:31:36 | INFO | train_inner | epoch 002: 115 / 3002 loss=2.578, ppl=5.97, wps=5923.2, ups=0.09, wpb=64924, bsz=128, num_updates=3094, lr=9.99832e-05, gnorm=2.37, loss_scale=2, train_wall=11, gb_free=2.8, wall=35570
2021-06-19 04:31:47 | INFO | train_inner | epoch 002: 116 / 3002 loss=2.713, ppl=6.56, wps=5896.7, ups=0.09, wpb=64815, bsz=128, num_updates=3095, lr=9.99832e-05, gnorm=2.285, loss_scale=2, train_wall=11, gb_free=2.8, wall=35581
2021-06-19 04:31:58 | INFO | train_inner | epoch 002: 117 / 3002 loss=2.67, ppl=6.36, wps=5857.8, ups=0.09, wpb=64795, bsz=128, num_updates=3096, lr=9.99832e-05, gnorm=2.499, loss_scale=2, train_wall=11, gb_free=2.8, wall=35592
2021-06-19 04:32:09 | INFO | train_inner | epoch 002: 118 / 3002 loss=2.793, ppl=6.93, wps=5940.7, ups=0.09, wpb=64805, bsz=128, num_updates=3097, lr=9.99832e-05, gnorm=2.445, loss_scale=2, train_wall=10, gb_free=2.8, wall=35603
2021-06-19 04:32:20 | INFO | train_inner | epoch 002: 119 / 3002 loss=2.843, ppl=7.17, wps=5903.3, ups=0.09, wpb=64864, bsz=128, num_updates=3098, lr=9.99832e-05, gnorm=2.438, loss_scale=2, train_wall=11, gb_free=2.8, wall=35614
2021-06-19 04:32:31 | INFO | train_inner | epoch 002: 120 / 3002 loss=2.853, ppl=7.22, wps=5814.8, ups=0.09, wpb=64752, bsz=128, num_updates=3099, lr=9.99832e-05, gnorm=2.314, loss_scale=2, train_wall=11, gb_free=2.8, wall=35625
2021-06-19 04:32:42 | INFO | train_inner | epoch 002: 121 / 3002 loss=2.656, ppl=6.3, wps=5815.4, ups=0.09, wpb=64821, bsz=128, num_updates=3100, lr=9.99832e-05, gnorm=2.268, loss_scale=2, train_wall=11, gb_free=2.8, wall=35636
2021-06-19 04:32:53 | INFO | train_inner | epoch 002: 122 / 3002 loss=2.982, ppl=7.9, wps=5903.2, ups=0.09, wpb=64841, bsz=128, num_updates=3101, lr=9.99832e-05, gnorm=2.372, loss_scale=2, train_wall=11, gb_free=2.8, wall=35647
2021-06-19 04:33:04 | INFO | train_inner | epoch 002: 123 / 3002 loss=2.796, ppl=6.95, wps=5819.6, ups=0.09, wpb=64837, bsz=128, num_updates=3102, lr=9.99832e-05, gnorm=2.33, loss_scale=2, train_wall=11, gb_free=2.8, wall=35658
2021-06-19 04:33:15 | INFO | train_inner | epoch 002: 124 / 3002 loss=2.697, ppl=6.48, wps=5926.3, ups=0.09, wpb=64840, bsz=128, num_updates=3103, lr=9.99832e-05, gnorm=3.092, loss_scale=2, train_wall=10, gb_free=2.8, wall=35669
2021-06-19 04:33:26 | INFO | train_inner | epoch 002: 125 / 3002 loss=2.872, ppl=7.32, wps=5727.7, ups=0.09, wpb=64832, bsz=128, num_updates=3104, lr=9.99832e-05, gnorm=2.306, loss_scale=2, train_wall=11, gb_free=2.8, wall=35681
2021-06-19 04:33:38 | INFO | train_inner | epoch 002: 126 / 3002 loss=2.888, ppl=7.4, wps=5756.9, ups=0.09, wpb=64798, bsz=128, num_updates=3105, lr=9.99832e-05, gnorm=2.433, loss_scale=2, train_wall=11, gb_free=2.8, wall=35692
2021-06-19 04:33:49 | INFO | train_inner | epoch 002: 127 / 3002 loss=2.859, ppl=7.26, wps=5812.2, ups=0.09, wpb=64806, bsz=128, num_updates=3106, lr=9.99832e-05, gnorm=2.932, loss_scale=2, train_wall=11, gb_free=2.8, wall=35703
2021-06-19 04:34:00 | INFO | train_inner | epoch 002: 128 / 3002 loss=2.809, ppl=7.01, wps=5918, ups=0.09, wpb=64835, bsz=128, num_updates=3107, lr=9.99831e-05, gnorm=3.282, loss_scale=2, train_wall=10, gb_free=2.8, wall=35714
2021-06-19 04:34:11 | INFO | train_inner | epoch 002: 129 / 3002 loss=2.963, ppl=7.8, wps=5847.5, ups=0.09, wpb=64838, bsz=128, num_updates=3108, lr=9.99831e-05, gnorm=8.76, loss_scale=2, train_wall=11, gb_free=2.8, wall=35725
2021-06-19 04:34:22 | INFO | train_inner | epoch 002: 130 / 3002 loss=2.878, ppl=7.35, wps=5862, ups=0.09, wpb=64894, bsz=128, num_updates=3109, lr=9.99831e-05, gnorm=2.58, loss_scale=2, train_wall=11, gb_free=2.8, wall=35736
2021-06-19 04:34:33 | INFO | train_inner | epoch 002: 131 / 3002 loss=3.012, ppl=8.07, wps=5868, ups=0.09, wpb=64813, bsz=128, num_updates=3110, lr=9.99831e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=35747
2021-06-19 04:34:44 | INFO | train_inner | epoch 002: 132 / 3002 loss=2.764, ppl=6.79, wps=5814.8, ups=0.09, wpb=64875, bsz=128, num_updates=3111, lr=9.99831e-05, gnorm=2.36, loss_scale=2, train_wall=11, gb_free=2.8, wall=35758
2021-06-19 04:34:55 | INFO | train_inner | epoch 002: 133 / 3002 loss=2.799, ppl=6.96, wps=5766.1, ups=0.09, wpb=64830, bsz=128, num_updates=3112, lr=9.99831e-05, gnorm=2.333, loss_scale=2, train_wall=11, gb_free=2.8, wall=35770
2021-06-19 04:35:06 | INFO | train_inner | epoch 002: 134 / 3002 loss=2.687, ppl=6.44, wps=5837, ups=0.09, wpb=64829, bsz=128, num_updates=3113, lr=9.99831e-05, gnorm=2.516, loss_scale=2, train_wall=11, gb_free=2.8, wall=35781
2021-06-19 04:35:18 | INFO | train_inner | epoch 002: 135 / 3002 loss=2.871, ppl=7.32, wps=5858.1, ups=0.09, wpb=64874, bsz=128, num_updates=3114, lr=9.99831e-05, gnorm=2.413, loss_scale=2, train_wall=11, gb_free=2.8, wall=35792
2021-06-19 04:35:29 | INFO | train_inner | epoch 002: 136 / 3002 loss=2.876, ppl=7.34, wps=5829.9, ups=0.09, wpb=64865, bsz=128, num_updates=3115, lr=9.99831e-05, gnorm=2.474, loss_scale=2, train_wall=11, gb_free=2.8, wall=35803
2021-06-19 04:35:40 | INFO | train_inner | epoch 002: 137 / 3002 loss=2.804, ppl=6.98, wps=5915, ups=0.09, wpb=64790, bsz=128, num_updates=3116, lr=9.99831e-05, gnorm=2.383, loss_scale=2, train_wall=11, gb_free=2.8, wall=35814
2021-06-19 04:35:51 | INFO | train_inner | epoch 002: 138 / 3002 loss=2.966, ppl=7.81, wps=5948, ups=0.09, wpb=64878, bsz=128, num_updates=3117, lr=9.99831e-05, gnorm=2.809, loss_scale=2, train_wall=10, gb_free=2.8, wall=35825
2021-06-19 04:36:01 | INFO | train_inner | epoch 002: 139 / 3002 loss=2.666, ppl=6.35, wps=5985.3, ups=0.09, wpb=64802, bsz=128, num_updates=3118, lr=9.99831e-05, gnorm=4.02, loss_scale=2, train_wall=10, gb_free=2.8, wall=35836
2021-06-19 04:36:13 | INFO | train_inner | epoch 002: 140 / 3002 loss=2.81, ppl=7.01, wps=5776.1, ups=0.09, wpb=64892, bsz=128, num_updates=3119, lr=9.9983e-05, gnorm=2.402, loss_scale=2, train_wall=11, gb_free=2.8, wall=35847
2021-06-19 04:36:23 | INFO | train_inner | epoch 002: 141 / 3002 loss=2.848, ppl=7.2, wps=5943, ups=0.09, wpb=64835, bsz=128, num_updates=3120, lr=9.9983e-05, gnorm=2.442, loss_scale=2, train_wall=10, gb_free=2.8, wall=35858
2021-06-19 04:36:34 | INFO | train_inner | epoch 002: 142 / 3002 loss=2.781, ppl=6.87, wps=5937.4, ups=0.09, wpb=64838, bsz=128, num_updates=3121, lr=9.9983e-05, gnorm=2.861, loss_scale=2, train_wall=10, gb_free=2.8, wall=35869
2021-06-19 04:36:45 | INFO | train_inner | epoch 002: 143 / 3002 loss=2.817, ppl=7.05, wps=5882.4, ups=0.09, wpb=64845, bsz=128, num_updates=3122, lr=9.9983e-05, gnorm=18.499, loss_scale=2, train_wall=11, gb_free=2.8, wall=35880
2021-06-19 04:36:56 | INFO | train_inner | epoch 002: 144 / 3002 loss=2.65, ppl=6.28, wps=5861.5, ups=0.09, wpb=64853, bsz=128, num_updates=3123, lr=9.9983e-05, gnorm=2.316, loss_scale=2, train_wall=11, gb_free=2.8, wall=35891
2021-06-19 04:37:08 | INFO | train_inner | epoch 002: 145 / 3002 loss=2.84, ppl=7.16, wps=5869.5, ups=0.09, wpb=64806, bsz=128, num_updates=3124, lr=9.9983e-05, gnorm=2.436, loss_scale=2, train_wall=11, gb_free=2.8, wall=35902
2021-06-19 04:37:18 | INFO | train_inner | epoch 002: 146 / 3002 loss=2.758, ppl=6.76, wps=5942.9, ups=0.09, wpb=64885, bsz=128, num_updates=3125, lr=9.9983e-05, gnorm=2.435, loss_scale=2, train_wall=10, gb_free=2.8, wall=35913
2021-06-19 04:37:30 | INFO | train_inner | epoch 002: 147 / 3002 loss=2.941, ppl=7.68, wps=5848.3, ups=0.09, wpb=64831, bsz=128, num_updates=3126, lr=9.9983e-05, gnorm=10.465, loss_scale=2, train_wall=11, gb_free=2.8, wall=35924
2021-06-19 04:37:41 | INFO | train_inner | epoch 002: 148 / 3002 loss=2.911, ppl=7.52, wps=5853.3, ups=0.09, wpb=64809, bsz=128, num_updates=3127, lr=9.9983e-05, gnorm=3.173, loss_scale=2, train_wall=11, gb_free=2.8, wall=35935
2021-06-19 04:37:52 | INFO | train_inner | epoch 002: 149 / 3002 loss=2.946, ppl=7.7, wps=5733.3, ups=0.09, wpb=64793, bsz=128, num_updates=3128, lr=9.9983e-05, gnorm=3.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=35946
2021-06-19 04:38:03 | INFO | train_inner | epoch 002: 150 / 3002 loss=2.815, ppl=7.04, wps=5813.3, ups=0.09, wpb=64861, bsz=128, num_updates=3129, lr=9.9983e-05, gnorm=3.696, loss_scale=2, train_wall=11, gb_free=2.8, wall=35957
2021-06-19 04:38:14 | INFO | train_inner | epoch 002: 151 / 3002 loss=2.829, ppl=7.1, wps=5809.6, ups=0.09, wpb=64907, bsz=128, num_updates=3130, lr=9.9983e-05, gnorm=3.061, loss_scale=2, train_wall=11, gb_free=2.8, wall=35969
2021-06-19 04:38:25 | INFO | train_inner | epoch 002: 152 / 3002 loss=2.928, ppl=7.61, wps=5875.9, ups=0.09, wpb=64807, bsz=128, num_updates=3131, lr=9.9983e-05, gnorm=2.945, loss_scale=2, train_wall=11, gb_free=2.8, wall=35980
2021-06-19 04:38:36 | INFO | train_inner | epoch 002: 153 / 3002 loss=2.794, ppl=6.93, wps=5966.3, ups=0.09, wpb=64860, bsz=128, num_updates=3132, lr=9.99829e-05, gnorm=2.924, loss_scale=2, train_wall=10, gb_free=2.8, wall=35990
2021-06-19 04:38:47 | INFO | train_inner | epoch 002: 154 / 3002 loss=2.959, ppl=7.78, wps=5842.4, ups=0.09, wpb=64898, bsz=128, num_updates=3133, lr=9.99829e-05, gnorm=2.844, loss_scale=2, train_wall=11, gb_free=2.8, wall=36002
2021-06-19 04:38:58 | INFO | train_inner | epoch 002: 155 / 3002 loss=2.916, ppl=7.55, wps=5842.5, ups=0.09, wpb=64809, bsz=128, num_updates=3134, lr=9.99829e-05, gnorm=2.834, loss_scale=2, train_wall=11, gb_free=2.8, wall=36013
2021-06-19 04:39:09 | INFO | train_inner | epoch 002: 156 / 3002 loss=2.695, ppl=6.48, wps=5855.8, ups=0.09, wpb=64780, bsz=128, num_updates=3135, lr=9.99829e-05, gnorm=2.879, loss_scale=2, train_wall=11, gb_free=2.8, wall=36024
2021-06-19 04:39:21 | INFO | train_inner | epoch 002: 157 / 3002 loss=2.907, ppl=7.5, wps=5760.5, ups=0.09, wpb=64797, bsz=128, num_updates=3136, lr=9.99829e-05, gnorm=2.818, loss_scale=2, train_wall=11, gb_free=2.8, wall=36035
2021-06-19 04:39:32 | INFO | train_inner | epoch 002: 158 / 3002 loss=3.055, ppl=8.31, wps=5810.7, ups=0.09, wpb=64762, bsz=128, num_updates=3137, lr=9.99829e-05, gnorm=2.812, loss_scale=2, train_wall=11, gb_free=2.8, wall=36046
2021-06-19 04:39:43 | INFO | train_inner | epoch 002: 159 / 3002 loss=3.004, ppl=8.02, wps=5836.6, ups=0.09, wpb=64853, bsz=128, num_updates=3138, lr=9.99829e-05, gnorm=2.659, loss_scale=2, train_wall=11, gb_free=2.8, wall=36057
2021-06-19 04:39:54 | INFO | train_inner | epoch 002: 160 / 3002 loss=2.803, ppl=6.98, wps=5748.7, ups=0.09, wpb=64854, bsz=128, num_updates=3139, lr=9.99829e-05, gnorm=3.476, loss_scale=2, train_wall=11, gb_free=2.8, wall=36069
2021-06-19 04:40:05 | INFO | train_inner | epoch 002: 161 / 3002 loss=2.796, ppl=6.94, wps=5860.3, ups=0.09, wpb=64822, bsz=128, num_updates=3140, lr=9.99829e-05, gnorm=2.478, loss_scale=2, train_wall=11, gb_free=2.8, wall=36080
2021-06-19 04:40:16 | INFO | train_inner | epoch 002: 162 / 3002 loss=2.888, ppl=7.4, wps=5783, ups=0.09, wpb=64883, bsz=128, num_updates=3141, lr=9.99829e-05, gnorm=2.576, loss_scale=2, train_wall=11, gb_free=2.8, wall=36091
2021-06-19 04:40:28 | INFO | train_inner | epoch 002: 163 / 3002 loss=2.874, ppl=7.33, wps=5785.6, ups=0.09, wpb=64815, bsz=128, num_updates=3142, lr=9.99829e-05, gnorm=6.894, loss_scale=2, train_wall=11, gb_free=2.8, wall=36102
2021-06-19 04:40:39 | INFO | train_inner | epoch 002: 164 / 3002 loss=2.784, ppl=6.89, wps=5841.9, ups=0.09, wpb=64832, bsz=128, num_updates=3143, lr=9.99829e-05, gnorm=2.645, loss_scale=2, train_wall=11, gb_free=2.8, wall=36113
2021-06-19 04:40:50 | INFO | train_inner | epoch 002: 165 / 3002 loss=2.971, ppl=7.84, wps=5963, ups=0.09, wpb=64867, bsz=128, num_updates=3144, lr=9.99828e-05, gnorm=2.581, loss_scale=2, train_wall=10, gb_free=2.8, wall=36124
2021-06-19 04:41:01 | INFO | train_inner | epoch 002: 166 / 3002 loss=2.961, ppl=7.79, wps=5877.5, ups=0.09, wpb=64838, bsz=128, num_updates=3145, lr=9.99828e-05, gnorm=2.491, loss_scale=2, train_wall=11, gb_free=2.8, wall=36135
2021-06-19 04:41:12 | INFO | train_inner | epoch 002: 167 / 3002 loss=2.881, ppl=7.36, wps=5817.6, ups=0.09, wpb=64852, bsz=128, num_updates=3146, lr=9.99828e-05, gnorm=2.777, loss_scale=2, train_wall=11, gb_free=2.8, wall=36146
2021-06-19 04:41:23 | INFO | train_inner | epoch 002: 168 / 3002 loss=2.743, ppl=6.69, wps=5770.5, ups=0.09, wpb=64885, bsz=128, num_updates=3147, lr=9.99828e-05, gnorm=2.849, loss_scale=2, train_wall=11, gb_free=2.8, wall=36157
2021-06-19 04:41:34 | INFO | train_inner | epoch 002: 169 / 3002 loss=2.802, ppl=6.98, wps=5920, ups=0.09, wpb=64857, bsz=128, num_updates=3148, lr=9.99828e-05, gnorm=2.351, loss_scale=2, train_wall=11, gb_free=2.8, wall=36168
2021-06-19 04:41:45 | INFO | train_inner | epoch 002: 170 / 3002 loss=3.024, ppl=8.13, wps=5837.5, ups=0.09, wpb=64790, bsz=128, num_updates=3149, lr=9.99828e-05, gnorm=2.713, loss_scale=2, train_wall=11, gb_free=2.8, wall=36179
2021-06-19 04:41:56 | INFO | train_inner | epoch 002: 171 / 3002 loss=2.929, ppl=7.62, wps=5882.3, ups=0.09, wpb=64843, bsz=128, num_updates=3150, lr=9.99828e-05, gnorm=4.364, loss_scale=2, train_wall=11, gb_free=2.8, wall=36191
2021-06-19 04:42:07 | INFO | train_inner | epoch 002: 172 / 3002 loss=3.037, ppl=8.21, wps=5824.2, ups=0.09, wpb=64799, bsz=128, num_updates=3151, lr=9.99828e-05, gnorm=3.016, loss_scale=2, train_wall=11, gb_free=2.8, wall=36202
2021-06-19 04:42:18 | INFO | train_inner | epoch 002: 173 / 3002 loss=2.847, ppl=7.2, wps=5854.5, ups=0.09, wpb=64786, bsz=128, num_updates=3152, lr=9.99828e-05, gnorm=2.386, loss_scale=2, train_wall=11, gb_free=2.8, wall=36213
2021-06-19 04:42:29 | INFO | train_inner | epoch 002: 174 / 3002 loss=2.9, ppl=7.46, wps=5926.8, ups=0.09, wpb=64859, bsz=128, num_updates=3153, lr=9.99828e-05, gnorm=11.081, loss_scale=2, train_wall=11, gb_free=2.8, wall=36224
2021-06-19 04:42:40 | INFO | train_inner | epoch 002: 175 / 3002 loss=2.756, ppl=6.75, wps=5804.3, ups=0.09, wpb=64846, bsz=128, num_updates=3154, lr=9.99828e-05, gnorm=2.363, loss_scale=2, train_wall=11, gb_free=2.8, wall=36235
2021-06-19 04:42:52 | INFO | train_inner | epoch 002: 176 / 3002 loss=2.928, ppl=7.61, wps=5863.8, ups=0.09, wpb=64825, bsz=128, num_updates=3155, lr=9.99828e-05, gnorm=2.766, loss_scale=2, train_wall=11, gb_free=2.8, wall=36246
2021-06-19 04:43:02 | INFO | train_inner | epoch 002: 177 / 3002 loss=2.804, ppl=6.99, wps=6075.1, ups=0.09, wpb=64861, bsz=128, num_updates=3156, lr=9.99828e-05, gnorm=5.703, loss_scale=2, train_wall=10, gb_free=2.8, wall=36257
2021-06-19 04:43:13 | INFO | train_inner | epoch 002: 178 / 3002 loss=2.897, ppl=7.45, wps=5791.6, ups=0.09, wpb=64823, bsz=128, num_updates=3157, lr=9.99827e-05, gnorm=2.418, loss_scale=2, train_wall=11, gb_free=2.8, wall=36268
2021-06-19 04:43:24 | INFO | train_inner | epoch 002: 179 / 3002 loss=2.734, ppl=6.65, wps=5850.7, ups=0.09, wpb=64830, bsz=128, num_updates=3158, lr=9.99827e-05, gnorm=9.348, loss_scale=2, train_wall=11, gb_free=2.8, wall=36279
2021-06-19 04:43:36 | INFO | train_inner | epoch 002: 180 / 3002 loss=2.836, ppl=7.14, wps=5782.3, ups=0.09, wpb=64807, bsz=128, num_updates=3159, lr=9.99827e-05, gnorm=2.507, loss_scale=2, train_wall=11, gb_free=2.8, wall=36290
2021-06-19 04:43:47 | INFO | train_inner | epoch 002: 181 / 3002 loss=2.818, ppl=7.05, wps=5821, ups=0.09, wpb=64836, bsz=128, num_updates=3160, lr=9.99827e-05, gnorm=2.488, loss_scale=2, train_wall=11, gb_free=2.8, wall=36301
2021-06-19 04:43:58 | INFO | train_inner | epoch 002: 182 / 3002 loss=3.038, ppl=8.22, wps=5921.7, ups=0.09, wpb=64836, bsz=128, num_updates=3161, lr=9.99827e-05, gnorm=3.683, loss_scale=2, train_wall=11, gb_free=2.8, wall=36312
2021-06-19 04:44:09 | INFO | train_inner | epoch 002: 183 / 3002 loss=2.942, ppl=7.68, wps=5931.9, ups=0.09, wpb=64863, bsz=128, num_updates=3162, lr=9.99827e-05, gnorm=2.534, loss_scale=2, train_wall=10, gb_free=2.8, wall=36323
2021-06-19 04:44:20 | INFO | train_inner | epoch 002: 184 / 3002 loss=2.77, ppl=6.82, wps=5769.4, ups=0.09, wpb=64802, bsz=128, num_updates=3163, lr=9.99827e-05, gnorm=4.392, loss_scale=2, train_wall=11, gb_free=2.8, wall=36334
2021-06-19 04:44:31 | INFO | train_inner | epoch 002: 185 / 3002 loss=2.974, ppl=7.86, wps=5810, ups=0.09, wpb=64868, bsz=128, num_updates=3164, lr=9.99827e-05, gnorm=2.551, loss_scale=2, train_wall=11, gb_free=2.8, wall=36345
2021-06-19 04:44:42 | INFO | train_inner | epoch 002: 186 / 3002 loss=2.72, ppl=6.59, wps=5982, ups=0.09, wpb=64956, bsz=128, num_updates=3165, lr=9.99827e-05, gnorm=2.547, loss_scale=2, train_wall=10, gb_free=2.8, wall=36356
2021-06-19 04:44:53 | INFO | train_inner | epoch 002: 187 / 3002 loss=2.92, ppl=7.57, wps=5726.1, ups=0.09, wpb=64809, bsz=128, num_updates=3166, lr=9.99827e-05, gnorm=2.971, loss_scale=2, train_wall=11, gb_free=2.8, wall=36368
2021-06-19 04:45:04 | INFO | train_inner | epoch 002: 188 / 3002 loss=2.857, ppl=7.25, wps=5783.8, ups=0.09, wpb=64831, bsz=128, num_updates=3167, lr=9.99827e-05, gnorm=2.683, loss_scale=2, train_wall=11, gb_free=2.8, wall=36379
2021-06-19 04:45:16 | INFO | train_inner | epoch 002: 189 / 3002 loss=2.908, ppl=7.5, wps=5777.9, ups=0.09, wpb=64853, bsz=128, num_updates=3168, lr=9.99827e-05, gnorm=2.609, loss_scale=2, train_wall=11, gb_free=2.8, wall=36390
2021-06-19 04:45:27 | INFO | train_inner | epoch 002: 190 / 3002 loss=2.95, ppl=7.73, wps=5859.1, ups=0.09, wpb=64888, bsz=128, num_updates=3169, lr=9.99826e-05, gnorm=2.514, loss_scale=2, train_wall=11, gb_free=2.8, wall=36401
2021-06-19 04:45:38 | INFO | train_inner | epoch 002: 191 / 3002 loss=2.826, ppl=7.09, wps=5821.6, ups=0.09, wpb=64790, bsz=128, num_updates=3170, lr=9.99826e-05, gnorm=3.2, loss_scale=2, train_wall=11, gb_free=2.8, wall=36412
2021-06-19 04:45:49 | INFO | train_inner | epoch 002: 192 / 3002 loss=2.834, ppl=7.13, wps=5844.1, ups=0.09, wpb=64849, bsz=128, num_updates=3171, lr=9.99826e-05, gnorm=2.711, loss_scale=4, train_wall=11, gb_free=2.8, wall=36423
2021-06-19 04:46:00 | INFO | train_inner | epoch 002: 193 / 3002 loss=2.845, ppl=7.19, wps=5826.2, ups=0.09, wpb=64862, bsz=128, num_updates=3172, lr=9.99826e-05, gnorm=2.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=36434
2021-06-19 04:46:11 | INFO | train_inner | epoch 002: 194 / 3002 loss=2.812, ppl=7.02, wps=5788.8, ups=0.09, wpb=64812, bsz=128, num_updates=3173, lr=9.99826e-05, gnorm=3.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=36446
2021-06-19 04:46:22 | INFO | train_inner | epoch 002: 195 / 3002 loss=2.856, ppl=7.24, wps=5882.8, ups=0.09, wpb=64757, bsz=128, num_updates=3174, lr=9.99826e-05, gnorm=2.842, loss_scale=4, train_wall=11, gb_free=2.8, wall=36457
2021-06-19 04:46:34 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-19 04:46:45 | INFO | train_inner | epoch 002: 197 / 3002 loss=2.866, ppl=7.29, wps=2917.4, ups=0.04, wpb=64853, bsz=128, num_updates=3175, lr=9.99826e-05, gnorm=2.636, loss_scale=2, train_wall=21, gb_free=2.8, wall=36479
2021-06-19 04:46:56 | INFO | train_inner | epoch 002: 198 / 3002 loss=3.001, ppl=8.01, wps=5871.9, ups=0.09, wpb=64755, bsz=128, num_updates=3176, lr=9.99826e-05, gnorm=2.476, loss_scale=2, train_wall=11, gb_free=2.8, wall=36490
2021-06-19 04:47:07 | INFO | train_inner | epoch 002: 199 / 3002 loss=2.868, ppl=7.3, wps=5783.8, ups=0.09, wpb=64804, bsz=128, num_updates=3177, lr=9.99826e-05, gnorm=2.526, loss_scale=2, train_wall=11, gb_free=2.8, wall=36501
2021-06-19 04:47:18 | INFO | train_inner | epoch 002: 200 / 3002 loss=2.82, ppl=7.06, wps=5902.9, ups=0.09, wpb=64881, bsz=128, num_updates=3178, lr=9.99826e-05, gnorm=2.5, loss_scale=2, train_wall=11, gb_free=2.8, wall=36512
2021-06-19 04:47:29 | INFO | train_inner | epoch 002: 201 / 3002 loss=3.124, ppl=8.72, wps=5870.3, ups=0.09, wpb=64838, bsz=128, num_updates=3179, lr=9.99826e-05, gnorm=2.802, loss_scale=2, train_wall=11, gb_free=2.8, wall=36523
2021-06-19 04:47:40 | INFO | train_inner | epoch 002: 202 / 3002 loss=2.855, ppl=7.24, wps=5902.8, ups=0.09, wpb=64848, bsz=128, num_updates=3180, lr=9.99826e-05, gnorm=2.423, loss_scale=2, train_wall=11, gb_free=2.8, wall=36534
2021-06-19 04:47:51 | INFO | train_inner | epoch 002: 203 / 3002 loss=2.765, ppl=6.8, wps=5937.8, ups=0.09, wpb=64856, bsz=128, num_updates=3181, lr=9.99826e-05, gnorm=2.394, loss_scale=2, train_wall=10, gb_free=2.8, wall=36545
2021-06-19 04:48:02 | INFO | train_inner | epoch 002: 204 / 3002 loss=2.832, ppl=7.12, wps=5872.7, ups=0.09, wpb=64831, bsz=128, num_updates=3182, lr=9.99825e-05, gnorm=5.874, loss_scale=2, train_wall=11, gb_free=2.8, wall=36556
2021-06-19 04:48:13 | INFO | train_inner | epoch 002: 205 / 3002 loss=2.764, ppl=6.79, wps=5857, ups=0.09, wpb=64898, bsz=128, num_updates=3183, lr=9.99825e-05, gnorm=2.448, loss_scale=2, train_wall=11, gb_free=2.8, wall=36567
2021-06-19 04:48:24 | INFO | train_inner | epoch 002: 206 / 3002 loss=2.84, ppl=7.16, wps=5837.5, ups=0.09, wpb=64823, bsz=128, num_updates=3184, lr=9.99825e-05, gnorm=3.674, loss_scale=2, train_wall=11, gb_free=2.8, wall=36578
2021-06-19 04:48:35 | INFO | train_inner | epoch 002: 207 / 3002 loss=2.845, ppl=7.19, wps=5939, ups=0.09, wpb=64893, bsz=128, num_updates=3185, lr=9.99825e-05, gnorm=2.366, loss_scale=2, train_wall=10, gb_free=2.8, wall=36589
2021-06-19 04:48:46 | INFO | train_inner | epoch 002: 208 / 3002 loss=2.892, ppl=7.42, wps=5900.8, ups=0.09, wpb=64912, bsz=128, num_updates=3186, lr=9.99825e-05, gnorm=2.479, loss_scale=2, train_wall=11, gb_free=2.8, wall=36600
2021-06-19 04:48:57 | INFO | train_inner | epoch 002: 209 / 3002 loss=2.89, ppl=7.41, wps=5740.5, ups=0.09, wpb=64845, bsz=128, num_updates=3187, lr=9.99825e-05, gnorm=3.482, loss_scale=2, train_wall=11, gb_free=2.8, wall=36612
2021-06-19 04:49:08 | INFO | train_inner | epoch 002: 210 / 3002 loss=2.744, ppl=6.7, wps=5838.5, ups=0.09, wpb=64820, bsz=128, num_updates=3188, lr=9.99825e-05, gnorm=2.414, loss_scale=2, train_wall=11, gb_free=2.8, wall=36623
2021-06-19 04:49:19 | INFO | train_inner | epoch 002: 211 / 3002 loss=2.754, ppl=6.75, wps=5821.3, ups=0.09, wpb=64819, bsz=128, num_updates=3189, lr=9.99825e-05, gnorm=2.361, loss_scale=2, train_wall=11, gb_free=2.8, wall=36634
2021-06-19 04:49:30 | INFO | train_inner | epoch 002: 212 / 3002 loss=2.916, ppl=7.55, wps=5998.6, ups=0.09, wpb=64780, bsz=128, num_updates=3190, lr=9.99825e-05, gnorm=2.417, loss_scale=2, train_wall=10, gb_free=2.8, wall=36645
2021-06-19 04:49:41 | INFO | train_inner | epoch 002: 213 / 3002 loss=2.86, ppl=7.26, wps=5918.4, ups=0.09, wpb=64857, bsz=128, num_updates=3191, lr=9.99825e-05, gnorm=2.643, loss_scale=2, train_wall=11, gb_free=2.8, wall=36656
2021-06-19 04:49:52 | INFO | train_inner | epoch 002: 214 / 3002 loss=2.784, ppl=6.89, wps=5914.2, ups=0.09, wpb=64856, bsz=128, num_updates=3192, lr=9.99825e-05, gnorm=2.665, loss_scale=2, train_wall=11, gb_free=2.8, wall=36667
2021-06-19 04:50:03 | INFO | train_inner | epoch 002: 215 / 3002 loss=2.839, ppl=7.15, wps=5839.3, ups=0.09, wpb=64830, bsz=128, num_updates=3193, lr=9.99825e-05, gnorm=5.391, loss_scale=2, train_wall=11, gb_free=2.8, wall=36678
2021-06-19 04:50:14 | INFO | train_inner | epoch 002: 216 / 3002 loss=2.874, ppl=7.33, wps=5918.7, ups=0.09, wpb=64870, bsz=128, num_updates=3194, lr=9.99824e-05, gnorm=2.647, loss_scale=2, train_wall=10, gb_free=2.8, wall=36689
2021-06-19 04:50:25 | INFO | train_inner | epoch 002: 217 / 3002 loss=2.907, ppl=7.5, wps=5996.2, ups=0.09, wpb=64826, bsz=128, num_updates=3195, lr=9.99824e-05, gnorm=3.309, loss_scale=2, train_wall=10, gb_free=2.8, wall=36699
2021-06-19 04:50:36 | INFO | train_inner | epoch 002: 218 / 3002 loss=2.867, ppl=7.3, wps=5882.8, ups=0.09, wpb=64816, bsz=128, num_updates=3196, lr=9.99824e-05, gnorm=2.52, loss_scale=2, train_wall=11, gb_free=2.8, wall=36710
2021-06-19 04:50:47 | INFO | train_inner | epoch 002: 219 / 3002 loss=2.879, ppl=7.36, wps=5824.6, ups=0.09, wpb=64839, bsz=128, num_updates=3197, lr=9.99824e-05, gnorm=2.475, loss_scale=2, train_wall=11, gb_free=2.8, wall=36722
2021-06-19 04:50:59 | INFO | train_inner | epoch 002: 220 / 3002 loss=2.923, ppl=7.59, wps=5736.1, ups=0.09, wpb=64844, bsz=128, num_updates=3198, lr=9.99824e-05, gnorm=2.429, loss_scale=2, train_wall=11, gb_free=2.8, wall=36733
2021-06-19 04:51:09 | INFO | train_inner | epoch 002: 221 / 3002 loss=2.858, ppl=7.25, wps=5934.5, ups=0.09, wpb=64847, bsz=128, num_updates=3199, lr=9.99824e-05, gnorm=2.423, loss_scale=2, train_wall=10, gb_free=2.8, wall=36744
2021-06-19 04:51:20 | INFO | train_inner | epoch 002: 222 / 3002 loss=2.938, ppl=7.67, wps=5906.6, ups=0.09, wpb=64844, bsz=128, num_updates=3200, lr=9.99824e-05, gnorm=2.552, loss_scale=2, train_wall=11, gb_free=2.8, wall=36755
2021-06-19 04:51:31 | INFO | train_inner | epoch 002: 223 / 3002 loss=2.804, ppl=6.98, wps=5919.6, ups=0.09, wpb=64767, bsz=128, num_updates=3201, lr=9.99824e-05, gnorm=2.481, loss_scale=2, train_wall=10, gb_free=2.8, wall=36766
2021-06-19 04:51:42 | INFO | train_inner | epoch 002: 224 / 3002 loss=2.908, ppl=7.5, wps=5855.8, ups=0.09, wpb=64797, bsz=128, num_updates=3202, lr=9.99824e-05, gnorm=2.599, loss_scale=2, train_wall=11, gb_free=2.8, wall=36777
2021-06-19 04:51:53 | INFO | train_inner | epoch 002: 225 / 3002 loss=2.831, ppl=7.12, wps=5871.8, ups=0.09, wpb=64776, bsz=128, num_updates=3203, lr=9.99824e-05, gnorm=2.407, loss_scale=2, train_wall=11, gb_free=2.8, wall=36788
2021-06-19 04:52:04 | INFO | train_inner | epoch 002: 226 / 3002 loss=2.891, ppl=7.42, wps=5899.4, ups=0.09, wpb=64867, bsz=128, num_updates=3204, lr=9.99824e-05, gnorm=4.171, loss_scale=2, train_wall=11, gb_free=2.8, wall=36799
2021-06-19 04:52:15 | INFO | train_inner | epoch 002: 227 / 3002 loss=2.867, ppl=7.3, wps=5879.5, ups=0.09, wpb=64911, bsz=128, num_updates=3205, lr=9.99824e-05, gnorm=2.435, loss_scale=2, train_wall=11, gb_free=2.8, wall=36810
2021-06-19 04:52:27 | INFO | train_inner | epoch 002: 228 / 3002 loss=2.836, ppl=7.14, wps=5743.2, ups=0.09, wpb=64855, bsz=128, num_updates=3206, lr=9.99824e-05, gnorm=2.961, loss_scale=2, train_wall=11, gb_free=2.8, wall=36821
2021-06-19 04:52:38 | INFO | train_inner | epoch 002: 229 / 3002 loss=2.819, ppl=7.06, wps=5898.6, ups=0.09, wpb=64863, bsz=128, num_updates=3207, lr=9.99823e-05, gnorm=2.435, loss_scale=2, train_wall=11, gb_free=2.8, wall=36832
2021-06-19 04:52:49 | INFO | train_inner | epoch 002: 230 / 3002 loss=2.756, ppl=6.75, wps=5825.1, ups=0.09, wpb=64864, bsz=128, num_updates=3208, lr=9.99823e-05, gnorm=5.445, loss_scale=2, train_wall=11, gb_free=2.8, wall=36843
2021-06-19 04:53:00 | INFO | train_inner | epoch 002: 231 / 3002 loss=2.716, ppl=6.57, wps=5752.3, ups=0.09, wpb=64798, bsz=128, num_updates=3209, lr=9.99823e-05, gnorm=2.469, loss_scale=2, train_wall=11, gb_free=2.8, wall=36855
2021-06-19 04:53:11 | INFO | train_inner | epoch 002: 232 / 3002 loss=2.742, ppl=6.69, wps=5829.7, ups=0.09, wpb=64771, bsz=128, num_updates=3210, lr=9.99823e-05, gnorm=2.291, loss_scale=2, train_wall=11, gb_free=2.8, wall=36866
2021-06-19 04:53:22 | INFO | train_inner | epoch 002: 233 / 3002 loss=3.047, ppl=8.27, wps=6055.7, ups=0.09, wpb=64821, bsz=128, num_updates=3211, lr=9.99823e-05, gnorm=2.336, loss_scale=2, train_wall=10, gb_free=2.8, wall=36876
2021-06-19 04:53:33 | INFO | train_inner | epoch 002: 234 / 3002 loss=2.911, ppl=7.52, wps=5885.2, ups=0.09, wpb=64846, bsz=128, num_updates=3212, lr=9.99823e-05, gnorm=2.5, loss_scale=2, train_wall=11, gb_free=2.8, wall=36887
2021-06-19 04:53:44 | INFO | train_inner | epoch 002: 235 / 3002 loss=2.824, ppl=7.08, wps=5877.8, ups=0.09, wpb=64873, bsz=128, num_updates=3213, lr=9.99823e-05, gnorm=2.482, loss_scale=2, train_wall=11, gb_free=2.8, wall=36898
2021-06-19 04:53:55 | INFO | train_inner | epoch 002: 236 / 3002 loss=2.751, ppl=6.73, wps=5794.4, ups=0.09, wpb=64855, bsz=128, num_updates=3214, lr=9.99823e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=36910
2021-06-19 04:54:06 | INFO | train_inner | epoch 002: 237 / 3002 loss=2.963, ppl=7.8, wps=5818.4, ups=0.09, wpb=64868, bsz=128, num_updates=3215, lr=9.99823e-05, gnorm=2.529, loss_scale=2, train_wall=11, gb_free=2.8, wall=36921
2021-06-19 04:54:17 | INFO | train_inner | epoch 002: 238 / 3002 loss=2.814, ppl=7.03, wps=5835.9, ups=0.09, wpb=64796, bsz=128, num_updates=3216, lr=9.99823e-05, gnorm=2.357, loss_scale=2, train_wall=11, gb_free=2.8, wall=36932
2021-06-19 04:54:29 | INFO | train_inner | epoch 002: 239 / 3002 loss=2.834, ppl=7.13, wps=5782.1, ups=0.09, wpb=64799, bsz=128, num_updates=3217, lr=9.99823e-05, gnorm=2.296, loss_scale=2, train_wall=11, gb_free=2.8, wall=36943
2021-06-19 04:54:40 | INFO | train_inner | epoch 002: 240 / 3002 loss=2.739, ppl=6.68, wps=5890.1, ups=0.09, wpb=64891, bsz=128, num_updates=3218, lr=9.99823e-05, gnorm=2.462, loss_scale=2, train_wall=11, gb_free=2.8, wall=36954
2021-06-19 04:54:51 | INFO | train_inner | epoch 002: 241 / 3002 loss=2.811, ppl=7.02, wps=5889.9, ups=0.09, wpb=64832, bsz=128, num_updates=3219, lr=9.99822e-05, gnorm=2.529, loss_scale=2, train_wall=11, gb_free=2.8, wall=36965
2021-06-19 04:55:02 | INFO | train_inner | epoch 002: 242 / 3002 loss=2.715, ppl=6.57, wps=5733.8, ups=0.09, wpb=64821, bsz=128, num_updates=3220, lr=9.99822e-05, gnorm=2.443, loss_scale=2, train_wall=11, gb_free=2.8, wall=36976
2021-06-19 04:55:13 | INFO | train_inner | epoch 002: 243 / 3002 loss=2.723, ppl=6.6, wps=5785.1, ups=0.09, wpb=64809, bsz=128, num_updates=3221, lr=9.99822e-05, gnorm=2.62, loss_scale=2, train_wall=11, gb_free=2.8, wall=36988
2021-06-19 04:55:24 | INFO | train_inner | epoch 002: 244 / 3002 loss=2.919, ppl=7.56, wps=5834.3, ups=0.09, wpb=64782, bsz=128, num_updates=3222, lr=9.99822e-05, gnorm=2.498, loss_scale=2, train_wall=11, gb_free=2.8, wall=36999
2021-06-19 04:55:35 | INFO | train_inner | epoch 002: 245 / 3002 loss=2.979, ppl=7.89, wps=5872, ups=0.09, wpb=64861, bsz=128, num_updates=3223, lr=9.99822e-05, gnorm=3.515, loss_scale=2, train_wall=11, gb_free=2.8, wall=37010
2021-06-19 04:55:47 | INFO | train_inner | epoch 002: 246 / 3002 loss=2.804, ppl=6.99, wps=5726.4, ups=0.09, wpb=64866, bsz=128, num_updates=3224, lr=9.99822e-05, gnorm=2.775, loss_scale=2, train_wall=11, gb_free=2.8, wall=37021
2021-06-19 04:55:58 | INFO | train_inner | epoch 002: 247 / 3002 loss=2.714, ppl=6.56, wps=5696.1, ups=0.09, wpb=64835, bsz=128, num_updates=3225, lr=9.99822e-05, gnorm=2.415, loss_scale=2, train_wall=11, gb_free=2.8, wall=37032
2021-06-19 04:56:09 | INFO | train_inner | epoch 002: 248 / 3002 loss=2.912, ppl=7.53, wps=5836.2, ups=0.09, wpb=64791, bsz=128, num_updates=3226, lr=9.99822e-05, gnorm=2.559, loss_scale=2, train_wall=11, gb_free=2.8, wall=37044
2021-06-19 04:56:20 | INFO | train_inner | epoch 002: 249 / 3002 loss=2.839, ppl=7.16, wps=5990.7, ups=0.09, wpb=64798, bsz=128, num_updates=3227, lr=9.99822e-05, gnorm=4.429, loss_scale=2, train_wall=10, gb_free=2.8, wall=37054
2021-06-19 04:56:31 | INFO | train_inner | epoch 002: 250 / 3002 loss=2.746, ppl=6.71, wps=5842.2, ups=0.09, wpb=64816, bsz=128, num_updates=3228, lr=9.99822e-05, gnorm=2.389, loss_scale=2, train_wall=11, gb_free=2.8, wall=37065
2021-06-19 04:56:42 | INFO | train_inner | epoch 002: 251 / 3002 loss=2.773, ppl=6.84, wps=5831.1, ups=0.09, wpb=64846, bsz=128, num_updates=3229, lr=9.99822e-05, gnorm=2.363, loss_scale=2, train_wall=11, gb_free=2.8, wall=37077
2021-06-19 04:56:53 | INFO | train_inner | epoch 002: 252 / 3002 loss=2.956, ppl=7.76, wps=5831.1, ups=0.09, wpb=64791, bsz=128, num_updates=3230, lr=9.99822e-05, gnorm=2.574, loss_scale=2, train_wall=11, gb_free=2.8, wall=37088
2021-06-19 04:57:04 | INFO | train_inner | epoch 002: 253 / 3002 loss=2.846, ppl=7.19, wps=5873.6, ups=0.09, wpb=64822, bsz=128, num_updates=3231, lr=9.99822e-05, gnorm=2.838, loss_scale=2, train_wall=11, gb_free=2.8, wall=37099
2021-06-19 04:57:15 | INFO | train_inner | epoch 002: 254 / 3002 loss=2.77, ppl=6.82, wps=5928.5, ups=0.09, wpb=64829, bsz=128, num_updates=3232, lr=9.99821e-05, gnorm=3.255, loss_scale=2, train_wall=10, gb_free=2.8, wall=37110
2021-06-19 04:57:26 | INFO | train_inner | epoch 002: 255 / 3002 loss=2.823, ppl=7.08, wps=5862.1, ups=0.09, wpb=64852, bsz=128, num_updates=3233, lr=9.99821e-05, gnorm=2.676, loss_scale=2, train_wall=11, gb_free=2.8, wall=37121
2021-06-19 04:57:37 | INFO | train_inner | epoch 002: 256 / 3002 loss=2.828, ppl=7.1, wps=5995, ups=0.09, wpb=64852, bsz=128, num_updates=3234, lr=9.99821e-05, gnorm=2.838, loss_scale=2, train_wall=10, gb_free=2.8, wall=37132
2021-06-19 04:57:48 | INFO | train_inner | epoch 002: 257 / 3002 loss=2.907, ppl=7.5, wps=5763.1, ups=0.09, wpb=64898, bsz=128, num_updates=3235, lr=9.99821e-05, gnorm=2.385, loss_scale=2, train_wall=11, gb_free=2.8, wall=37143
2021-06-19 04:58:00 | INFO | train_inner | epoch 002: 258 / 3002 loss=2.674, ppl=6.38, wps=5825.2, ups=0.09, wpb=64894, bsz=128, num_updates=3236, lr=9.99821e-05, gnorm=2.428, loss_scale=2, train_wall=11, gb_free=2.8, wall=37154
2021-06-19 04:58:11 | INFO | train_inner | epoch 002: 259 / 3002 loss=2.775, ppl=6.84, wps=5893.7, ups=0.09, wpb=64825, bsz=128, num_updates=3237, lr=9.99821e-05, gnorm=2.343, loss_scale=2, train_wall=11, gb_free=2.8, wall=37165
2021-06-19 04:58:21 | INFO | train_inner | epoch 002: 260 / 3002 loss=2.677, ppl=6.4, wps=5963.2, ups=0.09, wpb=64896, bsz=128, num_updates=3238, lr=9.99821e-05, gnorm=2.373, loss_scale=2, train_wall=10, gb_free=2.8, wall=37176
2021-06-19 04:58:33 | INFO | train_inner | epoch 002: 261 / 3002 loss=2.659, ppl=6.32, wps=5808.7, ups=0.09, wpb=64816, bsz=128, num_updates=3239, lr=9.99821e-05, gnorm=2.352, loss_scale=2, train_wall=11, gb_free=2.8, wall=37187
2021-06-19 04:58:44 | INFO | train_inner | epoch 002: 262 / 3002 loss=2.87, ppl=7.31, wps=5838.8, ups=0.09, wpb=64746, bsz=128, num_updates=3240, lr=9.99821e-05, gnorm=2.351, loss_scale=2, train_wall=11, gb_free=2.8, wall=37198
2021-06-19 04:58:55 | INFO | train_inner | epoch 002: 263 / 3002 loss=2.911, ppl=7.52, wps=5921.3, ups=0.09, wpb=64826, bsz=128, num_updates=3241, lr=9.99821e-05, gnorm=8.158, loss_scale=2, train_wall=10, gb_free=2.8, wall=37209
2021-06-19 04:59:06 | INFO | train_inner | epoch 002: 264 / 3002 loss=2.959, ppl=7.78, wps=5807.3, ups=0.09, wpb=64762, bsz=128, num_updates=3242, lr=9.99821e-05, gnorm=2.333, loss_scale=2, train_wall=11, gb_free=2.8, wall=37220
2021-06-19 04:59:17 | INFO | train_inner | epoch 002: 265 / 3002 loss=2.968, ppl=7.82, wps=5884.7, ups=0.09, wpb=64809, bsz=128, num_updates=3243, lr=9.99821e-05, gnorm=2.346, loss_scale=2, train_wall=11, gb_free=2.8, wall=37231
2021-06-19 04:59:28 | INFO | train_inner | epoch 002: 266 / 3002 loss=2.881, ppl=7.37, wps=5798.9, ups=0.09, wpb=64859, bsz=128, num_updates=3244, lr=9.9982e-05, gnorm=2.346, loss_scale=2, train_wall=11, gb_free=2.8, wall=37242
2021-06-19 04:59:39 | INFO | train_inner | epoch 002: 267 / 3002 loss=2.857, ppl=7.25, wps=6014.9, ups=0.09, wpb=64831, bsz=128, num_updates=3245, lr=9.9982e-05, gnorm=2.34, loss_scale=2, train_wall=10, gb_free=2.8, wall=37253
2021-06-19 04:59:50 | INFO | train_inner | epoch 002: 268 / 3002 loss=2.944, ppl=7.7, wps=5779.8, ups=0.09, wpb=64874, bsz=128, num_updates=3246, lr=9.9982e-05, gnorm=3.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=37264
2021-06-19 05:00:01 | INFO | train_inner | epoch 002: 269 / 3002 loss=2.775, ppl=6.85, wps=5800.8, ups=0.09, wpb=64741, bsz=128, num_updates=3247, lr=9.9982e-05, gnorm=2.53, loss_scale=2, train_wall=11, gb_free=2.8, wall=37276
2021-06-19 05:00:12 | INFO | train_inner | epoch 002: 270 / 3002 loss=2.812, ppl=7.02, wps=5946.2, ups=0.09, wpb=64838, bsz=128, num_updates=3248, lr=9.9982e-05, gnorm=2.742, loss_scale=2, train_wall=10, gb_free=2.8, wall=37286
2021-06-19 05:00:23 | INFO | train_inner | epoch 002: 271 / 3002 loss=2.862, ppl=7.27, wps=5933.2, ups=0.09, wpb=64852, bsz=128, num_updates=3249, lr=9.9982e-05, gnorm=2.349, loss_scale=2, train_wall=10, gb_free=2.8, wall=37297
2021-06-19 05:00:34 | INFO | train_inner | epoch 002: 272 / 3002 loss=2.714, ppl=6.56, wps=5784.5, ups=0.09, wpb=64825, bsz=128, num_updates=3250, lr=9.9982e-05, gnorm=5.123, loss_scale=2, train_wall=11, gb_free=2.8, wall=37309
2021-06-19 05:00:45 | INFO | train_inner | epoch 002: 273 / 3002 loss=2.839, ppl=7.16, wps=5911.2, ups=0.09, wpb=64885, bsz=128, num_updates=3251, lr=9.9982e-05, gnorm=2.488, loss_scale=2, train_wall=10, gb_free=2.8, wall=37320
2021-06-19 05:00:57 | INFO | train_inner | epoch 002: 274 / 3002 loss=2.905, ppl=7.49, wps=5723.4, ups=0.09, wpb=64780, bsz=128, num_updates=3252, lr=9.9982e-05, gnorm=2.418, loss_scale=2, train_wall=11, gb_free=2.8, wall=37331
2021-06-19 05:01:08 | INFO | train_inner | epoch 002: 275 / 3002 loss=3.041, ppl=8.23, wps=5684.2, ups=0.09, wpb=64822, bsz=128, num_updates=3253, lr=9.9982e-05, gnorm=2.526, loss_scale=2, train_wall=11, gb_free=2.8, wall=37342
2021-06-19 05:01:19 | INFO | train_inner | epoch 002: 276 / 3002 loss=2.69, ppl=6.45, wps=5815.6, ups=0.09, wpb=64791, bsz=128, num_updates=3254, lr=9.9982e-05, gnorm=2.664, loss_scale=2, train_wall=11, gb_free=2.8, wall=37353
2021-06-19 05:01:30 | INFO | train_inner | epoch 002: 277 / 3002 loss=2.772, ppl=6.83, wps=5936, ups=0.09, wpb=64812, bsz=128, num_updates=3255, lr=9.9982e-05, gnorm=3.761, loss_scale=2, train_wall=10, gb_free=2.8, wall=37364
2021-06-19 05:01:41 | INFO | train_inner | epoch 002: 278 / 3002 loss=2.867, ppl=7.3, wps=5918.9, ups=0.09, wpb=64767, bsz=128, num_updates=3256, lr=9.9982e-05, gnorm=5.636, loss_scale=2, train_wall=10, gb_free=2.8, wall=37375
2021-06-19 05:01:52 | INFO | train_inner | epoch 002: 279 / 3002 loss=2.944, ppl=7.69, wps=5885.4, ups=0.09, wpb=64889, bsz=128, num_updates=3257, lr=9.99819e-05, gnorm=2.428, loss_scale=2, train_wall=11, gb_free=2.8, wall=37386
2021-06-19 05:02:03 | INFO | train_inner | epoch 002: 280 / 3002 loss=2.68, ppl=6.41, wps=5805.3, ups=0.09, wpb=64735, bsz=128, num_updates=3258, lr=9.99819e-05, gnorm=2.932, loss_scale=2, train_wall=11, gb_free=2.8, wall=37397
2021-06-19 05:02:14 | INFO | train_inner | epoch 002: 281 / 3002 loss=2.878, ppl=7.35, wps=5856.9, ups=0.09, wpb=64815, bsz=128, num_updates=3259, lr=9.99819e-05, gnorm=2.995, loss_scale=2, train_wall=11, gb_free=2.8, wall=37409
2021-06-19 05:02:25 | INFO | train_inner | epoch 002: 282 / 3002 loss=2.855, ppl=7.23, wps=5949.1, ups=0.09, wpb=64878, bsz=128, num_updates=3260, lr=9.99819e-05, gnorm=3.44, loss_scale=2, train_wall=10, gb_free=2.8, wall=37419
2021-06-19 05:02:36 | INFO | train_inner | epoch 002: 283 / 3002 loss=2.753, ppl=6.74, wps=5716, ups=0.09, wpb=64857, bsz=128, num_updates=3261, lr=9.99819e-05, gnorm=2.98, loss_scale=2, train_wall=11, gb_free=2.8, wall=37431
2021-06-19 05:02:48 | INFO | train_inner | epoch 002: 284 / 3002 loss=2.821, ppl=7.07, wps=5794, ups=0.09, wpb=64356, bsz=128, num_updates=3262, lr=9.99819e-05, gnorm=2.541, loss_scale=2, train_wall=11, gb_free=2.8, wall=37442
2021-06-19 05:02:58 | INFO | train_inner | epoch 002: 285 / 3002 loss=2.787, ppl=6.9, wps=5933.3, ups=0.09, wpb=64830, bsz=128, num_updates=3263, lr=9.99819e-05, gnorm=3.002, loss_scale=2, train_wall=10, gb_free=2.8, wall=37453
2021-06-19 05:03:10 | INFO | train_inner | epoch 002: 286 / 3002 loss=2.75, ppl=6.73, wps=5730.4, ups=0.09, wpb=64846, bsz=128, num_updates=3264, lr=9.99819e-05, gnorm=2.431, loss_scale=2, train_wall=11, gb_free=2.8, wall=37464
2021-06-19 05:03:21 | INFO | train_inner | epoch 002: 287 / 3002 loss=2.848, ppl=7.2, wps=5907.7, ups=0.09, wpb=64922, bsz=128, num_updates=3265, lr=9.99819e-05, gnorm=5.724, loss_scale=2, train_wall=11, gb_free=2.8, wall=37475
2021-06-19 05:03:32 | INFO | train_inner | epoch 002: 288 / 3002 loss=2.933, ppl=7.64, wps=5767.1, ups=0.09, wpb=64852, bsz=128, num_updates=3266, lr=9.99819e-05, gnorm=2.72, loss_scale=2, train_wall=11, gb_free=2.8, wall=37486
2021-06-19 05:03:43 | INFO | train_inner | epoch 002: 289 / 3002 loss=2.892, ppl=7.42, wps=5952.9, ups=0.09, wpb=64871, bsz=128, num_updates=3267, lr=9.99819e-05, gnorm=2.852, loss_scale=2, train_wall=10, gb_free=2.8, wall=37497
2021-06-19 05:03:54 | INFO | train_inner | epoch 002: 290 / 3002 loss=2.769, ppl=6.82, wps=5857, ups=0.09, wpb=64874, bsz=128, num_updates=3268, lr=9.99819e-05, gnorm=3.501, loss_scale=2, train_wall=11, gb_free=2.8, wall=37508
2021-06-19 05:04:05 | INFO | train_inner | epoch 002: 291 / 3002 loss=2.825, ppl=7.09, wps=5873.1, ups=0.09, wpb=64799, bsz=128, num_updates=3269, lr=9.99818e-05, gnorm=2.772, loss_scale=2, train_wall=11, gb_free=2.8, wall=37519
2021-06-19 05:04:16 | INFO | train_inner | epoch 002: 292 / 3002 loss=3.024, ppl=8.13, wps=5702.7, ups=0.09, wpb=64815, bsz=128, num_updates=3270, lr=9.99818e-05, gnorm=2.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=37531
2021-06-19 05:04:28 | INFO | train_inner | epoch 002: 293 / 3002 loss=3.01, ppl=8.05, wps=5806.9, ups=0.09, wpb=64873, bsz=128, num_updates=3271, lr=9.99818e-05, gnorm=2.983, loss_scale=2, train_wall=11, gb_free=2.8, wall=37542
2021-06-19 05:04:39 | INFO | train_inner | epoch 002: 294 / 3002 loss=2.956, ppl=7.76, wps=5855, ups=0.09, wpb=64887, bsz=128, num_updates=3272, lr=9.99818e-05, gnorm=3.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=37553
2021-06-19 05:04:50 | INFO | train_inner | epoch 002: 295 / 3002 loss=2.996, ppl=7.98, wps=5865.1, ups=0.09, wpb=64785, bsz=128, num_updates=3273, lr=9.99818e-05, gnorm=3.17, loss_scale=2, train_wall=11, gb_free=2.8, wall=37564
2021-06-19 05:05:01 | INFO | train_inner | epoch 002: 296 / 3002 loss=2.977, ppl=7.87, wps=5818.1, ups=0.09, wpb=64797, bsz=128, num_updates=3274, lr=9.99818e-05, gnorm=5.354, loss_scale=2, train_wall=11, gb_free=2.8, wall=37575
2021-06-19 05:05:12 | INFO | train_inner | epoch 002: 297 / 3002 loss=2.747, ppl=6.71, wps=5864.6, ups=0.09, wpb=64842, bsz=128, num_updates=3275, lr=9.99818e-05, gnorm=2.429, loss_scale=2, train_wall=11, gb_free=2.8, wall=37586
2021-06-19 05:05:23 | INFO | train_inner | epoch 002: 298 / 3002 loss=2.887, ppl=7.4, wps=5910.5, ups=0.09, wpb=64925, bsz=128, num_updates=3276, lr=9.99818e-05, gnorm=2.779, loss_scale=2, train_wall=11, gb_free=2.8, wall=37597
2021-06-19 05:05:34 | INFO | train_inner | epoch 002: 299 / 3002 loss=2.867, ppl=7.29, wps=5886.7, ups=0.09, wpb=64809, bsz=128, num_updates=3277, lr=9.99818e-05, gnorm=2.962, loss_scale=2, train_wall=11, gb_free=2.8, wall=37608
2021-06-19 05:05:45 | INFO | train_inner | epoch 002: 300 / 3002 loss=2.877, ppl=7.35, wps=5814, ups=0.09, wpb=64789, bsz=128, num_updates=3278, lr=9.99818e-05, gnorm=2.486, loss_scale=2, train_wall=11, gb_free=2.8, wall=37619
2021-06-19 05:05:56 | INFO | train_inner | epoch 002: 301 / 3002 loss=2.816, ppl=7.04, wps=5868.3, ups=0.09, wpb=64804, bsz=128, num_updates=3279, lr=9.99818e-05, gnorm=9.481, loss_scale=2, train_wall=11, gb_free=2.8, wall=37630
2021-06-19 05:06:07 | INFO | train_inner | epoch 002: 302 / 3002 loss=2.807, ppl=7, wps=5802.6, ups=0.09, wpb=64829, bsz=128, num_updates=3280, lr=9.99818e-05, gnorm=2.436, loss_scale=2, train_wall=11, gb_free=2.8, wall=37642
2021-06-19 05:06:18 | INFO | train_inner | epoch 002: 303 / 3002 loss=2.964, ppl=7.8, wps=5835, ups=0.09, wpb=64750, bsz=128, num_updates=3281, lr=9.99818e-05, gnorm=2.66, loss_scale=2, train_wall=11, gb_free=2.8, wall=37653
2021-06-19 05:06:29 | INFO | train_inner | epoch 002: 304 / 3002 loss=2.889, ppl=7.41, wps=5855.2, ups=0.09, wpb=64798, bsz=128, num_updates=3282, lr=9.99817e-05, gnorm=2.466, loss_scale=2, train_wall=11, gb_free=2.8, wall=37664
2021-06-19 05:06:40 | INFO | train_inner | epoch 002: 305 / 3002 loss=2.903, ppl=7.48, wps=5903.4, ups=0.09, wpb=64736, bsz=128, num_updates=3283, lr=9.99817e-05, gnorm=2.58, loss_scale=2, train_wall=11, gb_free=2.8, wall=37675
2021-06-19 05:06:51 | INFO | train_inner | epoch 002: 306 / 3002 loss=2.842, ppl=7.17, wps=5914, ups=0.09, wpb=64813, bsz=128, num_updates=3284, lr=9.99817e-05, gnorm=2.512, loss_scale=2, train_wall=11, gb_free=2.8, wall=37686
2021-06-19 05:07:02 | INFO | train_inner | epoch 002: 307 / 3002 loss=2.863, ppl=7.27, wps=5835.6, ups=0.09, wpb=64855, bsz=128, num_updates=3285, lr=9.99817e-05, gnorm=2.353, loss_scale=2, train_wall=11, gb_free=2.8, wall=37697
2021-06-19 05:07:14 | INFO | train_inner | epoch 002: 308 / 3002 loss=3.113, ppl=8.65, wps=5762.1, ups=0.09, wpb=64717, bsz=128, num_updates=3286, lr=9.99817e-05, gnorm=2.623, loss_scale=2, train_wall=11, gb_free=2.8, wall=37708
2021-06-19 05:07:25 | INFO | train_inner | epoch 002: 309 / 3002 loss=2.933, ppl=7.64, wps=5762.3, ups=0.09, wpb=64711, bsz=128, num_updates=3287, lr=9.99817e-05, gnorm=2.426, loss_scale=2, train_wall=11, gb_free=2.8, wall=37719
2021-06-19 05:07:36 | INFO | train_inner | epoch 002: 310 / 3002 loss=2.795, ppl=6.94, wps=5836.5, ups=0.09, wpb=64882, bsz=128, num_updates=3288, lr=9.99817e-05, gnorm=2.395, loss_scale=2, train_wall=11, gb_free=2.8, wall=37730
2021-06-19 05:07:47 | INFO | train_inner | epoch 002: 311 / 3002 loss=2.93, ppl=7.62, wps=5940.3, ups=0.09, wpb=64832, bsz=128, num_updates=3289, lr=9.99817e-05, gnorm=2.376, loss_scale=2, train_wall=10, gb_free=2.8, wall=37741
2021-06-19 05:07:58 | INFO | train_inner | epoch 002: 312 / 3002 loss=2.928, ppl=7.61, wps=5877.5, ups=0.09, wpb=64814, bsz=128, num_updates=3290, lr=9.99817e-05, gnorm=2.364, loss_scale=2, train_wall=11, gb_free=2.8, wall=37752
2021-06-19 05:08:09 | INFO | train_inner | epoch 002: 313 / 3002 loss=2.967, ppl=7.82, wps=5877.2, ups=0.09, wpb=64795, bsz=128, num_updates=3291, lr=9.99817e-05, gnorm=2.612, loss_scale=2, train_wall=11, gb_free=2.8, wall=37763
2021-06-19 05:08:20 | INFO | train_inner | epoch 002: 314 / 3002 loss=2.812, ppl=7.02, wps=5742, ups=0.09, wpb=64773, bsz=128, num_updates=3292, lr=9.99817e-05, gnorm=2.621, loss_scale=2, train_wall=11, gb_free=2.8, wall=37775
2021-06-19 05:08:31 | INFO | train_inner | epoch 002: 315 / 3002 loss=2.852, ppl=7.22, wps=5897.9, ups=0.09, wpb=64840, bsz=128, num_updates=3293, lr=9.99817e-05, gnorm=2.597, loss_scale=2, train_wall=11, gb_free=2.8, wall=37786
2021-06-19 05:08:42 | INFO | train_inner | epoch 002: 316 / 3002 loss=2.782, ppl=6.88, wps=5786.2, ups=0.09, wpb=64854, bsz=128, num_updates=3294, lr=9.99816e-05, gnorm=2.547, loss_scale=2, train_wall=11, gb_free=2.8, wall=37797
2021-06-19 05:08:54 | INFO | train_inner | epoch 002: 317 / 3002 loss=3.025, ppl=8.14, wps=5877.5, ups=0.09, wpb=64841, bsz=128, num_updates=3295, lr=9.99816e-05, gnorm=2.393, loss_scale=2, train_wall=11, gb_free=2.8, wall=37808
2021-06-19 05:09:05 | INFO | train_inner | epoch 002: 318 / 3002 loss=2.691, ppl=6.46, wps=5817.9, ups=0.09, wpb=64845, bsz=128, num_updates=3296, lr=9.99816e-05, gnorm=3.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=37819
2021-06-19 05:09:16 | INFO | train_inner | epoch 002: 319 / 3002 loss=2.759, ppl=6.77, wps=5742.9, ups=0.09, wpb=64831, bsz=128, num_updates=3297, lr=9.99816e-05, gnorm=2.481, loss_scale=2, train_wall=11, gb_free=2.8, wall=37830
2021-06-19 05:09:27 | INFO | train_inner | epoch 002: 320 / 3002 loss=2.886, ppl=7.39, wps=5835.5, ups=0.09, wpb=64834, bsz=128, num_updates=3298, lr=9.99816e-05, gnorm=2.456, loss_scale=2, train_wall=11, gb_free=2.8, wall=37841
2021-06-19 05:09:38 | INFO | train_inner | epoch 002: 321 / 3002 loss=2.808, ppl=7, wps=5908.5, ups=0.09, wpb=64834, bsz=128, num_updates=3299, lr=9.99816e-05, gnorm=2.716, loss_scale=2, train_wall=10, gb_free=2.8, wall=37852
2021-06-19 05:09:49 | INFO | train_inner | epoch 002: 322 / 3002 loss=2.568, ppl=5.93, wps=5888.2, ups=0.09, wpb=64833, bsz=128, num_updates=3300, lr=9.99816e-05, gnorm=2.572, loss_scale=2, train_wall=11, gb_free=2.8, wall=37863
2021-06-19 05:10:00 | INFO | train_inner | epoch 002: 323 / 3002 loss=2.876, ppl=7.34, wps=5837.5, ups=0.09, wpb=64822, bsz=128, num_updates=3301, lr=9.99816e-05, gnorm=2.44, loss_scale=2, train_wall=11, gb_free=2.8, wall=37874
2021-06-19 05:10:11 | INFO | train_inner | epoch 002: 324 / 3002 loss=2.743, ppl=6.69, wps=5823.1, ups=0.09, wpb=64798, bsz=128, num_updates=3302, lr=9.99816e-05, gnorm=2.409, loss_scale=4, train_wall=11, gb_free=2.8, wall=37886
2021-06-19 05:10:23 | INFO | train_inner | epoch 002: 325 / 3002 loss=2.816, ppl=7.04, wps=5760.2, ups=0.09, wpb=64827, bsz=128, num_updates=3303, lr=9.99816e-05, gnorm=2.376, loss_scale=4, train_wall=11, gb_free=2.8, wall=37897
2021-06-19 05:10:34 | INFO | train_inner | epoch 002: 326 / 3002 loss=2.772, ppl=6.83, wps=5810.2, ups=0.09, wpb=64826, bsz=128, num_updates=3304, lr=9.99816e-05, gnorm=2.865, loss_scale=4, train_wall=11, gb_free=2.8, wall=37908
2021-06-19 05:10:45 | INFO | train_inner | epoch 002: 327 / 3002 loss=2.85, ppl=7.21, wps=5766.1, ups=0.09, wpb=64919, bsz=128, num_updates=3305, lr=9.99816e-05, gnorm=2.377, loss_scale=4, train_wall=11, gb_free=2.8, wall=37919
2021-06-19 05:10:56 | INFO | train_inner | epoch 002: 328 / 3002 loss=2.724, ppl=6.61, wps=5935.1, ups=0.09, wpb=64897, bsz=128, num_updates=3306, lr=9.99816e-05, gnorm=3.409, loss_scale=4, train_wall=10, gb_free=2.8, wall=37930
2021-06-19 05:11:07 | INFO | train_inner | epoch 002: 329 / 3002 loss=2.733, ppl=6.65, wps=6015.9, ups=0.09, wpb=64969, bsz=128, num_updates=3307, lr=9.99815e-05, gnorm=12.542, loss_scale=4, train_wall=10, gb_free=2.8, wall=37941
2021-06-19 05:11:18 | INFO | train_inner | epoch 002: 330 / 3002 loss=2.803, ppl=6.98, wps=5840.9, ups=0.09, wpb=64825, bsz=128, num_updates=3308, lr=9.99815e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=37952
2021-06-19 05:11:29 | INFO | train_inner | epoch 002: 331 / 3002 loss=2.884, ppl=7.38, wps=5873.7, ups=0.09, wpb=64794, bsz=128, num_updates=3309, lr=9.99815e-05, gnorm=2.399, loss_scale=4, train_wall=11, gb_free=2.8, wall=37963
2021-06-19 05:11:40 | INFO | train_inner | epoch 002: 332 / 3002 loss=2.903, ppl=7.48, wps=5824.6, ups=0.09, wpb=64725, bsz=128, num_updates=3310, lr=9.99815e-05, gnorm=2.529, loss_scale=4, train_wall=11, gb_free=2.8, wall=37974
2021-06-19 05:11:51 | INFO | train_inner | epoch 002: 333 / 3002 loss=2.879, ppl=7.36, wps=5773.3, ups=0.09, wpb=64853, bsz=128, num_updates=3311, lr=9.99815e-05, gnorm=2.509, loss_scale=4, train_wall=11, gb_free=2.8, wall=37985
2021-06-19 05:12:02 | INFO | train_inner | epoch 002: 334 / 3002 loss=2.907, ppl=7.5, wps=5907.7, ups=0.09, wpb=64793, bsz=128, num_updates=3312, lr=9.99815e-05, gnorm=3.113, loss_scale=4, train_wall=11, gb_free=2.8, wall=37996
2021-06-19 05:12:13 | INFO | train_inner | epoch 002: 335 / 3002 loss=2.803, ppl=6.98, wps=5816.9, ups=0.09, wpb=64917, bsz=128, num_updates=3313, lr=9.99815e-05, gnorm=2.493, loss_scale=4, train_wall=11, gb_free=2.8, wall=38008
2021-06-19 05:12:24 | INFO | train_inner | epoch 002: 336 / 3002 loss=2.805, ppl=6.99, wps=5901.9, ups=0.09, wpb=64784, bsz=128, num_updates=3314, lr=9.99815e-05, gnorm=3.498, loss_scale=4, train_wall=11, gb_free=2.8, wall=38019
2021-06-19 05:12:35 | INFO | train_inner | epoch 002: 337 / 3002 loss=2.864, ppl=7.28, wps=5832.6, ups=0.09, wpb=64790, bsz=128, num_updates=3315, lr=9.99815e-05, gnorm=5.389, loss_scale=4, train_wall=11, gb_free=2.8, wall=38030
2021-06-19 05:12:46 | INFO | train_inner | epoch 002: 338 / 3002 loss=2.91, ppl=7.52, wps=5860.8, ups=0.09, wpb=64859, bsz=128, num_updates=3316, lr=9.99815e-05, gnorm=2.3, loss_scale=4, train_wall=11, gb_free=2.8, wall=38041
2021-06-19 05:12:57 | INFO | train_inner | epoch 002: 339 / 3002 loss=2.768, ppl=6.81, wps=5898.5, ups=0.09, wpb=64814, bsz=128, num_updates=3317, lr=9.99815e-05, gnorm=2.925, loss_scale=4, train_wall=11, gb_free=2.8, wall=38052
2021-06-19 05:13:09 | INFO | train_inner | epoch 002: 340 / 3002 loss=2.909, ppl=7.51, wps=5825, ups=0.09, wpb=64888, bsz=128, num_updates=3318, lr=9.99815e-05, gnorm=2.579, loss_scale=4, train_wall=11, gb_free=2.8, wall=38063
2021-06-19 05:13:20 | INFO | train_inner | epoch 002: 341 / 3002 loss=2.877, ppl=7.35, wps=5867.9, ups=0.09, wpb=64888, bsz=128, num_updates=3319, lr=9.99814e-05, gnorm=3.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=38074
2021-06-19 05:13:31 | INFO | train_inner | epoch 002: 342 / 3002 loss=2.79, ppl=6.92, wps=5926, ups=0.09, wpb=64893, bsz=128, num_updates=3320, lr=9.99814e-05, gnorm=2.604, loss_scale=4, train_wall=10, gb_free=2.8, wall=38085
2021-06-19 05:13:42 | INFO | train_inner | epoch 002: 343 / 3002 loss=2.815, ppl=7.04, wps=5855.5, ups=0.09, wpb=64821, bsz=128, num_updates=3321, lr=9.99814e-05, gnorm=2.594, loss_scale=4, train_wall=11, gb_free=2.8, wall=38096
2021-06-19 05:13:53 | INFO | train_inner | epoch 002: 344 / 3002 loss=2.816, ppl=7.04, wps=5819.1, ups=0.09, wpb=64861, bsz=128, num_updates=3322, lr=9.99814e-05, gnorm=2.77, loss_scale=4, train_wall=11, gb_free=2.8, wall=38107
2021-06-19 05:14:04 | INFO | train_inner | epoch 002: 345 / 3002 loss=2.738, ppl=6.67, wps=5896.8, ups=0.09, wpb=64876, bsz=128, num_updates=3323, lr=9.99814e-05, gnorm=2.556, loss_scale=4, train_wall=11, gb_free=2.8, wall=38118
2021-06-19 05:14:15 | INFO | train_inner | epoch 002: 346 / 3002 loss=2.787, ppl=6.9, wps=5978.6, ups=0.09, wpb=64889, bsz=128, num_updates=3324, lr=9.99814e-05, gnorm=6.18, loss_scale=4, train_wall=10, gb_free=2.8, wall=38129
2021-06-19 05:14:26 | INFO | train_inner | epoch 002: 347 / 3002 loss=2.848, ppl=7.2, wps=5786.7, ups=0.09, wpb=64840, bsz=128, num_updates=3325, lr=9.99814e-05, gnorm=3.351, loss_scale=4, train_wall=11, gb_free=2.8, wall=38140
2021-06-19 05:14:37 | INFO | train_inner | epoch 002: 348 / 3002 loss=2.964, ppl=7.81, wps=5904.6, ups=0.09, wpb=64868, bsz=128, num_updates=3326, lr=9.99814e-05, gnorm=2.563, loss_scale=4, train_wall=11, gb_free=2.8, wall=38151
2021-06-19 05:14:48 | INFO | train_inner | epoch 002: 349 / 3002 loss=2.791, ppl=6.92, wps=5724.2, ups=0.09, wpb=64777, bsz=128, num_updates=3327, lr=9.99814e-05, gnorm=2.864, loss_scale=4, train_wall=11, gb_free=2.8, wall=38162
2021-06-19 05:14:59 | INFO | train_inner | epoch 002: 350 / 3002 loss=2.769, ppl=6.82, wps=5760.7, ups=0.09, wpb=64831, bsz=128, num_updates=3328, lr=9.99814e-05, gnorm=3.65, loss_scale=4, train_wall=11, gb_free=2.8, wall=38174
2021-06-19 05:15:11 | INFO | train_inner | epoch 002: 351 / 3002 loss=2.884, ppl=7.38, wps=5795.2, ups=0.09, wpb=64837, bsz=128, num_updates=3329, lr=9.99814e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=38185
2021-06-19 05:15:22 | INFO | train_inner | epoch 002: 352 / 3002 loss=2.799, ppl=6.96, wps=5871.1, ups=0.09, wpb=64892, bsz=128, num_updates=3330, lr=9.99814e-05, gnorm=2.611, loss_scale=4, train_wall=11, gb_free=2.8, wall=38196
2021-06-19 05:15:33 | INFO | train_inner | epoch 002: 353 / 3002 loss=2.811, ppl=7.02, wps=5822.2, ups=0.09, wpb=64871, bsz=128, num_updates=3331, lr=9.99814e-05, gnorm=2.673, loss_scale=4, train_wall=11, gb_free=2.8, wall=38207
2021-06-19 05:15:44 | INFO | train_inner | epoch 002: 354 / 3002 loss=2.923, ppl=7.58, wps=5935.2, ups=0.09, wpb=64862, bsz=128, num_updates=3332, lr=9.99813e-05, gnorm=2.406, loss_scale=4, train_wall=10, gb_free=2.8, wall=38218
2021-06-19 05:15:55 | INFO | train_inner | epoch 002: 355 / 3002 loss=2.799, ppl=6.96, wps=5817.4, ups=0.09, wpb=64838, bsz=128, num_updates=3333, lr=9.99813e-05, gnorm=2.358, loss_scale=4, train_wall=11, gb_free=2.8, wall=38229
2021-06-19 05:16:06 | INFO | train_inner | epoch 002: 356 / 3002 loss=2.841, ppl=7.17, wps=5922.1, ups=0.09, wpb=64845, bsz=128, num_updates=3334, lr=9.99813e-05, gnorm=3.596, loss_scale=4, train_wall=10, gb_free=2.8, wall=38240
2021-06-19 05:16:17 | INFO | train_inner | epoch 002: 357 / 3002 loss=2.921, ppl=7.57, wps=5870.3, ups=0.09, wpb=64897, bsz=128, num_updates=3335, lr=9.99813e-05, gnorm=2.384, loss_scale=4, train_wall=11, gb_free=2.8, wall=38251
2021-06-19 05:16:28 | INFO | train_inner | epoch 002: 358 / 3002 loss=2.878, ppl=7.35, wps=5930.1, ups=0.09, wpb=64803, bsz=128, num_updates=3336, lr=9.99813e-05, gnorm=3.643, loss_scale=4, train_wall=10, gb_free=2.8, wall=38262
2021-06-19 05:16:39 | INFO | train_inner | epoch 002: 359 / 3002 loss=2.909, ppl=7.51, wps=5862.4, ups=0.09, wpb=64834, bsz=128, num_updates=3337, lr=9.99813e-05, gnorm=5.774, loss_scale=4, train_wall=11, gb_free=2.8, wall=38273
2021-06-19 05:16:50 | INFO | train_inner | epoch 002: 360 / 3002 loss=2.751, ppl=6.73, wps=5872.7, ups=0.09, wpb=64815, bsz=128, num_updates=3338, lr=9.99813e-05, gnorm=3.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=38284
2021-06-19 05:17:01 | INFO | train_inner | epoch 002: 361 / 3002 loss=2.87, ppl=7.31, wps=5856, ups=0.09, wpb=64775, bsz=128, num_updates=3339, lr=9.99813e-05, gnorm=5.31, loss_scale=4, train_wall=11, gb_free=2.8, wall=38295
2021-06-19 05:17:12 | INFO | train_inner | epoch 002: 362 / 3002 loss=2.804, ppl=6.98, wps=5872.9, ups=0.09, wpb=64823, bsz=128, num_updates=3340, lr=9.99813e-05, gnorm=2.378, loss_scale=4, train_wall=11, gb_free=2.8, wall=38306
2021-06-19 05:17:23 | INFO | train_inner | epoch 002: 363 / 3002 loss=2.992, ppl=7.96, wps=5857.2, ups=0.09, wpb=64738, bsz=128, num_updates=3341, lr=9.99813e-05, gnorm=3.367, loss_scale=4, train_wall=11, gb_free=2.8, wall=38317
2021-06-19 05:17:34 | INFO | train_inner | epoch 002: 364 / 3002 loss=2.807, ppl=7, wps=5845.3, ups=0.09, wpb=64862, bsz=128, num_updates=3342, lr=9.99813e-05, gnorm=2.467, loss_scale=4, train_wall=11, gb_free=2.8, wall=38328
2021-06-19 05:17:45 | INFO | train_inner | epoch 002: 365 / 3002 loss=2.828, ppl=7.1, wps=5787.9, ups=0.09, wpb=64799, bsz=128, num_updates=3343, lr=9.99813e-05, gnorm=2.563, loss_scale=4, train_wall=11, gb_free=2.8, wall=38340
2021-06-19 05:17:57 | INFO | train_inner | epoch 002: 366 / 3002 loss=2.984, ppl=7.91, wps=5793.7, ups=0.09, wpb=64843, bsz=128, num_updates=3344, lr=9.99812e-05, gnorm=2.51, loss_scale=4, train_wall=11, gb_free=2.8, wall=38351
2021-06-19 05:18:08 | INFO | train_inner | epoch 002: 367 / 3002 loss=2.875, ppl=7.34, wps=5811.8, ups=0.09, wpb=64821, bsz=128, num_updates=3345, lr=9.99812e-05, gnorm=2.394, loss_scale=4, train_wall=11, gb_free=2.8, wall=38362
2021-06-19 05:18:19 | INFO | train_inner | epoch 002: 368 / 3002 loss=2.901, ppl=7.47, wps=5845.2, ups=0.09, wpb=64801, bsz=128, num_updates=3346, lr=9.99812e-05, gnorm=2.374, loss_scale=4, train_wall=11, gb_free=2.8, wall=38373
2021-06-19 05:18:30 | INFO | train_inner | epoch 002: 369 / 3002 loss=2.836, ppl=7.14, wps=5815.8, ups=0.09, wpb=64847, bsz=128, num_updates=3347, lr=9.99812e-05, gnorm=2.347, loss_scale=4, train_wall=11, gb_free=2.8, wall=38384
2021-06-19 05:18:41 | INFO | train_inner | epoch 002: 370 / 3002 loss=2.905, ppl=7.49, wps=5817.9, ups=0.09, wpb=64830, bsz=128, num_updates=3348, lr=9.99812e-05, gnorm=2.37, loss_scale=4, train_wall=11, gb_free=2.8, wall=38395
2021-06-19 05:18:52 | INFO | train_inner | epoch 002: 371 / 3002 loss=2.936, ppl=7.65, wps=5756.2, ups=0.09, wpb=64841, bsz=128, num_updates=3349, lr=9.99812e-05, gnorm=2.71, loss_scale=4, train_wall=11, gb_free=2.8, wall=38407
2021-06-19 05:19:03 | INFO | train_inner | epoch 002: 372 / 3002 loss=2.814, ppl=7.03, wps=5890.9, ups=0.09, wpb=64872, bsz=128, num_updates=3350, lr=9.99812e-05, gnorm=2.403, loss_scale=4, train_wall=11, gb_free=2.8, wall=38418
2021-06-19 05:19:14 | INFO | train_inner | epoch 002: 373 / 3002 loss=2.914, ppl=7.54, wps=5845.1, ups=0.09, wpb=64786, bsz=128, num_updates=3351, lr=9.99812e-05, gnorm=2.483, loss_scale=4, train_wall=11, gb_free=2.8, wall=38429
2021-06-19 05:19:25 | INFO | train_inner | epoch 002: 374 / 3002 loss=2.627, ppl=6.18, wps=5923.6, ups=0.09, wpb=64883, bsz=128, num_updates=3352, lr=9.99812e-05, gnorm=2.351, loss_scale=4, train_wall=10, gb_free=2.8, wall=38440
2021-06-19 05:19:36 | INFO | train_inner | epoch 002: 375 / 3002 loss=2.898, ppl=7.45, wps=5946.6, ups=0.09, wpb=64846, bsz=128, num_updates=3353, lr=9.99812e-05, gnorm=6.393, loss_scale=4, train_wall=10, gb_free=2.8, wall=38451
2021-06-19 05:19:47 | INFO | train_inner | epoch 002: 376 / 3002 loss=3.132, ppl=8.77, wps=5889.1, ups=0.09, wpb=64842, bsz=128, num_updates=3354, lr=9.99812e-05, gnorm=2.502, loss_scale=4, train_wall=11, gb_free=2.8, wall=38462
2021-06-19 05:19:58 | INFO | train_inner | epoch 002: 377 / 3002 loss=2.913, ppl=7.53, wps=5840.7, ups=0.09, wpb=64813, bsz=128, num_updates=3355, lr=9.99812e-05, gnorm=2.364, loss_scale=4, train_wall=11, gb_free=2.8, wall=38473
2021-06-19 05:20:10 | INFO | train_inner | epoch 002: 378 / 3002 loss=2.819, ppl=7.06, wps=5770.6, ups=0.09, wpb=64729, bsz=128, num_updates=3356, lr=9.99812e-05, gnorm=2.726, loss_scale=4, train_wall=11, gb_free=2.8, wall=38484
2021-06-19 05:20:21 | INFO | train_inner | epoch 002: 379 / 3002 loss=2.954, ppl=7.75, wps=5747.2, ups=0.09, wpb=64729, bsz=128, num_updates=3357, lr=9.99811e-05, gnorm=9.456, loss_scale=4, train_wall=11, gb_free=2.8, wall=38495
2021-06-19 05:20:32 | INFO | train_inner | epoch 002: 380 / 3002 loss=2.625, ppl=6.17, wps=5833.4, ups=0.09, wpb=64914, bsz=128, num_updates=3358, lr=9.99811e-05, gnorm=2.732, loss_scale=4, train_wall=11, gb_free=2.8, wall=38506
2021-06-19 05:20:43 | INFO | train_inner | epoch 002: 381 / 3002 loss=2.854, ppl=7.23, wps=5791, ups=0.09, wpb=64783, bsz=128, num_updates=3359, lr=9.99811e-05, gnorm=3.058, loss_scale=4, train_wall=11, gb_free=2.8, wall=38518
2021-06-19 05:20:54 | INFO | train_inner | epoch 002: 382 / 3002 loss=2.752, ppl=6.74, wps=5855.1, ups=0.09, wpb=64867, bsz=128, num_updates=3360, lr=9.99811e-05, gnorm=2.624, loss_scale=4, train_wall=11, gb_free=2.8, wall=38529
2021-06-19 05:21:05 | INFO | train_inner | epoch 002: 383 / 3002 loss=2.922, ppl=7.58, wps=5867.8, ups=0.09, wpb=64900, bsz=128, num_updates=3361, lr=9.99811e-05, gnorm=2.394, loss_scale=4, train_wall=11, gb_free=2.8, wall=38540
2021-06-19 05:21:17 | INFO | train_inner | epoch 002: 384 / 3002 loss=2.789, ppl=6.91, wps=5792.3, ups=0.09, wpb=64849, bsz=128, num_updates=3362, lr=9.99811e-05, gnorm=12.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=38551
2021-06-19 05:21:28 | INFO | train_inner | epoch 002: 385 / 3002 loss=2.886, ppl=7.39, wps=5767.8, ups=0.09, wpb=64861, bsz=128, num_updates=3363, lr=9.99811e-05, gnorm=2.317, loss_scale=4, train_wall=11, gb_free=2.8, wall=38562
2021-06-19 05:21:39 | INFO | train_inner | epoch 002: 386 / 3002 loss=2.854, ppl=7.23, wps=5682.2, ups=0.09, wpb=64857, bsz=128, num_updates=3364, lr=9.99811e-05, gnorm=2.519, loss_scale=4, train_wall=11, gb_free=2.8, wall=38574
2021-06-19 05:21:50 | INFO | train_inner | epoch 002: 387 / 3002 loss=2.85, ppl=7.21, wps=5773.4, ups=0.09, wpb=64851, bsz=128, num_updates=3365, lr=9.99811e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=38585
2021-06-19 05:22:02 | INFO | train_inner | epoch 002: 388 / 3002 loss=2.783, ppl=6.88, wps=5844.4, ups=0.09, wpb=64818, bsz=128, num_updates=3366, lr=9.99811e-05, gnorm=2.61, loss_scale=4, train_wall=11, gb_free=2.8, wall=38596
2021-06-19 05:22:13 | INFO | train_inner | epoch 002: 389 / 3002 loss=2.843, ppl=7.18, wps=5772.5, ups=0.09, wpb=64871, bsz=128, num_updates=3367, lr=9.99811e-05, gnorm=2.447, loss_scale=4, train_wall=11, gb_free=2.8, wall=38607
2021-06-19 05:22:24 | INFO | train_inner | epoch 002: 390 / 3002 loss=2.937, ppl=7.66, wps=5787.1, ups=0.09, wpb=64832, bsz=128, num_updates=3368, lr=9.99811e-05, gnorm=2.438, loss_scale=4, train_wall=11, gb_free=2.8, wall=38618
2021-06-19 05:22:35 | INFO | train_inner | epoch 002: 391 / 3002 loss=2.893, ppl=7.43, wps=5739, ups=0.09, wpb=64764, bsz=128, num_updates=3369, lr=9.9981e-05, gnorm=2.66, loss_scale=4, train_wall=11, gb_free=2.8, wall=38630
2021-06-19 05:22:46 | INFO | train_inner | epoch 002: 392 / 3002 loss=3.023, ppl=8.13, wps=5848.6, ups=0.09, wpb=64760, bsz=128, num_updates=3370, lr=9.9981e-05, gnorm=2.491, loss_scale=4, train_wall=11, gb_free=2.8, wall=38641
2021-06-19 05:22:57 | INFO | train_inner | epoch 002: 393 / 3002 loss=2.944, ppl=7.69, wps=5913.4, ups=0.09, wpb=64880, bsz=128, num_updates=3371, lr=9.9981e-05, gnorm=2.477, loss_scale=4, train_wall=10, gb_free=2.8, wall=38652
2021-06-19 05:23:08 | INFO | train_inner | epoch 002: 394 / 3002 loss=2.83, ppl=7.11, wps=5930.5, ups=0.09, wpb=64847, bsz=128, num_updates=3372, lr=9.9981e-05, gnorm=2.401, loss_scale=4, train_wall=10, gb_free=2.8, wall=38663
2021-06-19 05:23:19 | INFO | train_inner | epoch 002: 395 / 3002 loss=2.805, ppl=6.99, wps=5890.3, ups=0.09, wpb=64849, bsz=128, num_updates=3373, lr=9.9981e-05, gnorm=2.846, loss_scale=4, train_wall=11, gb_free=2.8, wall=38674
2021-06-19 05:23:31 | INFO | train_inner | epoch 002: 396 / 3002 loss=2.713, ppl=6.55, wps=5673.6, ups=0.09, wpb=64769, bsz=128, num_updates=3374, lr=9.9981e-05, gnorm=2.477, loss_scale=4, train_wall=11, gb_free=2.8, wall=38685
2021-06-19 05:23:42 | INFO | train_inner | epoch 002: 397 / 3002 loss=2.834, ppl=7.13, wps=5704.5, ups=0.09, wpb=64669, bsz=128, num_updates=3375, lr=9.9981e-05, gnorm=2.462, loss_scale=4, train_wall=11, gb_free=2.8, wall=38696
2021-06-19 05:23:53 | INFO | train_inner | epoch 002: 398 / 3002 loss=2.825, ppl=7.09, wps=5878.4, ups=0.09, wpb=64918, bsz=128, num_updates=3376, lr=9.9981e-05, gnorm=2.569, loss_scale=4, train_wall=11, gb_free=2.8, wall=38707
2021-06-19 05:24:04 | INFO | train_inner | epoch 002: 399 / 3002 loss=2.727, ppl=6.62, wps=5792.7, ups=0.09, wpb=64881, bsz=128, num_updates=3377, lr=9.9981e-05, gnorm=2.427, loss_scale=4, train_wall=11, gb_free=2.8, wall=38719
2021-06-19 05:24:15 | INFO | train_inner | epoch 002: 400 / 3002 loss=2.656, ppl=6.3, wps=5935.3, ups=0.09, wpb=64929, bsz=128, num_updates=3378, lr=9.9981e-05, gnorm=3.606, loss_scale=4, train_wall=10, gb_free=2.8, wall=38730
2021-06-19 05:24:26 | INFO | train_inner | epoch 002: 401 / 3002 loss=2.772, ppl=6.83, wps=5758.7, ups=0.09, wpb=64779, bsz=128, num_updates=3379, lr=9.9981e-05, gnorm=4.599, loss_scale=4, train_wall=11, gb_free=2.8, wall=38741
2021-06-19 05:24:37 | INFO | train_inner | epoch 002: 402 / 3002 loss=2.819, ppl=7.06, wps=5859.6, ups=0.09, wpb=64830, bsz=128, num_updates=3380, lr=9.9981e-05, gnorm=2.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=38752
2021-06-19 05:24:49 | INFO | train_inner | epoch 002: 403 / 3002 loss=2.929, ppl=7.61, wps=5824.7, ups=0.09, wpb=64750, bsz=128, num_updates=3381, lr=9.9981e-05, gnorm=2.677, loss_scale=4, train_wall=11, gb_free=2.8, wall=38763
2021-06-19 05:25:00 | INFO | train_inner | epoch 002: 404 / 3002 loss=2.684, ppl=6.43, wps=5764.7, ups=0.09, wpb=64836, bsz=128, num_updates=3382, lr=9.99809e-05, gnorm=2.561, loss_scale=4, train_wall=11, gb_free=2.8, wall=38774
2021-06-19 05:25:11 | INFO | train_inner | epoch 002: 405 / 3002 loss=2.872, ppl=7.32, wps=5795.3, ups=0.09, wpb=64880, bsz=128, num_updates=3383, lr=9.99809e-05, gnorm=5.759, loss_scale=4, train_wall=11, gb_free=2.8, wall=38785
2021-06-19 05:25:22 | INFO | train_inner | epoch 002: 406 / 3002 loss=2.836, ppl=7.14, wps=5830.1, ups=0.09, wpb=64808, bsz=128, num_updates=3384, lr=9.99809e-05, gnorm=2.501, loss_scale=4, train_wall=11, gb_free=2.8, wall=38796
2021-06-19 05:25:33 | INFO | train_inner | epoch 002: 407 / 3002 loss=2.894, ppl=7.43, wps=5829.4, ups=0.09, wpb=64857, bsz=128, num_updates=3385, lr=9.99809e-05, gnorm=7.51, loss_scale=4, train_wall=11, gb_free=2.8, wall=38808
2021-06-19 05:25:44 | INFO | train_inner | epoch 002: 408 / 3002 loss=2.922, ppl=7.58, wps=5894.5, ups=0.09, wpb=64824, bsz=128, num_updates=3386, lr=9.99809e-05, gnorm=12.126, loss_scale=4, train_wall=11, gb_free=2.8, wall=38819
2021-06-19 05:25:55 | INFO | train_inner | epoch 002: 409 / 3002 loss=2.772, ppl=6.83, wps=5828.8, ups=0.09, wpb=64825, bsz=128, num_updates=3387, lr=9.99809e-05, gnorm=2.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=38830
2021-06-19 05:26:06 | INFO | train_inner | epoch 002: 410 / 3002 loss=2.875, ppl=7.34, wps=5858.1, ups=0.09, wpb=64874, bsz=128, num_updates=3388, lr=9.99809e-05, gnorm=2.719, loss_scale=4, train_wall=11, gb_free=2.8, wall=38841
2021-06-19 05:26:17 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-19 05:26:28 | INFO | train_inner | epoch 002: 412 / 3002 loss=2.924, ppl=7.59, wps=2958.8, ups=0.05, wpb=64889, bsz=128, num_updates=3389, lr=9.99809e-05, gnorm=3.039, loss_scale=2, train_wall=21, gb_free=2.8, wall=38863
2021-06-19 05:26:40 | INFO | train_inner | epoch 002: 413 / 3002 loss=2.859, ppl=7.26, wps=5827, ups=0.09, wpb=64761, bsz=128, num_updates=3390, lr=9.99809e-05, gnorm=2.546, loss_scale=2, train_wall=11, gb_free=2.8, wall=38874
2021-06-19 05:26:51 | INFO | train_inner | epoch 002: 414 / 3002 loss=2.929, ppl=7.62, wps=5783, ups=0.09, wpb=64741, bsz=128, num_updates=3391, lr=9.99809e-05, gnorm=3.452, loss_scale=2, train_wall=11, gb_free=2.8, wall=38885
2021-06-19 05:27:02 | INFO | train_inner | epoch 002: 415 / 3002 loss=2.948, ppl=7.72, wps=5700.5, ups=0.09, wpb=64765, bsz=128, num_updates=3392, lr=9.99809e-05, gnorm=2.485, loss_scale=2, train_wall=11, gb_free=2.8, wall=38896
2021-06-19 05:27:13 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-19 05:27:24 | INFO | train_inner | epoch 002: 417 / 3002 loss=2.83, ppl=7.11, wps=2934.1, ups=0.05, wpb=64829, bsz=128, num_updates=3393, lr=9.99809e-05, gnorm=4.484, loss_scale=1, train_wall=21, gb_free=2.8, wall=38919
2021-06-19 05:27:35 | INFO | train_inner | epoch 002: 418 / 3002 loss=2.769, ppl=6.82, wps=5829.1, ups=0.09, wpb=64892, bsz=128, num_updates=3394, lr=9.99808e-05, gnorm=2.725, loss_scale=1, train_wall=11, gb_free=2.8, wall=38930
2021-06-19 05:27:46 | INFO | train_inner | epoch 002: 419 / 3002 loss=3.001, ppl=8.01, wps=5913.2, ups=0.09, wpb=64943, bsz=128, num_updates=3395, lr=9.99808e-05, gnorm=2.407, loss_scale=1, train_wall=11, gb_free=2.8, wall=38941
2021-06-19 05:27:57 | INFO | train_inner | epoch 002: 420 / 3002 loss=2.695, ppl=6.48, wps=5801.2, ups=0.09, wpb=64924, bsz=128, num_updates=3396, lr=9.99808e-05, gnorm=2.403, loss_scale=1, train_wall=11, gb_free=2.8, wall=38952
2021-06-19 05:28:08 | INFO | train_inner | epoch 002: 421 / 3002 loss=2.835, ppl=7.13, wps=5880.9, ups=0.09, wpb=64769, bsz=128, num_updates=3397, lr=9.99808e-05, gnorm=2.422, loss_scale=1, train_wall=11, gb_free=2.8, wall=38963
2021-06-19 05:28:19 | INFO | train_inner | epoch 002: 422 / 3002 loss=2.96, ppl=7.78, wps=5913.3, ups=0.09, wpb=64849, bsz=128, num_updates=3398, lr=9.99808e-05, gnorm=2.405, loss_scale=1, train_wall=10, gb_free=2.8, wall=38974
2021-06-19 05:28:30 | INFO | train_inner | epoch 002: 423 / 3002 loss=2.771, ppl=6.83, wps=5902.2, ups=0.09, wpb=64844, bsz=128, num_updates=3399, lr=9.99808e-05, gnorm=2.365, loss_scale=1, train_wall=11, gb_free=2.8, wall=38985
2021-06-19 05:28:42 | INFO | train_inner | epoch 002: 424 / 3002 loss=2.907, ppl=7.5, wps=5817.3, ups=0.09, wpb=64737, bsz=128, num_updates=3400, lr=9.99808e-05, gnorm=2.498, loss_scale=1, train_wall=11, gb_free=2.8, wall=38996
2021-06-19 05:28:53 | INFO | train_inner | epoch 002: 425 / 3002 loss=2.905, ppl=7.49, wps=5894.1, ups=0.09, wpb=64836, bsz=128, num_updates=3401, lr=9.99808e-05, gnorm=2.337, loss_scale=1, train_wall=11, gb_free=2.8, wall=39007
2021-06-19 05:29:04 | INFO | train_inner | epoch 002: 426 / 3002 loss=2.733, ppl=6.65, wps=5931.7, ups=0.09, wpb=64822, bsz=128, num_updates=3402, lr=9.99808e-05, gnorm=2.377, loss_scale=1, train_wall=10, gb_free=2.8, wall=39018
2021-06-19 05:29:15 | INFO | train_inner | epoch 002: 427 / 3002 loss=2.843, ppl=7.18, wps=5849.8, ups=0.09, wpb=64772, bsz=128, num_updates=3403, lr=9.99808e-05, gnorm=2.538, loss_scale=1, train_wall=11, gb_free=2.8, wall=39029
2021-06-19 05:29:26 | INFO | train_inner | epoch 002: 428 / 3002 loss=2.659, ppl=6.32, wps=5783.3, ups=0.09, wpb=64845, bsz=128, num_updates=3404, lr=9.99808e-05, gnorm=2.436, loss_scale=1, train_wall=11, gb_free=2.8, wall=39040
2021-06-19 05:29:37 | INFO | train_inner | epoch 002: 429 / 3002 loss=2.84, ppl=7.16, wps=5981.5, ups=0.09, wpb=64818, bsz=128, num_updates=3405, lr=9.99808e-05, gnorm=2.765, loss_scale=1, train_wall=10, gb_free=2.8, wall=39051
2021-06-19 05:29:47 | INFO | train_inner | epoch 002: 430 / 3002 loss=2.739, ppl=6.68, wps=6011.6, ups=0.09, wpb=64795, bsz=128, num_updates=3406, lr=9.99808e-05, gnorm=2.51, loss_scale=1, train_wall=10, gb_free=2.8, wall=39062
2021-06-19 05:29:58 | INFO | train_inner | epoch 002: 431 / 3002 loss=2.702, ppl=6.51, wps=5867.5, ups=0.09, wpb=64820, bsz=128, num_updates=3407, lr=9.99807e-05, gnorm=2.445, loss_scale=1, train_wall=11, gb_free=2.8, wall=39073
2021-06-19 05:30:09 | INFO | train_inner | epoch 002: 432 / 3002 loss=2.699, ppl=6.49, wps=5942.5, ups=0.09, wpb=64854, bsz=128, num_updates=3408, lr=9.99807e-05, gnorm=3.444, loss_scale=1, train_wall=10, gb_free=2.8, wall=39084
2021-06-19 05:30:20 | INFO | train_inner | epoch 002: 433 / 3002 loss=2.907, ppl=7.5, wps=5871.2, ups=0.09, wpb=64807, bsz=128, num_updates=3409, lr=9.99807e-05, gnorm=2.309, loss_scale=1, train_wall=11, gb_free=2.8, wall=39095
2021-06-19 05:30:31 | INFO | train_inner | epoch 002: 434 / 3002 loss=2.758, ppl=6.76, wps=5985.5, ups=0.09, wpb=64878, bsz=128, num_updates=3410, lr=9.99807e-05, gnorm=2.801, loss_scale=1, train_wall=10, gb_free=2.8, wall=39106
2021-06-19 05:30:42 | INFO | train_inner | epoch 002: 435 / 3002 loss=2.826, ppl=7.09, wps=5820.2, ups=0.09, wpb=64887, bsz=128, num_updates=3411, lr=9.99807e-05, gnorm=2.283, loss_scale=1, train_wall=11, gb_free=2.8, wall=39117
2021-06-19 05:30:54 | INFO | train_inner | epoch 002: 436 / 3002 loss=2.799, ppl=6.96, wps=5820.7, ups=0.09, wpb=64776, bsz=128, num_updates=3412, lr=9.99807e-05, gnorm=2.234, loss_scale=1, train_wall=11, gb_free=2.8, wall=39128
2021-06-19 05:31:05 | INFO | train_inner | epoch 002: 437 / 3002 loss=2.843, ppl=7.18, wps=5795.5, ups=0.09, wpb=64866, bsz=128, num_updates=3413, lr=9.99807e-05, gnorm=2.303, loss_scale=1, train_wall=11, gb_free=2.8, wall=39139
2021-06-19 05:31:16 | INFO | train_inner | epoch 002: 438 / 3002 loss=2.859, ppl=7.25, wps=5822.6, ups=0.09, wpb=64799, bsz=128, num_updates=3414, lr=9.99807e-05, gnorm=14.64, loss_scale=1, train_wall=11, gb_free=2.8, wall=39150
2021-06-19 05:31:27 | INFO | train_inner | epoch 002: 439 / 3002 loss=2.917, ppl=7.55, wps=5741.2, ups=0.09, wpb=64854, bsz=128, num_updates=3415, lr=9.99807e-05, gnorm=2.633, loss_scale=1, train_wall=11, gb_free=2.8, wall=39161
2021-06-19 05:31:38 | INFO | train_inner | epoch 002: 440 / 3002 loss=2.818, ppl=7.05, wps=5878.2, ups=0.09, wpb=64842, bsz=128, num_updates=3416, lr=9.99807e-05, gnorm=2.356, loss_scale=1, train_wall=11, gb_free=2.8, wall=39173
2021-06-19 05:31:49 | INFO | train_inner | epoch 002: 441 / 3002 loss=2.977, ppl=7.87, wps=5856.4, ups=0.09, wpb=64835, bsz=128, num_updates=3417, lr=9.99807e-05, gnorm=2.471, loss_scale=1, train_wall=11, gb_free=2.8, wall=39184
2021-06-19 05:32:01 | INFO | train_inner | epoch 002: 442 / 3002 loss=2.818, ppl=7.05, wps=5752.4, ups=0.09, wpb=64871, bsz=128, num_updates=3418, lr=9.99807e-05, gnorm=2.404, loss_scale=1, train_wall=11, gb_free=2.8, wall=39195
2021-06-19 05:32:12 | INFO | train_inner | epoch 002: 443 / 3002 loss=2.901, ppl=7.47, wps=5801.5, ups=0.09, wpb=64876, bsz=128, num_updates=3419, lr=9.99806e-05, gnorm=2.663, loss_scale=1, train_wall=11, gb_free=2.8, wall=39206
2021-06-19 05:32:23 | INFO | train_inner | epoch 002: 444 / 3002 loss=2.804, ppl=6.99, wps=5790, ups=0.09, wpb=64781, bsz=128, num_updates=3420, lr=9.99806e-05, gnorm=2.342, loss_scale=1, train_wall=11, gb_free=2.8, wall=39217
2021-06-19 05:32:34 | INFO | train_inner | epoch 002: 445 / 3002 loss=2.752, ppl=6.74, wps=5760.7, ups=0.09, wpb=64854, bsz=128, num_updates=3421, lr=9.99806e-05, gnorm=2.378, loss_scale=1, train_wall=11, gb_free=2.8, wall=39228
2021-06-19 05:32:45 | INFO | train_inner | epoch 002: 446 / 3002 loss=2.874, ppl=7.33, wps=5859.6, ups=0.09, wpb=64783, bsz=128, num_updates=3422, lr=9.99806e-05, gnorm=2.394, loss_scale=1, train_wall=11, gb_free=2.8, wall=39240
2021-06-19 05:32:56 | INFO | train_inner | epoch 002: 447 / 3002 loss=2.745, ppl=6.71, wps=5984.1, ups=0.09, wpb=64787, bsz=128, num_updates=3423, lr=9.99806e-05, gnorm=2.332, loss_scale=1, train_wall=10, gb_free=2.8, wall=39250
2021-06-19 05:33:07 | INFO | train_inner | epoch 002: 448 / 3002 loss=2.835, ppl=7.14, wps=5829.3, ups=0.09, wpb=64843, bsz=128, num_updates=3424, lr=9.99806e-05, gnorm=2.402, loss_scale=1, train_wall=11, gb_free=2.8, wall=39261
2021-06-19 05:33:18 | INFO | train_inner | epoch 002: 449 / 3002 loss=2.759, ppl=6.77, wps=5787.5, ups=0.09, wpb=64816, bsz=128, num_updates=3425, lr=9.99806e-05, gnorm=2.514, loss_scale=1, train_wall=11, gb_free=2.8, wall=39273
2021-06-19 05:33:29 | INFO | train_inner | epoch 002: 450 / 3002 loss=2.767, ppl=6.81, wps=5905, ups=0.09, wpb=64892, bsz=128, num_updates=3426, lr=9.99806e-05, gnorm=2.524, loss_scale=1, train_wall=11, gb_free=2.8, wall=39284
2021-06-19 05:33:40 | INFO | train_inner | epoch 002: 451 / 3002 loss=2.69, ppl=6.45, wps=6039.5, ups=0.09, wpb=64854, bsz=128, num_updates=3427, lr=9.99806e-05, gnorm=2.498, loss_scale=1, train_wall=10, gb_free=2.8, wall=39294
2021-06-19 05:33:51 | INFO | train_inner | epoch 002: 452 / 3002 loss=2.762, ppl=6.79, wps=5864.3, ups=0.09, wpb=64805, bsz=128, num_updates=3428, lr=9.99806e-05, gnorm=2.429, loss_scale=1, train_wall=11, gb_free=2.8, wall=39305
2021-06-19 05:34:02 | INFO | train_inner | epoch 002: 453 / 3002 loss=2.743, ppl=6.69, wps=5920.3, ups=0.09, wpb=64823, bsz=128, num_updates=3429, lr=9.99806e-05, gnorm=2.489, loss_scale=1, train_wall=10, gb_free=2.8, wall=39316
2021-06-19 05:34:13 | INFO | train_inner | epoch 002: 454 / 3002 loss=2.653, ppl=6.29, wps=5777.7, ups=0.09, wpb=64857, bsz=128, num_updates=3430, lr=9.99806e-05, gnorm=2.46, loss_scale=1, train_wall=11, gb_free=2.8, wall=39328
2021-06-19 05:34:24 | INFO | train_inner | epoch 002: 455 / 3002 loss=2.826, ppl=7.09, wps=5937.4, ups=0.09, wpb=64880, bsz=128, num_updates=3431, lr=9.99806e-05, gnorm=2.977, loss_scale=1, train_wall=10, gb_free=2.8, wall=39339
2021-06-19 05:34:35 | INFO | train_inner | epoch 002: 456 / 3002 loss=2.543, ppl=5.83, wps=5854.1, ups=0.09, wpb=64892, bsz=128, num_updates=3432, lr=9.99805e-05, gnorm=3.287, loss_scale=1, train_wall=11, gb_free=2.8, wall=39350
2021-06-19 05:34:47 | INFO | train_inner | epoch 002: 457 / 3002 loss=2.874, ppl=7.33, wps=5779.7, ups=0.09, wpb=64774, bsz=128, num_updates=3433, lr=9.99805e-05, gnorm=2.314, loss_scale=1, train_wall=11, gb_free=2.8, wall=39361
2021-06-19 05:34:57 | INFO | train_inner | epoch 002: 458 / 3002 loss=2.796, ppl=6.94, wps=5916.7, ups=0.09, wpb=64824, bsz=128, num_updates=3434, lr=9.99805e-05, gnorm=3.186, loss_scale=1, train_wall=11, gb_free=2.8, wall=39372
2021-06-19 05:35:08 | INFO | train_inner | epoch 002: 459 / 3002 loss=2.679, ppl=6.4, wps=5965.3, ups=0.09, wpb=64910, bsz=128, num_updates=3435, lr=9.99805e-05, gnorm=2.477, loss_scale=1, train_wall=10, gb_free=2.8, wall=39383
2021-06-19 05:35:19 | INFO | train_inner | epoch 002: 460 / 3002 loss=2.865, ppl=7.29, wps=5880.8, ups=0.09, wpb=64888, bsz=128, num_updates=3436, lr=9.99805e-05, gnorm=2.978, loss_scale=1, train_wall=11, gb_free=2.8, wall=39394
2021-06-19 05:35:30 | INFO | train_inner | epoch 002: 461 / 3002 loss=2.938, ppl=7.67, wps=5952.9, ups=0.09, wpb=64871, bsz=128, num_updates=3437, lr=9.99805e-05, gnorm=7.105, loss_scale=1, train_wall=10, gb_free=2.8, wall=39405
2021-06-19 05:35:42 | INFO | train_inner | epoch 002: 462 / 3002 loss=2.867, ppl=7.3, wps=5747.8, ups=0.09, wpb=64886, bsz=128, num_updates=3438, lr=9.99805e-05, gnorm=2.515, loss_scale=1, train_wall=11, gb_free=2.8, wall=39416
2021-06-19 05:35:53 | INFO | train_inner | epoch 002: 463 / 3002 loss=2.863, ppl=7.28, wps=5861.2, ups=0.09, wpb=64827, bsz=128, num_updates=3439, lr=9.99805e-05, gnorm=2.47, loss_scale=1, train_wall=11, gb_free=2.8, wall=39427
2021-06-19 05:36:04 | INFO | train_inner | epoch 002: 464 / 3002 loss=2.753, ppl=6.74, wps=5847.2, ups=0.09, wpb=64789, bsz=128, num_updates=3440, lr=9.99805e-05, gnorm=2.262, loss_scale=1, train_wall=11, gb_free=2.8, wall=39438
2021-06-19 05:36:15 | INFO | train_inner | epoch 002: 465 / 3002 loss=2.871, ppl=7.31, wps=5855.8, ups=0.09, wpb=64829, bsz=128, num_updates=3441, lr=9.99805e-05, gnorm=2.423, loss_scale=1, train_wall=11, gb_free=2.8, wall=39449
2021-06-19 05:36:26 | INFO | train_inner | epoch 002: 466 / 3002 loss=2.836, ppl=7.14, wps=5804.2, ups=0.09, wpb=64864, bsz=128, num_updates=3442, lr=9.99805e-05, gnorm=2.399, loss_scale=1, train_wall=11, gb_free=2.8, wall=39460
2021-06-19 05:36:37 | INFO | train_inner | epoch 002: 467 / 3002 loss=2.842, ppl=7.17, wps=5878.1, ups=0.09, wpb=64880, bsz=128, num_updates=3443, lr=9.99805e-05, gnorm=17.529, loss_scale=1, train_wall=11, gb_free=2.8, wall=39471
2021-06-19 05:36:48 | INFO | train_inner | epoch 002: 468 / 3002 loss=2.888, ppl=7.4, wps=5775.5, ups=0.09, wpb=64804, bsz=128, num_updates=3444, lr=9.99804e-05, gnorm=2.476, loss_scale=1, train_wall=11, gb_free=2.8, wall=39483
2021-06-19 05:36:59 | INFO | train_inner | epoch 002: 469 / 3002 loss=2.797, ppl=6.95, wps=5773.8, ups=0.09, wpb=64810, bsz=128, num_updates=3445, lr=9.99804e-05, gnorm=2.595, loss_scale=1, train_wall=11, gb_free=2.8, wall=39494
2021-06-19 05:37:10 | INFO | train_inner | epoch 002: 470 / 3002 loss=2.897, ppl=7.45, wps=5962.9, ups=0.09, wpb=64859, bsz=128, num_updates=3446, lr=9.99804e-05, gnorm=2.454, loss_scale=1, train_wall=10, gb_free=2.8, wall=39505
2021-06-19 05:37:21 | INFO | train_inner | epoch 002: 471 / 3002 loss=2.888, ppl=7.4, wps=5841.8, ups=0.09, wpb=64747, bsz=128, num_updates=3447, lr=9.99804e-05, gnorm=2.866, loss_scale=1, train_wall=11, gb_free=2.8, wall=39516
2021-06-19 05:37:33 | INFO | train_inner | epoch 002: 472 / 3002 loss=2.785, ppl=6.89, wps=5824.6, ups=0.09, wpb=64837, bsz=128, num_updates=3448, lr=9.99804e-05, gnorm=2.847, loss_scale=1, train_wall=11, gb_free=2.8, wall=39527
2021-06-19 05:37:44 | INFO | train_inner | epoch 002: 473 / 3002 loss=2.829, ppl=7.1, wps=5841.3, ups=0.09, wpb=64818, bsz=128, num_updates=3449, lr=9.99804e-05, gnorm=2.428, loss_scale=1, train_wall=11, gb_free=2.8, wall=39538
2021-06-19 05:37:55 | INFO | train_inner | epoch 002: 474 / 3002 loss=2.809, ppl=7.01, wps=5855.6, ups=0.09, wpb=64846, bsz=128, num_updates=3450, lr=9.99804e-05, gnorm=2.429, loss_scale=1, train_wall=11, gb_free=2.8, wall=39549
2021-06-19 05:38:06 | INFO | train_inner | epoch 002: 475 / 3002 loss=2.826, ppl=7.09, wps=5956.4, ups=0.09, wpb=64861, bsz=128, num_updates=3451, lr=9.99804e-05, gnorm=2.627, loss_scale=1, train_wall=10, gb_free=2.8, wall=39560
2021-06-19 05:38:16 | INFO | train_inner | epoch 002: 476 / 3002 loss=2.918, ppl=7.56, wps=6051.3, ups=0.09, wpb=64841, bsz=128, num_updates=3452, lr=9.99804e-05, gnorm=2.426, loss_scale=1, train_wall=10, gb_free=2.8, wall=39571
2021-06-19 05:38:28 | INFO | train_inner | epoch 002: 477 / 3002 loss=2.878, ppl=7.35, wps=5768.9, ups=0.09, wpb=64925, bsz=128, num_updates=3453, lr=9.99804e-05, gnorm=3.382, loss_scale=1, train_wall=11, gb_free=2.8, wall=39582
2021-06-19 05:38:39 | INFO | train_inner | epoch 002: 478 / 3002 loss=2.865, ppl=7.29, wps=5822.9, ups=0.09, wpb=64850, bsz=128, num_updates=3454, lr=9.99804e-05, gnorm=2.335, loss_scale=1, train_wall=11, gb_free=2.8, wall=39593
2021-06-19 05:38:50 | INFO | train_inner | epoch 002: 479 / 3002 loss=2.647, ppl=6.26, wps=5828.1, ups=0.09, wpb=64814, bsz=128, num_updates=3455, lr=9.99804e-05, gnorm=2.399, loss_scale=1, train_wall=11, gb_free=2.8, wall=39604
2021-06-19 05:39:01 | INFO | train_inner | epoch 002: 480 / 3002 loss=2.768, ppl=6.81, wps=5839.8, ups=0.09, wpb=64784, bsz=128, num_updates=3456, lr=9.99804e-05, gnorm=2.467, loss_scale=1, train_wall=11, gb_free=2.8, wall=39615
2021-06-19 05:39:12 | INFO | train_inner | epoch 002: 481 / 3002 loss=2.613, ppl=6.12, wps=5956.5, ups=0.09, wpb=64905, bsz=128, num_updates=3457, lr=9.99803e-05, gnorm=2.17, loss_scale=1, train_wall=10, gb_free=2.8, wall=39626
2021-06-19 05:39:23 | INFO | train_inner | epoch 002: 482 / 3002 loss=2.718, ppl=6.58, wps=5808.2, ups=0.09, wpb=64867, bsz=128, num_updates=3458, lr=9.99803e-05, gnorm=4.051, loss_scale=1, train_wall=11, gb_free=2.8, wall=39637
2021-06-19 05:39:34 | INFO | train_inner | epoch 002: 483 / 3002 loss=2.797, ppl=6.95, wps=5739, ups=0.09, wpb=64805, bsz=128, num_updates=3459, lr=9.99803e-05, gnorm=2.28, loss_scale=1, train_wall=11, gb_free=2.8, wall=39649
2021-06-19 05:39:46 | INFO | train_inner | epoch 002: 484 / 3002 loss=2.686, ppl=6.44, wps=5708.1, ups=0.09, wpb=64796, bsz=128, num_updates=3460, lr=9.99803e-05, gnorm=2.3, loss_scale=1, train_wall=11, gb_free=2.8, wall=39660
2021-06-19 05:39:57 | INFO | train_inner | epoch 002: 485 / 3002 loss=2.854, ppl=7.23, wps=5837.9, ups=0.09, wpb=64818, bsz=128, num_updates=3461, lr=9.99803e-05, gnorm=5.205, loss_scale=1, train_wall=11, gb_free=2.8, wall=39671
2021-06-19 05:40:08 | INFO | train_inner | epoch 002: 486 / 3002 loss=2.886, ppl=7.39, wps=5824.5, ups=0.09, wpb=64785, bsz=128, num_updates=3462, lr=9.99803e-05, gnorm=2.262, loss_scale=1, train_wall=11, gb_free=2.8, wall=39682
2021-06-19 05:40:19 | INFO | train_inner | epoch 002: 487 / 3002 loss=2.771, ppl=6.83, wps=5938.5, ups=0.09, wpb=64917, bsz=128, num_updates=3463, lr=9.99803e-05, gnorm=2.371, loss_scale=1, train_wall=10, gb_free=2.8, wall=39693
2021-06-19 05:40:30 | INFO | train_inner | epoch 002: 488 / 3002 loss=2.928, ppl=7.61, wps=5824.5, ups=0.09, wpb=64895, bsz=128, num_updates=3464, lr=9.99803e-05, gnorm=2.258, loss_scale=1, train_wall=11, gb_free=2.8, wall=39704
2021-06-19 05:40:41 | INFO | train_inner | epoch 002: 489 / 3002 loss=2.785, ppl=6.89, wps=5846.8, ups=0.09, wpb=64817, bsz=128, num_updates=3465, lr=9.99803e-05, gnorm=2.332, loss_scale=1, train_wall=11, gb_free=2.8, wall=39715
2021-06-19 05:40:52 | INFO | train_inner | epoch 002: 490 / 3002 loss=2.951, ppl=7.74, wps=5892.7, ups=0.09, wpb=64797, bsz=128, num_updates=3466, lr=9.99803e-05, gnorm=2.447, loss_scale=1, train_wall=11, gb_free=2.8, wall=39726
2021-06-19 05:41:03 | INFO | train_inner | epoch 002: 491 / 3002 loss=2.864, ppl=7.28, wps=5882.2, ups=0.09, wpb=64895, bsz=128, num_updates=3467, lr=9.99803e-05, gnorm=4.819, loss_scale=1, train_wall=11, gb_free=2.8, wall=39737
2021-06-19 05:41:14 | INFO | train_inner | epoch 002: 492 / 3002 loss=2.745, ppl=6.71, wps=5964.7, ups=0.09, wpb=64868, bsz=128, num_updates=3468, lr=9.99803e-05, gnorm=2.302, loss_scale=1, train_wall=10, gb_free=2.8, wall=39748
2021-06-19 05:41:25 | INFO | train_inner | epoch 002: 493 / 3002 loss=2.849, ppl=7.21, wps=5802.1, ups=0.09, wpb=64776, bsz=128, num_updates=3469, lr=9.99802e-05, gnorm=2.212, loss_scale=1, train_wall=11, gb_free=2.8, wall=39759
2021-06-19 05:41:36 | INFO | train_inner | epoch 002: 494 / 3002 loss=2.687, ppl=6.44, wps=5789.2, ups=0.09, wpb=64812, bsz=128, num_updates=3470, lr=9.99802e-05, gnorm=2.257, loss_scale=1, train_wall=11, gb_free=2.8, wall=39771
2021-06-19 05:41:47 | INFO | train_inner | epoch 002: 495 / 3002 loss=2.64, ppl=6.23, wps=5844.9, ups=0.09, wpb=64841, bsz=128, num_updates=3471, lr=9.99802e-05, gnorm=2.502, loss_scale=1, train_wall=11, gb_free=2.8, wall=39782
2021-06-19 05:41:59 | INFO | train_inner | epoch 002: 496 / 3002 loss=2.885, ppl=7.39, wps=5785.6, ups=0.09, wpb=64820, bsz=128, num_updates=3472, lr=9.99802e-05, gnorm=2.22, loss_scale=1, train_wall=11, gb_free=2.8, wall=39793
2021-06-19 05:42:10 | INFO | train_inner | epoch 002: 497 / 3002 loss=2.958, ppl=7.77, wps=5846.3, ups=0.09, wpb=64831, bsz=128, num_updates=3473, lr=9.99802e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=39804
2021-06-19 05:42:21 | INFO | train_inner | epoch 002: 498 / 3002 loss=2.658, ppl=6.31, wps=5793.7, ups=0.09, wpb=64849, bsz=128, num_updates=3474, lr=9.99802e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=39815
2021-06-19 05:42:32 | INFO | train_inner | epoch 002: 499 / 3002 loss=2.745, ppl=6.7, wps=6032.8, ups=0.09, wpb=64835, bsz=128, num_updates=3475, lr=9.99802e-05, gnorm=2.147, loss_scale=1, train_wall=10, gb_free=2.8, wall=39826
2021-06-19 05:42:43 | INFO | train_inner | epoch 002: 500 / 3002 loss=2.777, ppl=6.86, wps=5818.3, ups=0.09, wpb=64926, bsz=128, num_updates=3476, lr=9.99802e-05, gnorm=2.395, loss_scale=1, train_wall=11, gb_free=2.8, wall=39837
2021-06-19 05:42:54 | INFO | train_inner | epoch 002: 501 / 3002 loss=2.831, ppl=7.12, wps=5884.5, ups=0.09, wpb=64910, bsz=128, num_updates=3477, lr=9.99802e-05, gnorm=2.373, loss_scale=1, train_wall=11, gb_free=2.8, wall=39848
2021-06-19 05:43:05 | INFO | train_inner | epoch 002: 502 / 3002 loss=2.861, ppl=7.27, wps=5911.1, ups=0.09, wpb=64812, bsz=128, num_updates=3478, lr=9.99802e-05, gnorm=2.414, loss_scale=1, train_wall=11, gb_free=2.8, wall=39859
2021-06-19 05:43:16 | INFO | train_inner | epoch 002: 503 / 3002 loss=2.796, ppl=6.95, wps=5833.6, ups=0.09, wpb=64801, bsz=128, num_updates=3479, lr=9.99802e-05, gnorm=2.374, loss_scale=1, train_wall=11, gb_free=2.8, wall=39870
2021-06-19 05:43:27 | INFO | train_inner | epoch 002: 504 / 3002 loss=2.769, ppl=6.82, wps=5775.6, ups=0.09, wpb=64837, bsz=128, num_updates=3480, lr=9.99802e-05, gnorm=2.34, loss_scale=1, train_wall=11, gb_free=2.8, wall=39881
2021-06-19 05:43:38 | INFO | train_inner | epoch 002: 505 / 3002 loss=2.87, ppl=7.31, wps=5973.6, ups=0.09, wpb=64850, bsz=128, num_updates=3481, lr=9.99802e-05, gnorm=2.205, loss_scale=1, train_wall=10, gb_free=2.8, wall=39892
2021-06-19 05:43:49 | INFO | train_inner | epoch 002: 506 / 3002 loss=2.839, ppl=7.15, wps=5907.6, ups=0.09, wpb=64769, bsz=128, num_updates=3482, lr=9.99801e-05, gnorm=3.407, loss_scale=1, train_wall=10, gb_free=2.8, wall=39903
2021-06-19 05:44:00 | INFO | train_inner | epoch 002: 507 / 3002 loss=2.883, ppl=7.38, wps=5887, ups=0.09, wpb=64878, bsz=128, num_updates=3483, lr=9.99801e-05, gnorm=2.384, loss_scale=1, train_wall=11, gb_free=2.8, wall=39914
2021-06-19 05:44:11 | INFO | train_inner | epoch 002: 508 / 3002 loss=2.886, ppl=7.39, wps=5966.9, ups=0.09, wpb=64883, bsz=128, num_updates=3484, lr=9.99801e-05, gnorm=2.338, loss_scale=1, train_wall=10, gb_free=2.8, wall=39925
2021-06-19 05:44:22 | INFO | train_inner | epoch 002: 509 / 3002 loss=2.728, ppl=6.63, wps=5806.4, ups=0.09, wpb=64838, bsz=128, num_updates=3485, lr=9.99801e-05, gnorm=2.422, loss_scale=1, train_wall=11, gb_free=2.8, wall=39936
2021-06-19 05:44:33 | INFO | train_inner | epoch 002: 510 / 3002 loss=2.807, ppl=7, wps=5826, ups=0.09, wpb=64890, bsz=128, num_updates=3486, lr=9.99801e-05, gnorm=2.461, loss_scale=1, train_wall=11, gb_free=2.8, wall=39947
2021-06-19 05:44:44 | INFO | train_inner | epoch 002: 511 / 3002 loss=2.834, ppl=7.13, wps=5937.4, ups=0.09, wpb=64821, bsz=128, num_updates=3487, lr=9.99801e-05, gnorm=2.356, loss_scale=1, train_wall=10, gb_free=2.8, wall=39958
2021-06-19 05:44:55 | INFO | train_inner | epoch 002: 512 / 3002 loss=2.806, ppl=6.99, wps=5830.7, ups=0.09, wpb=64902, bsz=128, num_updates=3488, lr=9.99801e-05, gnorm=2.435, loss_scale=1, train_wall=11, gb_free=2.8, wall=39970
2021-06-19 05:45:06 | INFO | train_inner | epoch 002: 513 / 3002 loss=2.923, ppl=7.58, wps=5822.2, ups=0.09, wpb=64780, bsz=128, num_updates=3489, lr=9.99801e-05, gnorm=2.561, loss_scale=1, train_wall=11, gb_free=2.8, wall=39981
2021-06-19 05:45:17 | INFO | train_inner | epoch 002: 514 / 3002 loss=2.77, ppl=6.82, wps=5890.7, ups=0.09, wpb=64889, bsz=128, num_updates=3490, lr=9.99801e-05, gnorm=2.51, loss_scale=1, train_wall=11, gb_free=2.8, wall=39992
2021-06-19 05:45:28 | INFO | train_inner | epoch 002: 515 / 3002 loss=2.761, ppl=6.78, wps=5823.2, ups=0.09, wpb=64824, bsz=128, num_updates=3491, lr=9.99801e-05, gnorm=2.256, loss_scale=1, train_wall=11, gb_free=2.8, wall=40003
2021-06-19 05:45:40 | INFO | train_inner | epoch 002: 516 / 3002 loss=2.851, ppl=7.22, wps=5857.6, ups=0.09, wpb=64802, bsz=128, num_updates=3492, lr=9.99801e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=40014
2021-06-19 05:45:51 | INFO | train_inner | epoch 002: 517 / 3002 loss=2.662, ppl=6.33, wps=5824.5, ups=0.09, wpb=64857, bsz=128, num_updates=3493, lr=9.99801e-05, gnorm=2.57, loss_scale=1, train_wall=11, gb_free=2.8, wall=40025
2021-06-19 05:46:02 | INFO | train_inner | epoch 002: 518 / 3002 loss=2.746, ppl=6.71, wps=5762, ups=0.09, wpb=64881, bsz=128, num_updates=3494, lr=9.998e-05, gnorm=2.598, loss_scale=1, train_wall=11, gb_free=2.8, wall=40036
2021-06-19 05:46:13 | INFO | train_inner | epoch 002: 519 / 3002 loss=2.733, ppl=6.65, wps=5888.1, ups=0.09, wpb=64881, bsz=128, num_updates=3495, lr=9.998e-05, gnorm=2.308, loss_scale=1, train_wall=11, gb_free=2.8, wall=40047
2021-06-19 05:46:24 | INFO | train_inner | epoch 002: 520 / 3002 loss=2.927, ppl=7.6, wps=5852.5, ups=0.09, wpb=64773, bsz=128, num_updates=3496, lr=9.998e-05, gnorm=2.474, loss_scale=1, train_wall=11, gb_free=2.8, wall=40058
2021-06-19 05:46:35 | INFO | train_inner | epoch 002: 521 / 3002 loss=2.869, ppl=7.3, wps=5934.2, ups=0.09, wpb=64749, bsz=128, num_updates=3497, lr=9.998e-05, gnorm=2.347, loss_scale=1, train_wall=10, gb_free=2.8, wall=40069
2021-06-19 05:46:46 | INFO | train_inner | epoch 002: 522 / 3002 loss=2.802, ppl=6.97, wps=5872.1, ups=0.09, wpb=64876, bsz=128, num_updates=3498, lr=9.998e-05, gnorm=2.797, loss_scale=1, train_wall=11, gb_free=2.8, wall=40080
2021-06-19 05:46:57 | INFO | train_inner | epoch 002: 523 / 3002 loss=2.776, ppl=6.85, wps=5856.4, ups=0.09, wpb=64790, bsz=128, num_updates=3499, lr=9.998e-05, gnorm=2.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=40091
2021-06-19 05:47:08 | INFO | train_inner | epoch 002: 524 / 3002 loss=2.797, ppl=6.95, wps=5943.5, ups=0.09, wpb=64865, bsz=128, num_updates=3500, lr=9.998e-05, gnorm=2.401, loss_scale=1, train_wall=10, gb_free=2.8, wall=40102
2021-06-19 05:47:19 | INFO | train_inner | epoch 002: 525 / 3002 loss=2.827, ppl=7.09, wps=5790.2, ups=0.09, wpb=64744, bsz=128, num_updates=3501, lr=9.998e-05, gnorm=2.329, loss_scale=1, train_wall=11, gb_free=2.8, wall=40113
2021-06-19 05:47:30 | INFO | train_inner | epoch 002: 526 / 3002 loss=2.706, ppl=6.52, wps=5967.3, ups=0.09, wpb=64878, bsz=128, num_updates=3502, lr=9.998e-05, gnorm=2.412, loss_scale=1, train_wall=10, gb_free=2.8, wall=40124
2021-06-19 05:47:41 | INFO | train_inner | epoch 002: 527 / 3002 loss=2.91, ppl=7.51, wps=5878.7, ups=0.09, wpb=64866, bsz=128, num_updates=3503, lr=9.998e-05, gnorm=2.383, loss_scale=1, train_wall=11, gb_free=2.8, wall=40135
2021-06-19 05:47:52 | INFO | train_inner | epoch 002: 528 / 3002 loss=2.828, ppl=7.1, wps=5819.3, ups=0.09, wpb=64847, bsz=128, num_updates=3504, lr=9.998e-05, gnorm=2.336, loss_scale=1, train_wall=11, gb_free=2.8, wall=40147
2021-06-19 05:48:03 | INFO | train_inner | epoch 002: 529 / 3002 loss=2.839, ppl=7.15, wps=5882.8, ups=0.09, wpb=64826, bsz=128, num_updates=3505, lr=9.998e-05, gnorm=2.572, loss_scale=1, train_wall=11, gb_free=2.8, wall=40158
2021-06-19 05:48:14 | INFO | train_inner | epoch 002: 530 / 3002 loss=2.756, ppl=6.76, wps=5748.8, ups=0.09, wpb=64821, bsz=128, num_updates=3506, lr=9.998e-05, gnorm=2.237, loss_scale=1, train_wall=11, gb_free=2.8, wall=40169
2021-06-19 05:48:25 | INFO | train_inner | epoch 002: 531 / 3002 loss=2.796, ppl=6.94, wps=5916.8, ups=0.09, wpb=64885, bsz=128, num_updates=3507, lr=9.99799e-05, gnorm=2.27, loss_scale=1, train_wall=10, gb_free=2.8, wall=40180
2021-06-19 05:48:36 | INFO | train_inner | epoch 002: 532 / 3002 loss=2.804, ppl=6.98, wps=5908.4, ups=0.09, wpb=64856, bsz=128, num_updates=3508, lr=9.99799e-05, gnorm=2.198, loss_scale=1, train_wall=11, gb_free=2.8, wall=40191
2021-06-19 05:48:47 | INFO | train_inner | epoch 002: 533 / 3002 loss=2.81, ppl=7.01, wps=5942.5, ups=0.09, wpb=64932, bsz=128, num_updates=3509, lr=9.99799e-05, gnorm=2.257, loss_scale=1, train_wall=10, gb_free=2.8, wall=40202
2021-06-19 05:48:59 | INFO | train_inner | epoch 002: 534 / 3002 loss=2.795, ppl=6.94, wps=5786.9, ups=0.09, wpb=64832, bsz=128, num_updates=3510, lr=9.99799e-05, gnorm=2.257, loss_scale=1, train_wall=11, gb_free=2.8, wall=40213
2021-06-19 05:49:10 | INFO | train_inner | epoch 002: 535 / 3002 loss=2.928, ppl=7.61, wps=5799.8, ups=0.09, wpb=64919, bsz=128, num_updates=3511, lr=9.99799e-05, gnorm=2.268, loss_scale=1, train_wall=11, gb_free=2.8, wall=40224
2021-06-19 05:49:21 | INFO | train_inner | epoch 002: 536 / 3002 loss=2.665, ppl=6.34, wps=5808.9, ups=0.09, wpb=64854, bsz=128, num_updates=3512, lr=9.99799e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=40235
2021-06-19 05:49:32 | INFO | train_inner | epoch 002: 537 / 3002 loss=2.755, ppl=6.75, wps=5878.7, ups=0.09, wpb=64770, bsz=128, num_updates=3513, lr=9.99799e-05, gnorm=4.439, loss_scale=1, train_wall=11, gb_free=2.8, wall=40246
2021-06-19 05:49:43 | INFO | train_inner | epoch 002: 538 / 3002 loss=2.775, ppl=6.84, wps=5930.2, ups=0.09, wpb=64868, bsz=128, num_updates=3514, lr=9.99799e-05, gnorm=2.738, loss_scale=1, train_wall=10, gb_free=2.8, wall=40257
2021-06-19 05:49:54 | INFO | train_inner | epoch 002: 539 / 3002 loss=2.83, ppl=7.11, wps=5769.2, ups=0.09, wpb=64836, bsz=128, num_updates=3515, lr=9.99799e-05, gnorm=2.34, loss_scale=1, train_wall=11, gb_free=2.8, wall=40268
2021-06-19 05:50:05 | INFO | train_inner | epoch 002: 540 / 3002 loss=2.825, ppl=7.08, wps=5786.9, ups=0.09, wpb=64755, bsz=128, num_updates=3516, lr=9.99799e-05, gnorm=2.215, loss_scale=1, train_wall=11, gb_free=2.8, wall=40280
2021-06-19 05:50:16 | INFO | train_inner | epoch 002: 541 / 3002 loss=2.966, ppl=7.81, wps=5844.9, ups=0.09, wpb=64947, bsz=128, num_updates=3517, lr=9.99799e-05, gnorm=14.89, loss_scale=1, train_wall=11, gb_free=2.8, wall=40291
2021-06-19 05:50:27 | INFO | train_inner | epoch 002: 542 / 3002 loss=2.917, ppl=7.56, wps=5918.7, ups=0.09, wpb=64808, bsz=128, num_updates=3518, lr=9.99799e-05, gnorm=2.279, loss_scale=1, train_wall=11, gb_free=2.8, wall=40302
2021-06-19 05:50:38 | INFO | train_inner | epoch 002: 543 / 3002 loss=2.623, ppl=6.16, wps=5862.9, ups=0.09, wpb=64850, bsz=128, num_updates=3519, lr=9.99798e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=40313
2021-06-19 05:50:50 | INFO | train_inner | epoch 002: 544 / 3002 loss=2.773, ppl=6.83, wps=5781.5, ups=0.09, wpb=64819, bsz=128, num_updates=3520, lr=9.99798e-05, gnorm=2.399, loss_scale=2, train_wall=11, gb_free=2.8, wall=40324
2021-06-19 05:51:01 | INFO | train_inner | epoch 002: 545 / 3002 loss=2.765, ppl=6.8, wps=5826.9, ups=0.09, wpb=64909, bsz=128, num_updates=3521, lr=9.99798e-05, gnorm=2.541, loss_scale=2, train_wall=11, gb_free=2.8, wall=40335
2021-06-19 05:51:12 | INFO | train_inner | epoch 002: 546 / 3002 loss=2.75, ppl=6.73, wps=5930.7, ups=0.09, wpb=64880, bsz=128, num_updates=3522, lr=9.99798e-05, gnorm=2.538, loss_scale=2, train_wall=11, gb_free=2.8, wall=40346
2021-06-19 05:51:23 | INFO | train_inner | epoch 002: 547 / 3002 loss=2.882, ppl=7.37, wps=5918.9, ups=0.09, wpb=64885, bsz=128, num_updates=3523, lr=9.99798e-05, gnorm=2.513, loss_scale=2, train_wall=10, gb_free=2.8, wall=40357
2021-06-19 05:51:34 | INFO | train_inner | epoch 002: 548 / 3002 loss=2.727, ppl=6.62, wps=5847.1, ups=0.09, wpb=64825, bsz=128, num_updates=3524, lr=9.99798e-05, gnorm=2.378, loss_scale=2, train_wall=11, gb_free=2.8, wall=40368
2021-06-19 05:51:45 | INFO | train_inner | epoch 002: 549 / 3002 loss=2.785, ppl=6.89, wps=5914.2, ups=0.09, wpb=64817, bsz=128, num_updates=3525, lr=9.99798e-05, gnorm=2.658, loss_scale=2, train_wall=11, gb_free=2.8, wall=40379
2021-06-19 05:51:56 | INFO | train_inner | epoch 002: 550 / 3002 loss=2.791, ppl=6.92, wps=5950.5, ups=0.09, wpb=64799, bsz=128, num_updates=3526, lr=9.99798e-05, gnorm=2.346, loss_scale=2, train_wall=10, gb_free=2.8, wall=40390
2021-06-19 05:52:07 | INFO | train_inner | epoch 002: 551 / 3002 loss=2.949, ppl=7.72, wps=5855.3, ups=0.09, wpb=64847, bsz=128, num_updates=3527, lr=9.99798e-05, gnorm=2.677, loss_scale=2, train_wall=11, gb_free=2.8, wall=40401
2021-06-19 05:52:18 | INFO | train_inner | epoch 002: 552 / 3002 loss=2.766, ppl=6.8, wps=5911.7, ups=0.09, wpb=64806, bsz=128, num_updates=3528, lr=9.99798e-05, gnorm=2.376, loss_scale=2, train_wall=11, gb_free=2.8, wall=40412
2021-06-19 05:52:29 | INFO | train_inner | epoch 002: 553 / 3002 loss=2.833, ppl=7.13, wps=5879.2, ups=0.09, wpb=64861, bsz=128, num_updates=3529, lr=9.99798e-05, gnorm=2.319, loss_scale=2, train_wall=11, gb_free=2.8, wall=40423
2021-06-19 05:52:40 | INFO | train_inner | epoch 002: 554 / 3002 loss=2.701, ppl=6.5, wps=5831.2, ups=0.09, wpb=64814, bsz=128, num_updates=3530, lr=9.99798e-05, gnorm=2.384, loss_scale=2, train_wall=11, gb_free=2.8, wall=40434
2021-06-19 05:52:51 | INFO | train_inner | epoch 002: 555 / 3002 loss=2.881, ppl=7.37, wps=5761.3, ups=0.09, wpb=64836, bsz=128, num_updates=3531, lr=9.99798e-05, gnorm=2.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=40445
2021-06-19 05:53:02 | INFO | train_inner | epoch 002: 556 / 3002 loss=3, ppl=8, wps=5901.4, ups=0.09, wpb=64878, bsz=128, num_updates=3532, lr=9.99797e-05, gnorm=2.239, loss_scale=2, train_wall=11, gb_free=2.8, wall=40456
2021-06-19 05:53:13 | INFO | train_inner | epoch 002: 557 / 3002 loss=2.748, ppl=6.72, wps=5791.9, ups=0.09, wpb=64725, bsz=128, num_updates=3533, lr=9.99797e-05, gnorm=2.349, loss_scale=2, train_wall=11, gb_free=2.8, wall=40468
2021-06-19 05:53:24 | INFO | train_inner | epoch 002: 558 / 3002 loss=2.952, ppl=7.74, wps=5899.2, ups=0.09, wpb=64803, bsz=128, num_updates=3534, lr=9.99797e-05, gnorm=2.264, loss_scale=2, train_wall=11, gb_free=2.8, wall=40479
2021-06-19 05:53:35 | INFO | train_inner | epoch 002: 559 / 3002 loss=2.947, ppl=7.71, wps=5818.2, ups=0.09, wpb=64879, bsz=128, num_updates=3535, lr=9.99797e-05, gnorm=2.455, loss_scale=2, train_wall=11, gb_free=2.8, wall=40490
2021-06-19 05:53:46 | INFO | train_inner | epoch 002: 560 / 3002 loss=2.878, ppl=7.35, wps=5879.4, ups=0.09, wpb=64817, bsz=128, num_updates=3536, lr=9.99797e-05, gnorm=2.433, loss_scale=2, train_wall=11, gb_free=2.8, wall=40501
2021-06-19 05:53:57 | INFO | train_inner | epoch 002: 561 / 3002 loss=2.856, ppl=7.24, wps=5876, ups=0.09, wpb=64853, bsz=128, num_updates=3537, lr=9.99797e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=40512
2021-06-19 05:54:09 | INFO | train_inner | epoch 002: 562 / 3002 loss=2.801, ppl=6.97, wps=5788.3, ups=0.09, wpb=64864, bsz=128, num_updates=3538, lr=9.99797e-05, gnorm=2.341, loss_scale=2, train_wall=11, gb_free=2.8, wall=40523
2021-06-19 05:54:20 | INFO | train_inner | epoch 002: 563 / 3002 loss=2.814, ppl=7.03, wps=5833.7, ups=0.09, wpb=64868, bsz=128, num_updates=3539, lr=9.99797e-05, gnorm=2.358, loss_scale=2, train_wall=11, gb_free=2.8, wall=40534
2021-06-19 05:54:31 | INFO | train_inner | epoch 002: 564 / 3002 loss=2.926, ppl=7.6, wps=5732.5, ups=0.09, wpb=64791, bsz=128, num_updates=3540, lr=9.99797e-05, gnorm=2.353, loss_scale=2, train_wall=11, gb_free=2.8, wall=40545
2021-06-19 05:54:42 | INFO | train_inner | epoch 002: 565 / 3002 loss=2.792, ppl=6.93, wps=5878.5, ups=0.09, wpb=64866, bsz=128, num_updates=3541, lr=9.99797e-05, gnorm=2.306, loss_scale=2, train_wall=11, gb_free=2.8, wall=40556
2021-06-19 05:54:53 | INFO | train_inner | epoch 002: 566 / 3002 loss=2.764, ppl=6.79, wps=5793.9, ups=0.09, wpb=64784, bsz=128, num_updates=3542, lr=9.99797e-05, gnorm=3.043, loss_scale=2, train_wall=11, gb_free=2.8, wall=40568
2021-06-19 05:55:04 | INFO | train_inner | epoch 002: 567 / 3002 loss=2.827, ppl=7.09, wps=5826.2, ups=0.09, wpb=64863, bsz=128, num_updates=3543, lr=9.99797e-05, gnorm=2.307, loss_scale=2, train_wall=11, gb_free=2.8, wall=40579
2021-06-19 05:55:16 | INFO | train_inner | epoch 002: 568 / 3002 loss=2.818, ppl=7.05, wps=5832.7, ups=0.09, wpb=64804, bsz=128, num_updates=3544, lr=9.99796e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=40590
2021-06-19 05:55:27 | INFO | train_inner | epoch 002: 569 / 3002 loss=2.718, ppl=6.58, wps=5819.9, ups=0.09, wpb=64771, bsz=128, num_updates=3545, lr=9.99796e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=40601
2021-06-19 05:55:38 | INFO | train_inner | epoch 002: 570 / 3002 loss=2.703, ppl=6.51, wps=5799.8, ups=0.09, wpb=64892, bsz=128, num_updates=3546, lr=9.99796e-05, gnorm=2.355, loss_scale=2, train_wall=11, gb_free=2.8, wall=40612
2021-06-19 05:55:49 | INFO | train_inner | epoch 002: 571 / 3002 loss=2.627, ppl=6.18, wps=5769.8, ups=0.09, wpb=64792, bsz=128, num_updates=3547, lr=9.99796e-05, gnorm=2.413, loss_scale=2, train_wall=11, gb_free=2.8, wall=40623
2021-06-19 05:56:00 | INFO | train_inner | epoch 002: 572 / 3002 loss=2.662, ppl=6.33, wps=5869.2, ups=0.09, wpb=64793, bsz=128, num_updates=3548, lr=9.99796e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=40634
2021-06-19 05:56:11 | INFO | train_inner | epoch 002: 573 / 3002 loss=2.893, ppl=7.43, wps=5752.9, ups=0.09, wpb=64788, bsz=128, num_updates=3549, lr=9.99796e-05, gnorm=2.253, loss_scale=2, train_wall=11, gb_free=2.8, wall=40646
2021-06-19 05:56:22 | INFO | train_inner | epoch 002: 574 / 3002 loss=2.764, ppl=6.79, wps=5890.5, ups=0.09, wpb=64869, bsz=128, num_updates=3550, lr=9.99796e-05, gnorm=2.294, loss_scale=2, train_wall=11, gb_free=2.8, wall=40657
2021-06-19 05:56:33 | INFO | train_inner | epoch 002: 575 / 3002 loss=2.816, ppl=7.04, wps=5836, ups=0.09, wpb=64854, bsz=128, num_updates=3551, lr=9.99796e-05, gnorm=2.235, loss_scale=2, train_wall=11, gb_free=2.8, wall=40668
2021-06-19 05:56:44 | INFO | train_inner | epoch 002: 576 / 3002 loss=2.874, ppl=7.33, wps=5906.1, ups=0.09, wpb=64804, bsz=128, num_updates=3552, lr=9.99796e-05, gnorm=2.342, loss_scale=2, train_wall=10, gb_free=2.8, wall=40679
2021-06-19 05:56:55 | INFO | train_inner | epoch 002: 577 / 3002 loss=2.854, ppl=7.23, wps=5870.6, ups=0.09, wpb=64822, bsz=128, num_updates=3553, lr=9.99796e-05, gnorm=2.301, loss_scale=2, train_wall=11, gb_free=2.8, wall=40690
2021-06-19 05:57:06 | INFO | train_inner | epoch 002: 578 / 3002 loss=2.758, ppl=6.76, wps=5935.2, ups=0.09, wpb=64783, bsz=128, num_updates=3554, lr=9.99796e-05, gnorm=2.419, loss_scale=2, train_wall=10, gb_free=2.8, wall=40701
2021-06-19 05:57:17 | INFO | train_inner | epoch 002: 579 / 3002 loss=2.77, ppl=6.82, wps=5906.2, ups=0.09, wpb=64881, bsz=128, num_updates=3555, lr=9.99796e-05, gnorm=2.333, loss_scale=2, train_wall=11, gb_free=2.8, wall=40712
2021-06-19 05:57:28 | INFO | train_inner | epoch 002: 580 / 3002 loss=2.72, ppl=6.59, wps=5961.3, ups=0.09, wpb=64797, bsz=128, num_updates=3556, lr=9.99796e-05, gnorm=2.337, loss_scale=2, train_wall=10, gb_free=2.8, wall=40723
2021-06-19 05:57:39 | INFO | train_inner | epoch 002: 581 / 3002 loss=2.944, ppl=7.7, wps=5811.6, ups=0.09, wpb=64781, bsz=128, num_updates=3557, lr=9.99795e-05, gnorm=2.374, loss_scale=2, train_wall=11, gb_free=2.8, wall=40734
2021-06-19 05:57:51 | INFO | train_inner | epoch 002: 582 / 3002 loss=2.78, ppl=6.87, wps=5804, ups=0.09, wpb=64835, bsz=128, num_updates=3558, lr=9.99795e-05, gnorm=2.425, loss_scale=2, train_wall=11, gb_free=2.8, wall=40745
2021-06-19 05:58:01 | INFO | train_inner | epoch 002: 583 / 3002 loss=2.852, ppl=7.22, wps=5944.9, ups=0.09, wpb=64834, bsz=128, num_updates=3559, lr=9.99795e-05, gnorm=2.492, loss_scale=2, train_wall=10, gb_free=2.8, wall=40756
2021-06-19 05:58:13 | INFO | train_inner | epoch 002: 584 / 3002 loss=2.925, ppl=7.59, wps=5818.5, ups=0.09, wpb=64844, bsz=128, num_updates=3560, lr=9.99795e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=40767
2021-06-19 05:58:24 | INFO | train_inner | epoch 002: 585 / 3002 loss=2.815, ppl=7.04, wps=5749.8, ups=0.09, wpb=64823, bsz=128, num_updates=3561, lr=9.99795e-05, gnorm=2.611, loss_scale=2, train_wall=11, gb_free=2.8, wall=40778
2021-06-19 05:58:35 | INFO | train_inner | epoch 002: 586 / 3002 loss=2.891, ppl=7.42, wps=5811.8, ups=0.09, wpb=64875, bsz=128, num_updates=3562, lr=9.99795e-05, gnorm=2.254, loss_scale=2, train_wall=11, gb_free=2.8, wall=40789
2021-06-19 05:58:46 | INFO | train_inner | epoch 002: 587 / 3002 loss=2.909, ppl=7.51, wps=5877.7, ups=0.09, wpb=64791, bsz=128, num_updates=3563, lr=9.99795e-05, gnorm=2.39, loss_scale=2, train_wall=11, gb_free=2.8, wall=40800
2021-06-19 05:58:57 | INFO | train_inner | epoch 002: 588 / 3002 loss=2.702, ppl=6.51, wps=5926, ups=0.09, wpb=64770, bsz=128, num_updates=3564, lr=9.99795e-05, gnorm=2.311, loss_scale=2, train_wall=10, gb_free=2.8, wall=40811
2021-06-19 05:59:08 | INFO | train_inner | epoch 002: 589 / 3002 loss=2.817, ppl=7.04, wps=5930.6, ups=0.09, wpb=64858, bsz=128, num_updates=3565, lr=9.99795e-05, gnorm=2.248, loss_scale=2, train_wall=10, gb_free=2.8, wall=40822
2021-06-19 05:59:19 | INFO | train_inner | epoch 002: 590 / 3002 loss=2.719, ppl=6.58, wps=5825.5, ups=0.09, wpb=64854, bsz=128, num_updates=3566, lr=9.99795e-05, gnorm=2.471, loss_scale=2, train_wall=11, gb_free=2.8, wall=40833
2021-06-19 05:59:30 | INFO | train_inner | epoch 002: 591 / 3002 loss=2.806, ppl=6.99, wps=5792.7, ups=0.09, wpb=64882, bsz=128, num_updates=3567, lr=9.99795e-05, gnorm=2.367, loss_scale=2, train_wall=11, gb_free=2.8, wall=40845
2021-06-19 05:59:42 | INFO | train_inner | epoch 002: 592 / 3002 loss=2.714, ppl=6.56, wps=5780.9, ups=0.09, wpb=64863, bsz=128, num_updates=3568, lr=9.99795e-05, gnorm=2.339, loss_scale=2, train_wall=11, gb_free=2.8, wall=40856
2021-06-19 05:59:53 | INFO | train_inner | epoch 002: 593 / 3002 loss=2.863, ppl=7.28, wps=5776.6, ups=0.09, wpb=64764, bsz=128, num_updates=3569, lr=9.99794e-05, gnorm=2.331, loss_scale=2, train_wall=11, gb_free=2.8, wall=40867
2021-06-19 06:00:04 | INFO | train_inner | epoch 002: 594 / 3002 loss=2.777, ppl=6.85, wps=5984, ups=0.09, wpb=64775, bsz=128, num_updates=3570, lr=9.99794e-05, gnorm=2.302, loss_scale=2, train_wall=10, gb_free=2.8, wall=40878
2021-06-19 06:00:15 | INFO | train_inner | epoch 002: 595 / 3002 loss=2.78, ppl=6.87, wps=5886.2, ups=0.09, wpb=64870, bsz=128, num_updates=3571, lr=9.99794e-05, gnorm=2.344, loss_scale=2, train_wall=11, gb_free=2.8, wall=40889
2021-06-19 06:00:26 | INFO | train_inner | epoch 002: 596 / 3002 loss=2.897, ppl=7.45, wps=5921.6, ups=0.09, wpb=64775, bsz=128, num_updates=3572, lr=9.99794e-05, gnorm=2.221, loss_scale=2, train_wall=10, gb_free=2.8, wall=40900
2021-06-19 06:00:37 | INFO | train_inner | epoch 002: 597 / 3002 loss=2.832, ppl=7.12, wps=5796.9, ups=0.09, wpb=64773, bsz=128, num_updates=3573, lr=9.99794e-05, gnorm=2.344, loss_scale=2, train_wall=11, gb_free=2.8, wall=40911
2021-06-19 06:00:48 | INFO | train_inner | epoch 002: 598 / 3002 loss=2.82, ppl=7.06, wps=5840.3, ups=0.09, wpb=64712, bsz=128, num_updates=3574, lr=9.99794e-05, gnorm=2.51, loss_scale=2, train_wall=11, gb_free=2.8, wall=40922
2021-06-19 06:00:59 | INFO | train_inner | epoch 002: 599 / 3002 loss=2.612, ppl=6.11, wps=5895.3, ups=0.09, wpb=64811, bsz=128, num_updates=3575, lr=9.99794e-05, gnorm=2.369, loss_scale=2, train_wall=11, gb_free=2.8, wall=40933
2021-06-19 06:01:10 | INFO | train_inner | epoch 002: 600 / 3002 loss=3.013, ppl=8.07, wps=5858.1, ups=0.09, wpb=64764, bsz=128, num_updates=3576, lr=9.99794e-05, gnorm=2.454, loss_scale=2, train_wall=11, gb_free=2.8, wall=40944
2021-06-19 06:01:21 | INFO | train_inner | epoch 002: 601 / 3002 loss=2.693, ppl=6.47, wps=6009.5, ups=0.09, wpb=64838, bsz=128, num_updates=3577, lr=9.99794e-05, gnorm=2.307, loss_scale=2, train_wall=10, gb_free=2.8, wall=40955
2021-06-19 06:01:32 | INFO | train_inner | epoch 002: 602 / 3002 loss=2.787, ppl=6.9, wps=5812.8, ups=0.09, wpb=64821, bsz=128, num_updates=3578, lr=9.99794e-05, gnorm=2.446, loss_scale=2, train_wall=11, gb_free=2.8, wall=40966
2021-06-19 06:01:43 | INFO | train_inner | epoch 002: 603 / 3002 loss=2.857, ppl=7.24, wps=5906.8, ups=0.09, wpb=64811, bsz=128, num_updates=3579, lr=9.99794e-05, gnorm=2.308, loss_scale=2, train_wall=11, gb_free=2.8, wall=40977
2021-06-19 06:01:54 | INFO | train_inner | epoch 002: 604 / 3002 loss=2.842, ppl=7.17, wps=5879.5, ups=0.09, wpb=64846, bsz=128, num_updates=3580, lr=9.99794e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=40988
2021-06-19 06:02:05 | INFO | train_inner | epoch 002: 605 / 3002 loss=2.679, ppl=6.41, wps=5945.7, ups=0.09, wpb=64865, bsz=128, num_updates=3581, lr=9.99794e-05, gnorm=2.425, loss_scale=2, train_wall=10, gb_free=2.8, wall=40999
2021-06-19 06:02:16 | INFO | train_inner | epoch 002: 606 / 3002 loss=2.812, ppl=7.02, wps=5959.7, ups=0.09, wpb=64882, bsz=128, num_updates=3582, lr=9.99793e-05, gnorm=2.31, loss_scale=2, train_wall=10, gb_free=2.8, wall=41010
2021-06-19 06:02:27 | INFO | train_inner | epoch 002: 607 / 3002 loss=2.725, ppl=6.61, wps=5863.3, ups=0.09, wpb=64735, bsz=128, num_updates=3583, lr=9.99793e-05, gnorm=2.326, loss_scale=2, train_wall=11, gb_free=2.8, wall=41021
2021-06-19 06:02:38 | INFO | train_inner | epoch 002: 608 / 3002 loss=2.879, ppl=7.35, wps=5860.7, ups=0.09, wpb=64721, bsz=128, num_updates=3584, lr=9.99793e-05, gnorm=2.486, loss_scale=2, train_wall=11, gb_free=2.8, wall=41032
2021-06-19 06:02:48 | INFO | train_inner | epoch 002: 609 / 3002 loss=2.782, ppl=6.88, wps=5967.3, ups=0.09, wpb=64750, bsz=128, num_updates=3585, lr=9.99793e-05, gnorm=2.434, loss_scale=2, train_wall=10, gb_free=2.8, wall=41043
2021-06-19 06:03:00 | INFO | train_inner | epoch 002: 610 / 3002 loss=2.783, ppl=6.88, wps=5817.2, ups=0.09, wpb=64836, bsz=128, num_updates=3586, lr=9.99793e-05, gnorm=2.853, loss_scale=2, train_wall=11, gb_free=2.8, wall=41054
2021-06-19 06:03:11 | INFO | train_inner | epoch 002: 611 / 3002 loss=2.65, ppl=6.28, wps=5831, ups=0.09, wpb=64790, bsz=128, num_updates=3587, lr=9.99793e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=41065
2021-06-19 06:03:22 | INFO | train_inner | epoch 002: 612 / 3002 loss=2.669, ppl=6.36, wps=5725.6, ups=0.09, wpb=64720, bsz=128, num_updates=3588, lr=9.99793e-05, gnorm=2.266, loss_scale=2, train_wall=11, gb_free=2.8, wall=41076
2021-06-19 06:03:33 | INFO | train_inner | epoch 002: 613 / 3002 loss=2.789, ppl=6.91, wps=5737.3, ups=0.09, wpb=64847, bsz=128, num_updates=3589, lr=9.99793e-05, gnorm=2.281, loss_scale=2, train_wall=11, gb_free=2.8, wall=41088
2021-06-19 06:03:44 | INFO | train_inner | epoch 002: 614 / 3002 loss=2.789, ppl=6.91, wps=5895.8, ups=0.09, wpb=64784, bsz=128, num_updates=3590, lr=9.99793e-05, gnorm=2.393, loss_scale=2, train_wall=10, gb_free=2.8, wall=41099
2021-06-19 06:03:55 | INFO | train_inner | epoch 002: 615 / 3002 loss=2.624, ppl=6.17, wps=5986.6, ups=0.09, wpb=64832, bsz=128, num_updates=3591, lr=9.99793e-05, gnorm=2.403, loss_scale=2, train_wall=10, gb_free=2.8, wall=41110
2021-06-19 06:04:06 | INFO | train_inner | epoch 002: 616 / 3002 loss=2.725, ppl=6.61, wps=5848.3, ups=0.09, wpb=64847, bsz=128, num_updates=3592, lr=9.99793e-05, gnorm=2.37, loss_scale=2, train_wall=11, gb_free=2.8, wall=41121
2021-06-19 06:04:17 | INFO | train_inner | epoch 002: 617 / 3002 loss=2.699, ppl=6.49, wps=5815.6, ups=0.09, wpb=64804, bsz=128, num_updates=3593, lr=9.99793e-05, gnorm=2.303, loss_scale=2, train_wall=11, gb_free=2.8, wall=41132
2021-06-19 06:04:29 | INFO | train_inner | epoch 002: 618 / 3002 loss=2.537, ppl=5.8, wps=5844.6, ups=0.09, wpb=64907, bsz=128, num_updates=3594, lr=9.99792e-05, gnorm=2.303, loss_scale=2, train_wall=11, gb_free=2.8, wall=41143
2021-06-19 06:04:40 | INFO | train_inner | epoch 002: 619 / 3002 loss=2.979, ppl=7.88, wps=5811.1, ups=0.09, wpb=64708, bsz=128, num_updates=3595, lr=9.99792e-05, gnorm=2.23, loss_scale=2, train_wall=11, gb_free=2.8, wall=41154
2021-06-19 06:04:51 | INFO | train_inner | epoch 002: 620 / 3002 loss=2.998, ppl=7.99, wps=5791, ups=0.09, wpb=64737, bsz=128, num_updates=3596, lr=9.99792e-05, gnorm=2.705, loss_scale=2, train_wall=11, gb_free=2.8, wall=41165
2021-06-19 06:05:02 | INFO | train_inner | epoch 002: 621 / 3002 loss=2.824, ppl=7.08, wps=5898.4, ups=0.09, wpb=64810, bsz=128, num_updates=3597, lr=9.99792e-05, gnorm=2.304, loss_scale=2, train_wall=11, gb_free=2.8, wall=41176
2021-06-19 06:05:13 | INFO | train_inner | epoch 002: 622 / 3002 loss=2.761, ppl=6.78, wps=5928.5, ups=0.09, wpb=64799, bsz=128, num_updates=3598, lr=9.99792e-05, gnorm=2.249, loss_scale=2, train_wall=10, gb_free=2.8, wall=41187
2021-06-19 06:05:24 | INFO | train_inner | epoch 002: 623 / 3002 loss=2.77, ppl=6.82, wps=5875.4, ups=0.09, wpb=64823, bsz=128, num_updates=3599, lr=9.99792e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=41198
2021-06-19 06:05:35 | INFO | train_inner | epoch 002: 624 / 3002 loss=2.807, ppl=7, wps=5986.1, ups=0.09, wpb=64789, bsz=128, num_updates=3600, lr=9.99792e-05, gnorm=2.369, loss_scale=2, train_wall=10, gb_free=2.8, wall=41209
2021-06-19 06:05:46 | INFO | train_inner | epoch 002: 625 / 3002 loss=2.783, ppl=6.88, wps=5870.3, ups=0.09, wpb=64758, bsz=128, num_updates=3601, lr=9.99792e-05, gnorm=2.272, loss_scale=2, train_wall=11, gb_free=2.8, wall=41220
2021-06-19 06:05:57 | INFO | train_inner | epoch 002: 626 / 3002 loss=2.865, ppl=7.29, wps=5881.9, ups=0.09, wpb=64808, bsz=128, num_updates=3602, lr=9.99792e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=41231
2021-06-19 06:06:08 | INFO | train_inner | epoch 002: 627 / 3002 loss=2.834, ppl=7.13, wps=5797.8, ups=0.09, wpb=64791, bsz=128, num_updates=3603, lr=9.99792e-05, gnorm=2.259, loss_scale=2, train_wall=11, gb_free=2.8, wall=41242
2021-06-19 06:06:19 | INFO | train_inner | epoch 002: 628 / 3002 loss=2.906, ppl=7.49, wps=5804, ups=0.09, wpb=64850, bsz=128, num_updates=3604, lr=9.99792e-05, gnorm=2.385, loss_scale=2, train_wall=11, gb_free=2.8, wall=41253
2021-06-19 06:06:30 | INFO | train_inner | epoch 002: 629 / 3002 loss=2.847, ppl=7.2, wps=5742.2, ups=0.09, wpb=64763, bsz=128, num_updates=3605, lr=9.99792e-05, gnorm=2.234, loss_scale=2, train_wall=11, gb_free=2.8, wall=41265
2021-06-19 06:06:41 | INFO | train_inner | epoch 002: 630 / 3002 loss=2.817, ppl=7.05, wps=5866.4, ups=0.09, wpb=64836, bsz=128, num_updates=3606, lr=9.99792e-05, gnorm=2.434, loss_scale=2, train_wall=11, gb_free=2.8, wall=41276
2021-06-19 06:06:53 | INFO | train_inner | epoch 002: 631 / 3002 loss=2.706, ppl=6.52, wps=5809.9, ups=0.09, wpb=64910, bsz=128, num_updates=3607, lr=9.99791e-05, gnorm=2.437, loss_scale=2, train_wall=11, gb_free=2.8, wall=41287
2021-06-19 06:07:04 | INFO | train_inner | epoch 002: 632 / 3002 loss=2.927, ppl=7.6, wps=5863.8, ups=0.09, wpb=64760, bsz=128, num_updates=3608, lr=9.99791e-05, gnorm=2.664, loss_scale=2, train_wall=11, gb_free=2.8, wall=41298
2021-06-19 06:07:15 | INFO | train_inner | epoch 002: 633 / 3002 loss=2.828, ppl=7.1, wps=5907.2, ups=0.09, wpb=64824, bsz=128, num_updates=3609, lr=9.99791e-05, gnorm=2.323, loss_scale=2, train_wall=11, gb_free=2.8, wall=41309
2021-06-19 06:07:25 | INFO | train_inner | epoch 002: 634 / 3002 loss=2.787, ppl=6.9, wps=5907.2, ups=0.09, wpb=64779, bsz=128, num_updates=3610, lr=9.99791e-05, gnorm=2.383, loss_scale=2, train_wall=10, gb_free=2.8, wall=41320
2021-06-19 06:07:36 | INFO | train_inner | epoch 002: 635 / 3002 loss=2.737, ppl=6.67, wps=5971.6, ups=0.09, wpb=64845, bsz=128, num_updates=3611, lr=9.99791e-05, gnorm=2.205, loss_scale=2, train_wall=10, gb_free=2.8, wall=41331
2021-06-19 06:07:47 | INFO | train_inner | epoch 002: 636 / 3002 loss=2.832, ppl=7.12, wps=5861.7, ups=0.09, wpb=64842, bsz=128, num_updates=3612, lr=9.99791e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=41342
2021-06-19 06:07:58 | INFO | train_inner | epoch 002: 637 / 3002 loss=2.703, ppl=6.51, wps=5980.1, ups=0.09, wpb=64848, bsz=128, num_updates=3613, lr=9.99791e-05, gnorm=2.266, loss_scale=2, train_wall=10, gb_free=2.8, wall=41353
2021-06-19 06:08:09 | INFO | train_inner | epoch 002: 638 / 3002 loss=2.678, ppl=6.4, wps=5776.6, ups=0.09, wpb=64866, bsz=128, num_updates=3614, lr=9.99791e-05, gnorm=3.891, loss_scale=2, train_wall=11, gb_free=2.8, wall=41364
2021-06-19 06:08:21 | INFO | train_inner | epoch 002: 639 / 3002 loss=2.805, ppl=6.99, wps=5759.1, ups=0.09, wpb=64779, bsz=128, num_updates=3615, lr=9.99791e-05, gnorm=2.402, loss_scale=2, train_wall=11, gb_free=2.8, wall=41375
2021-06-19 06:08:32 | INFO | train_inner | epoch 002: 640 / 3002 loss=2.847, ppl=7.2, wps=5882.4, ups=0.09, wpb=64867, bsz=128, num_updates=3616, lr=9.99791e-05, gnorm=2.536, loss_scale=2, train_wall=11, gb_free=2.8, wall=41386
2021-06-19 06:08:43 | INFO | train_inner | epoch 002: 641 / 3002 loss=2.833, ppl=7.13, wps=5775.5, ups=0.09, wpb=64768, bsz=128, num_updates=3617, lr=9.99791e-05, gnorm=2.304, loss_scale=2, train_wall=11, gb_free=2.8, wall=41397
2021-06-19 06:08:54 | INFO | train_inner | epoch 002: 642 / 3002 loss=2.736, ppl=6.66, wps=5757.2, ups=0.09, wpb=64853, bsz=128, num_updates=3618, lr=9.99791e-05, gnorm=2.254, loss_scale=2, train_wall=11, gb_free=2.8, wall=41409
2021-06-19 06:09:05 | INFO | train_inner | epoch 002: 643 / 3002 loss=3.087, ppl=8.5, wps=5988.5, ups=0.09, wpb=64894, bsz=128, num_updates=3619, lr=9.9979e-05, gnorm=2.509, loss_scale=2, train_wall=10, gb_free=2.8, wall=41419
2021-06-19 06:09:16 | INFO | train_inner | epoch 002: 644 / 3002 loss=2.753, ppl=6.74, wps=5842.8, ups=0.09, wpb=64838, bsz=128, num_updates=3620, lr=9.9979e-05, gnorm=2.247, loss_scale=2, train_wall=11, gb_free=2.8, wall=41431
2021-06-19 06:09:27 | INFO | train_inner | epoch 002: 645 / 3002 loss=2.807, ppl=7, wps=5884.6, ups=0.09, wpb=64856, bsz=128, num_updates=3621, lr=9.9979e-05, gnorm=12.368, loss_scale=2, train_wall=11, gb_free=2.8, wall=41442
2021-06-19 06:09:38 | INFO | train_inner | epoch 002: 646 / 3002 loss=2.774, ppl=6.84, wps=5787.9, ups=0.09, wpb=64793, bsz=128, num_updates=3622, lr=9.9979e-05, gnorm=2.186, loss_scale=2, train_wall=11, gb_free=2.8, wall=41453
2021-06-19 06:09:49 | INFO | train_inner | epoch 002: 647 / 3002 loss=2.729, ppl=6.63, wps=5857.1, ups=0.09, wpb=64867, bsz=128, num_updates=3623, lr=9.9979e-05, gnorm=2.297, loss_scale=2, train_wall=11, gb_free=2.8, wall=41464
2021-06-19 06:10:00 | INFO | train_inner | epoch 002: 648 / 3002 loss=2.817, ppl=7.05, wps=5886.1, ups=0.09, wpb=64810, bsz=128, num_updates=3624, lr=9.9979e-05, gnorm=2.284, loss_scale=2, train_wall=11, gb_free=2.8, wall=41475
2021-06-19 06:10:12 | INFO | train_inner | epoch 002: 649 / 3002 loss=2.849, ppl=7.2, wps=5857.7, ups=0.09, wpb=64876, bsz=128, num_updates=3625, lr=9.9979e-05, gnorm=2.272, loss_scale=2, train_wall=11, gb_free=2.8, wall=41486
2021-06-19 06:10:23 | INFO | train_inner | epoch 002: 650 / 3002 loss=2.871, ppl=7.32, wps=5891.3, ups=0.09, wpb=64790, bsz=128, num_updates=3626, lr=9.9979e-05, gnorm=2.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=41497
2021-06-19 06:10:34 | INFO | train_inner | epoch 002: 651 / 3002 loss=2.791, ppl=6.92, wps=5752.6, ups=0.09, wpb=64820, bsz=128, num_updates=3627, lr=9.9979e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=41508
2021-06-19 06:10:45 | INFO | train_inner | epoch 002: 652 / 3002 loss=2.73, ppl=6.64, wps=5874.6, ups=0.09, wpb=64907, bsz=128, num_updates=3628, lr=9.9979e-05, gnorm=2.229, loss_scale=2, train_wall=11, gb_free=2.8, wall=41519
2021-06-19 06:10:56 | INFO | train_inner | epoch 002: 653 / 3002 loss=2.714, ppl=6.56, wps=5875.4, ups=0.09, wpb=64805, bsz=128, num_updates=3629, lr=9.9979e-05, gnorm=2.365, loss_scale=2, train_wall=11, gb_free=2.8, wall=41530
2021-06-19 06:11:07 | INFO | train_inner | epoch 002: 654 / 3002 loss=2.804, ppl=6.98, wps=5886.5, ups=0.09, wpb=64846, bsz=128, num_updates=3630, lr=9.9979e-05, gnorm=2.191, loss_scale=2, train_wall=11, gb_free=2.8, wall=41541
2021-06-19 06:11:18 | INFO | train_inner | epoch 002: 655 / 3002 loss=2.669, ppl=6.36, wps=5829.9, ups=0.09, wpb=64905, bsz=128, num_updates=3631, lr=9.9979e-05, gnorm=2.436, loss_scale=2, train_wall=11, gb_free=2.8, wall=41552
2021-06-19 06:11:29 | INFO | train_inner | epoch 002: 656 / 3002 loss=2.849, ppl=7.2, wps=5731.9, ups=0.09, wpb=64815, bsz=128, num_updates=3632, lr=9.99789e-05, gnorm=2.378, loss_scale=2, train_wall=11, gb_free=2.8, wall=41564
2021-06-19 06:11:40 | INFO | train_inner | epoch 002: 657 / 3002 loss=2.785, ppl=6.89, wps=5902.5, ups=0.09, wpb=64839, bsz=128, num_updates=3633, lr=9.99789e-05, gnorm=2.391, loss_scale=2, train_wall=11, gb_free=2.8, wall=41575
2021-06-19 06:11:51 | INFO | train_inner | epoch 002: 658 / 3002 loss=2.858, ppl=7.25, wps=5833.1, ups=0.09, wpb=64834, bsz=128, num_updates=3634, lr=9.99789e-05, gnorm=2.243, loss_scale=2, train_wall=11, gb_free=2.8, wall=41586
2021-06-19 06:12:03 | INFO | train_inner | epoch 002: 659 / 3002 loss=2.859, ppl=7.25, wps=5773.9, ups=0.09, wpb=64816, bsz=128, num_updates=3635, lr=9.99789e-05, gnorm=2.291, loss_scale=2, train_wall=11, gb_free=2.8, wall=41597
2021-06-19 06:12:14 | INFO | train_inner | epoch 002: 660 / 3002 loss=2.809, ppl=7.01, wps=5845.1, ups=0.09, wpb=64850, bsz=128, num_updates=3636, lr=9.99789e-05, gnorm=2.361, loss_scale=2, train_wall=11, gb_free=2.8, wall=41608
2021-06-19 06:12:25 | INFO | train_inner | epoch 002: 661 / 3002 loss=2.737, ppl=6.67, wps=5896.4, ups=0.09, wpb=64895, bsz=128, num_updates=3637, lr=9.99789e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=41619
2021-06-19 06:12:36 | INFO | train_inner | epoch 002: 662 / 3002 loss=2.773, ppl=6.84, wps=5862.7, ups=0.09, wpb=64772, bsz=128, num_updates=3638, lr=9.99789e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=41630
2021-06-19 06:12:47 | INFO | train_inner | epoch 002: 663 / 3002 loss=2.779, ppl=6.86, wps=5769.3, ups=0.09, wpb=64806, bsz=128, num_updates=3639, lr=9.99789e-05, gnorm=2.344, loss_scale=2, train_wall=11, gb_free=2.8, wall=41641
2021-06-19 06:12:58 | INFO | train_inner | epoch 002: 664 / 3002 loss=2.851, ppl=7.22, wps=5887.2, ups=0.09, wpb=64835, bsz=128, num_updates=3640, lr=9.99789e-05, gnorm=2.329, loss_scale=2, train_wall=11, gb_free=2.8, wall=41652
2021-06-19 06:13:09 | INFO | train_inner | epoch 002: 665 / 3002 loss=2.943, ppl=7.69, wps=5781.9, ups=0.09, wpb=64826, bsz=128, num_updates=3641, lr=9.99789e-05, gnorm=2.347, loss_scale=2, train_wall=11, gb_free=2.8, wall=41664
2021-06-19 06:13:21 | INFO | train_inner | epoch 002: 666 / 3002 loss=2.777, ppl=6.86, wps=5757.6, ups=0.09, wpb=64867, bsz=128, num_updates=3642, lr=9.99789e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=41675
2021-06-19 06:13:32 | INFO | train_inner | epoch 002: 667 / 3002 loss=2.752, ppl=6.74, wps=5680.7, ups=0.09, wpb=64879, bsz=128, num_updates=3643, lr=9.99789e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=41686
2021-06-19 06:13:43 | INFO | train_inner | epoch 002: 668 / 3002 loss=2.785, ppl=6.89, wps=5780.8, ups=0.09, wpb=64860, bsz=128, num_updates=3644, lr=9.99788e-05, gnorm=2.226, loss_scale=2, train_wall=11, gb_free=2.8, wall=41698
2021-06-19 06:13:54 | INFO | train_inner | epoch 002: 669 / 3002 loss=2.673, ppl=6.38, wps=5817.3, ups=0.09, wpb=64839, bsz=128, num_updates=3645, lr=9.99788e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=41709
2021-06-19 06:14:05 | INFO | train_inner | epoch 002: 670 / 3002 loss=2.751, ppl=6.73, wps=5901.8, ups=0.09, wpb=64849, bsz=128, num_updates=3646, lr=9.99788e-05, gnorm=2.301, loss_scale=2, train_wall=11, gb_free=2.8, wall=41720
2021-06-19 06:14:16 | INFO | train_inner | epoch 002: 671 / 3002 loss=2.781, ppl=6.87, wps=5977, ups=0.09, wpb=64845, bsz=128, num_updates=3647, lr=9.99788e-05, gnorm=2.292, loss_scale=2, train_wall=10, gb_free=2.8, wall=41731
2021-06-19 06:14:27 | INFO | train_inner | epoch 002: 672 / 3002 loss=2.818, ppl=7.05, wps=5789.2, ups=0.09, wpb=64756, bsz=128, num_updates=3648, lr=9.99788e-05, gnorm=2.386, loss_scale=4, train_wall=11, gb_free=2.8, wall=41742
2021-06-19 06:14:39 | INFO | train_inner | epoch 002: 673 / 3002 loss=2.888, ppl=7.4, wps=5769.1, ups=0.09, wpb=64742, bsz=128, num_updates=3649, lr=9.99788e-05, gnorm=2.329, loss_scale=4, train_wall=11, gb_free=2.8, wall=41753
2021-06-19 06:14:50 | INFO | train_inner | epoch 002: 674 / 3002 loss=2.718, ppl=6.58, wps=5926.6, ups=0.09, wpb=64859, bsz=128, num_updates=3650, lr=9.99788e-05, gnorm=2.201, loss_scale=4, train_wall=10, gb_free=2.8, wall=41764
2021-06-19 06:15:01 | INFO | train_inner | epoch 002: 675 / 3002 loss=2.807, ppl=7, wps=5848, ups=0.09, wpb=64879, bsz=128, num_updates=3651, lr=9.99788e-05, gnorm=2.278, loss_scale=4, train_wall=11, gb_free=2.8, wall=41775
2021-06-19 06:15:12 | INFO | train_inner | epoch 002: 676 / 3002 loss=2.905, ppl=7.49, wps=5747.6, ups=0.09, wpb=64754, bsz=128, num_updates=3652, lr=9.99788e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=41786
2021-06-19 06:15:23 | INFO | train_inner | epoch 002: 677 / 3002 loss=2.83, ppl=7.11, wps=5919, ups=0.09, wpb=64876, bsz=128, num_updates=3653, lr=9.99788e-05, gnorm=2.345, loss_scale=4, train_wall=11, gb_free=2.8, wall=41797
2021-06-19 06:15:34 | INFO | train_inner | epoch 002: 678 / 3002 loss=2.818, ppl=7.05, wps=5833, ups=0.09, wpb=64887, bsz=128, num_updates=3654, lr=9.99788e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=41808
2021-06-19 06:15:45 | INFO | train_inner | epoch 002: 679 / 3002 loss=2.815, ppl=7.04, wps=5887.4, ups=0.09, wpb=64822, bsz=128, num_updates=3655, lr=9.99788e-05, gnorm=2.321, loss_scale=4, train_wall=11, gb_free=2.8, wall=41819
2021-06-19 06:15:56 | INFO | train_inner | epoch 002: 680 / 3002 loss=2.662, ppl=6.33, wps=5823.4, ups=0.09, wpb=64802, bsz=128, num_updates=3656, lr=9.99788e-05, gnorm=2.263, loss_scale=4, train_wall=11, gb_free=2.8, wall=41830
2021-06-19 06:16:07 | INFO | train_inner | epoch 002: 681 / 3002 loss=2.906, ppl=7.49, wps=5831.2, ups=0.09, wpb=64804, bsz=128, num_updates=3657, lr=9.99787e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=41842
2021-06-19 06:16:18 | INFO | train_inner | epoch 002: 682 / 3002 loss=2.652, ppl=6.29, wps=5986.5, ups=0.09, wpb=64812, bsz=128, num_updates=3658, lr=9.99787e-05, gnorm=2.253, loss_scale=4, train_wall=10, gb_free=2.8, wall=41852
2021-06-19 06:16:29 | INFO | train_inner | epoch 002: 683 / 3002 loss=2.8, ppl=6.96, wps=5777.3, ups=0.09, wpb=64857, bsz=128, num_updates=3659, lr=9.99787e-05, gnorm=2.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=41864
2021-06-19 06:16:40 | INFO | train_inner | epoch 002: 684 / 3002 loss=2.768, ppl=6.81, wps=5851.4, ups=0.09, wpb=64841, bsz=128, num_updates=3660, lr=9.99787e-05, gnorm=2.307, loss_scale=4, train_wall=11, gb_free=2.8, wall=41875
2021-06-19 06:16:51 | INFO | train_inner | epoch 002: 685 / 3002 loss=2.886, ppl=7.39, wps=5894.1, ups=0.09, wpb=64940, bsz=128, num_updates=3661, lr=9.99787e-05, gnorm=6.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=41886
2021-06-19 06:17:03 | INFO | train_inner | epoch 002: 686 / 3002 loss=2.659, ppl=6.32, wps=5832, ups=0.09, wpb=64834, bsz=128, num_updates=3662, lr=9.99787e-05, gnorm=2.323, loss_scale=4, train_wall=11, gb_free=2.8, wall=41897
2021-06-19 06:17:14 | INFO | train_inner | epoch 002: 687 / 3002 loss=2.604, ppl=6.08, wps=5894.7, ups=0.09, wpb=64874, bsz=128, num_updates=3663, lr=9.99787e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=41908
2021-06-19 06:17:25 | INFO | train_inner | epoch 002: 688 / 3002 loss=2.795, ppl=6.94, wps=5890.5, ups=0.09, wpb=64880, bsz=128, num_updates=3664, lr=9.99787e-05, gnorm=2.283, loss_scale=4, train_wall=11, gb_free=2.8, wall=41919
2021-06-19 06:17:36 | INFO | train_inner | epoch 002: 689 / 3002 loss=2.803, ppl=6.98, wps=5818, ups=0.09, wpb=64825, bsz=128, num_updates=3665, lr=9.99787e-05, gnorm=2.25, loss_scale=4, train_wall=11, gb_free=2.8, wall=41930
2021-06-19 06:17:47 | INFO | train_inner | epoch 002: 690 / 3002 loss=2.746, ppl=6.71, wps=5815, ups=0.09, wpb=64848, bsz=128, num_updates=3666, lr=9.99787e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=41941
2021-06-19 06:17:58 | INFO | train_inner | epoch 002: 691 / 3002 loss=2.796, ppl=6.95, wps=5723.5, ups=0.09, wpb=64880, bsz=128, num_updates=3667, lr=9.99787e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=41952
2021-06-19 06:18:09 | INFO | train_inner | epoch 002: 692 / 3002 loss=2.816, ppl=7.04, wps=5830.7, ups=0.09, wpb=64805, bsz=128, num_updates=3668, lr=9.99787e-05, gnorm=2.351, loss_scale=4, train_wall=11, gb_free=2.8, wall=41964
2021-06-19 06:18:20 | INFO | train_inner | epoch 002: 693 / 3002 loss=3.05, ppl=8.28, wps=5806.4, ups=0.09, wpb=64788, bsz=128, num_updates=3669, lr=9.99786e-05, gnorm=2.294, loss_scale=4, train_wall=11, gb_free=2.8, wall=41975
2021-06-19 06:18:31 | INFO | train_inner | epoch 002: 694 / 3002 loss=2.802, ppl=6.98, wps=5883.8, ups=0.09, wpb=64767, bsz=128, num_updates=3670, lr=9.99786e-05, gnorm=2.27, loss_scale=4, train_wall=11, gb_free=2.8, wall=41986
2021-06-19 06:18:43 | INFO | train_inner | epoch 002: 695 / 3002 loss=2.786, ppl=6.9, wps=5853.4, ups=0.09, wpb=64864, bsz=128, num_updates=3671, lr=9.99786e-05, gnorm=2.289, loss_scale=4, train_wall=11, gb_free=2.8, wall=41997
2021-06-19 06:18:54 | INFO | train_inner | epoch 002: 696 / 3002 loss=2.813, ppl=7.03, wps=5857, ups=0.09, wpb=64785, bsz=128, num_updates=3672, lr=9.99786e-05, gnorm=2.206, loss_scale=4, train_wall=11, gb_free=2.8, wall=42008
2021-06-19 06:19:05 | INFO | train_inner | epoch 002: 697 / 3002 loss=2.851, ppl=7.21, wps=5900.4, ups=0.09, wpb=64698, bsz=128, num_updates=3673, lr=9.99786e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=42019
2021-06-19 06:19:16 | INFO | train_inner | epoch 002: 698 / 3002 loss=3.025, ppl=8.14, wps=5714, ups=0.09, wpb=64763, bsz=128, num_updates=3674, lr=9.99786e-05, gnorm=2.318, loss_scale=4, train_wall=11, gb_free=2.8, wall=42030
2021-06-19 06:19:27 | INFO | train_inner | epoch 002: 699 / 3002 loss=2.775, ppl=6.84, wps=5862.1, ups=0.09, wpb=64831, bsz=128, num_updates=3675, lr=9.99786e-05, gnorm=2.349, loss_scale=4, train_wall=11, gb_free=2.8, wall=42041
2021-06-19 06:19:38 | INFO | train_inner | epoch 002: 700 / 3002 loss=2.706, ppl=6.52, wps=5978, ups=0.09, wpb=64923, bsz=128, num_updates=3676, lr=9.99786e-05, gnorm=2.428, loss_scale=4, train_wall=10, gb_free=2.8, wall=42052
2021-06-19 06:19:49 | INFO | train_inner | epoch 002: 701 / 3002 loss=2.804, ppl=6.98, wps=5785.3, ups=0.09, wpb=64721, bsz=128, num_updates=3677, lr=9.99786e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=42063
2021-06-19 06:20:00 | INFO | train_inner | epoch 002: 702 / 3002 loss=2.795, ppl=6.94, wps=5779.9, ups=0.09, wpb=64876, bsz=128, num_updates=3678, lr=9.99786e-05, gnorm=2.337, loss_scale=4, train_wall=11, gb_free=2.8, wall=42075
2021-06-19 06:20:11 | INFO | train_inner | epoch 002: 703 / 3002 loss=2.898, ppl=7.45, wps=5771.1, ups=0.09, wpb=64738, bsz=128, num_updates=3679, lr=9.99786e-05, gnorm=3.324, loss_scale=4, train_wall=11, gb_free=2.8, wall=42086
2021-06-19 06:20:22 | INFO | train_inner | epoch 002: 704 / 3002 loss=2.725, ppl=6.61, wps=5890.2, ups=0.09, wpb=64785, bsz=128, num_updates=3680, lr=9.99786e-05, gnorm=2.335, loss_scale=4, train_wall=11, gb_free=2.8, wall=42097
2021-06-19 06:20:33 | INFO | train_inner | epoch 002: 705 / 3002 loss=2.929, ppl=7.61, wps=6030.1, ups=0.09, wpb=64836, bsz=128, num_updates=3681, lr=9.99786e-05, gnorm=2.373, loss_scale=4, train_wall=10, gb_free=2.8, wall=42108
2021-06-19 06:20:44 | INFO | train_inner | epoch 002: 706 / 3002 loss=2.676, ppl=6.39, wps=5860.1, ups=0.09, wpb=64800, bsz=128, num_updates=3682, lr=9.99785e-05, gnorm=2.432, loss_scale=4, train_wall=11, gb_free=2.8, wall=42119
2021-06-19 06:20:55 | INFO | train_inner | epoch 002: 707 / 3002 loss=2.919, ppl=7.56, wps=5968, ups=0.09, wpb=64790, bsz=128, num_updates=3683, lr=9.99785e-05, gnorm=2.39, loss_scale=4, train_wall=10, gb_free=2.8, wall=42129
2021-06-19 06:21:06 | INFO | train_inner | epoch 002: 708 / 3002 loss=2.863, ppl=7.27, wps=5964.6, ups=0.09, wpb=64812, bsz=128, num_updates=3684, lr=9.99785e-05, gnorm=2.251, loss_scale=4, train_wall=10, gb_free=2.8, wall=42140
2021-06-19 06:21:17 | INFO | train_inner | epoch 002: 709 / 3002 loss=2.801, ppl=6.97, wps=5751.4, ups=0.09, wpb=64861, bsz=128, num_updates=3685, lr=9.99785e-05, gnorm=2.428, loss_scale=4, train_wall=11, gb_free=2.8, wall=42152
2021-06-19 06:21:28 | INFO | train_inner | epoch 002: 710 / 3002 loss=2.975, ppl=7.86, wps=5959.1, ups=0.09, wpb=64752, bsz=128, num_updates=3686, lr=9.99785e-05, gnorm=2.505, loss_scale=4, train_wall=10, gb_free=2.8, wall=42162
2021-06-19 06:21:39 | INFO | train_inner | epoch 002: 711 / 3002 loss=2.787, ppl=6.9, wps=5750.1, ups=0.09, wpb=64806, bsz=128, num_updates=3687, lr=9.99785e-05, gnorm=2.452, loss_scale=4, train_wall=11, gb_free=2.8, wall=42174
2021-06-19 06:21:50 | INFO | train_inner | epoch 002: 712 / 3002 loss=2.759, ppl=6.77, wps=5853.7, ups=0.09, wpb=64818, bsz=128, num_updates=3688, lr=9.99785e-05, gnorm=2.526, loss_scale=4, train_wall=11, gb_free=2.8, wall=42185
2021-06-19 06:22:01 | INFO | train_inner | epoch 002: 713 / 3002 loss=2.79, ppl=6.91, wps=5878.1, ups=0.09, wpb=64797, bsz=128, num_updates=3689, lr=9.99785e-05, gnorm=2.385, loss_scale=4, train_wall=11, gb_free=2.8, wall=42196
2021-06-19 06:22:13 | INFO | train_inner | epoch 002: 714 / 3002 loss=2.828, ppl=7.1, wps=5858.4, ups=0.09, wpb=64796, bsz=128, num_updates=3690, lr=9.99785e-05, gnorm=2.398, loss_scale=4, train_wall=11, gb_free=2.8, wall=42207
2021-06-19 06:22:24 | INFO | train_inner | epoch 002: 715 / 3002 loss=2.836, ppl=7.14, wps=5856.7, ups=0.09, wpb=64861, bsz=128, num_updates=3691, lr=9.99785e-05, gnorm=2.678, loss_scale=4, train_wall=11, gb_free=2.8, wall=42218
2021-06-19 06:22:35 | INFO | train_inner | epoch 002: 716 / 3002 loss=2.691, ppl=6.46, wps=5858.9, ups=0.09, wpb=64809, bsz=128, num_updates=3692, lr=9.99785e-05, gnorm=3.483, loss_scale=4, train_wall=11, gb_free=2.8, wall=42229
2021-06-19 06:22:46 | INFO | train_inner | epoch 002: 717 / 3002 loss=2.906, ppl=7.49, wps=5831.9, ups=0.09, wpb=64739, bsz=128, num_updates=3693, lr=9.99785e-05, gnorm=2.489, loss_scale=4, train_wall=11, gb_free=2.8, wall=42240
2021-06-19 06:22:57 | INFO | train_inner | epoch 002: 718 / 3002 loss=2.843, ppl=7.18, wps=5903.6, ups=0.09, wpb=64798, bsz=128, num_updates=3694, lr=9.99784e-05, gnorm=2.382, loss_scale=4, train_wall=11, gb_free=2.8, wall=42251
2021-06-19 06:23:08 | INFO | train_inner | epoch 002: 719 / 3002 loss=2.784, ppl=6.89, wps=5846.9, ups=0.09, wpb=64831, bsz=128, num_updates=3695, lr=9.99784e-05, gnorm=2.546, loss_scale=4, train_wall=11, gb_free=2.8, wall=42262
2021-06-19 06:23:19 | INFO | train_inner | epoch 002: 720 / 3002 loss=2.71, ppl=6.55, wps=5885.8, ups=0.09, wpb=64874, bsz=128, num_updates=3696, lr=9.99784e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=42273
2021-06-19 06:23:30 | INFO | train_inner | epoch 002: 721 / 3002 loss=2.809, ppl=7.01, wps=5893.1, ups=0.09, wpb=64799, bsz=128, num_updates=3697, lr=9.99784e-05, gnorm=3.687, loss_scale=4, train_wall=11, gb_free=2.8, wall=42284
2021-06-19 06:23:41 | INFO | train_inner | epoch 002: 722 / 3002 loss=2.738, ppl=6.67, wps=5937.8, ups=0.09, wpb=64854, bsz=128, num_updates=3698, lr=9.99784e-05, gnorm=2.335, loss_scale=4, train_wall=10, gb_free=2.8, wall=42295
2021-06-19 06:23:52 | INFO | train_inner | epoch 002: 723 / 3002 loss=2.779, ppl=6.86, wps=5792.3, ups=0.09, wpb=64844, bsz=128, num_updates=3699, lr=9.99784e-05, gnorm=2.337, loss_scale=4, train_wall=11, gb_free=2.8, wall=42306
2021-06-19 06:24:03 | INFO | train_inner | epoch 002: 724 / 3002 loss=2.834, ppl=7.13, wps=5863.4, ups=0.09, wpb=64869, bsz=128, num_updates=3700, lr=9.99784e-05, gnorm=2.329, loss_scale=4, train_wall=11, gb_free=2.8, wall=42317
2021-06-19 06:24:14 | INFO | train_inner | epoch 002: 725 / 3002 loss=2.676, ppl=6.39, wps=5871.5, ups=0.09, wpb=64837, bsz=128, num_updates=3701, lr=9.99784e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=42328
2021-06-19 06:24:25 | INFO | train_inner | epoch 002: 726 / 3002 loss=2.833, ppl=7.12, wps=5861.1, ups=0.09, wpb=64900, bsz=128, num_updates=3702, lr=9.99784e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=42339
2021-06-19 06:24:36 | INFO | train_inner | epoch 002: 727 / 3002 loss=2.787, ppl=6.9, wps=5874.2, ups=0.09, wpb=64953, bsz=128, num_updates=3703, lr=9.99784e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=42351
2021-06-19 06:24:47 | INFO | train_inner | epoch 002: 728 / 3002 loss=2.814, ppl=7.03, wps=6097.2, ups=0.09, wpb=64826, bsz=128, num_updates=3704, lr=9.99784e-05, gnorm=2.267, loss_scale=4, train_wall=10, gb_free=2.8, wall=42361
2021-06-19 06:24:58 | INFO | train_inner | epoch 002: 729 / 3002 loss=2.806, ppl=6.99, wps=5933.9, ups=0.09, wpb=64839, bsz=128, num_updates=3705, lr=9.99784e-05, gnorm=2.318, loss_scale=4, train_wall=10, gb_free=2.8, wall=42372
2021-06-19 06:25:09 | INFO | train_inner | epoch 002: 730 / 3002 loss=2.81, ppl=7.01, wps=5908, ups=0.09, wpb=64947, bsz=128, num_updates=3706, lr=9.99784e-05, gnorm=2.322, loss_scale=4, train_wall=11, gb_free=2.8, wall=42383
2021-06-19 06:25:20 | INFO | train_inner | epoch 002: 731 / 3002 loss=2.877, ppl=7.35, wps=5840.5, ups=0.09, wpb=64886, bsz=128, num_updates=3707, lr=9.99783e-05, gnorm=2.287, loss_scale=4, train_wall=11, gb_free=2.8, wall=42394
2021-06-19 06:25:31 | INFO | train_inner | epoch 002: 732 / 3002 loss=2.702, ppl=6.51, wps=5857.2, ups=0.09, wpb=64812, bsz=128, num_updates=3708, lr=9.99783e-05, gnorm=2.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=42405
2021-06-19 06:25:42 | INFO | train_inner | epoch 002: 733 / 3002 loss=2.812, ppl=7.02, wps=5872.9, ups=0.09, wpb=64824, bsz=128, num_updates=3709, lr=9.99783e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=42416
2021-06-19 06:25:53 | INFO | train_inner | epoch 002: 734 / 3002 loss=2.861, ppl=7.27, wps=5881, ups=0.09, wpb=64848, bsz=128, num_updates=3710, lr=9.99783e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=42427
2021-06-19 06:26:04 | INFO | train_inner | epoch 002: 735 / 3002 loss=2.871, ppl=7.32, wps=5793.7, ups=0.09, wpb=64844, bsz=128, num_updates=3711, lr=9.99783e-05, gnorm=2.346, loss_scale=4, train_wall=11, gb_free=2.8, wall=42439
2021-06-19 06:26:15 | INFO | train_inner | epoch 002: 736 / 3002 loss=2.871, ppl=7.31, wps=5821.2, ups=0.09, wpb=64783, bsz=128, num_updates=3712, lr=9.99783e-05, gnorm=2.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=42450
2021-06-19 06:26:26 | INFO | train_inner | epoch 002: 737 / 3002 loss=2.997, ppl=7.98, wps=5890.3, ups=0.09, wpb=64796, bsz=128, num_updates=3713, lr=9.99783e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=42461
2021-06-19 06:26:37 | INFO | train_inner | epoch 002: 738 / 3002 loss=2.925, ppl=7.6, wps=5932.8, ups=0.09, wpb=64919, bsz=128, num_updates=3714, lr=9.99783e-05, gnorm=2.286, loss_scale=4, train_wall=10, gb_free=2.8, wall=42472
2021-06-19 06:26:48 | INFO | train_inner | epoch 002: 739 / 3002 loss=2.598, ppl=6.05, wps=5798.2, ups=0.09, wpb=64807, bsz=128, num_updates=3715, lr=9.99783e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=42483
2021-06-19 06:26:59 | INFO | train_inner | epoch 002: 740 / 3002 loss=2.596, ppl=6.04, wps=5872.3, ups=0.09, wpb=64876, bsz=128, num_updates=3716, lr=9.99783e-05, gnorm=2.33, loss_scale=4, train_wall=11, gb_free=2.8, wall=42494
2021-06-19 06:27:11 | INFO | train_inner | epoch 002: 741 / 3002 loss=2.909, ppl=7.51, wps=5821, ups=0.09, wpb=64828, bsz=128, num_updates=3717, lr=9.99783e-05, gnorm=2.326, loss_scale=4, train_wall=11, gb_free=2.8, wall=42505
2021-06-19 06:27:22 | INFO | train_inner | epoch 002: 742 / 3002 loss=2.598, ppl=6.05, wps=5928.3, ups=0.09, wpb=64893, bsz=128, num_updates=3718, lr=9.99783e-05, gnorm=2.285, loss_scale=4, train_wall=10, gb_free=2.8, wall=42516
2021-06-19 06:27:33 | INFO | train_inner | epoch 002: 743 / 3002 loss=2.699, ppl=6.49, wps=5831.3, ups=0.09, wpb=64872, bsz=128, num_updates=3719, lr=9.99782e-05, gnorm=2.274, loss_scale=4, train_wall=11, gb_free=2.8, wall=42527
2021-06-19 06:27:44 | INFO | train_inner | epoch 002: 744 / 3002 loss=2.802, ppl=6.98, wps=5883, ups=0.09, wpb=64878, bsz=128, num_updates=3720, lr=9.99782e-05, gnorm=2.326, loss_scale=4, train_wall=11, gb_free=2.8, wall=42538
2021-06-19 06:27:55 | INFO | train_inner | epoch 002: 745 / 3002 loss=2.618, ppl=6.14, wps=5883, ups=0.09, wpb=64909, bsz=128, num_updates=3721, lr=9.99782e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=42549
2021-06-19 06:28:06 | INFO | train_inner | epoch 002: 746 / 3002 loss=2.697, ppl=6.48, wps=5780.8, ups=0.09, wpb=64765, bsz=128, num_updates=3722, lr=9.99782e-05, gnorm=2.308, loss_scale=4, train_wall=11, gb_free=2.8, wall=42560
2021-06-19 06:28:17 | INFO | train_inner | epoch 002: 747 / 3002 loss=2.878, ppl=7.35, wps=5799.1, ups=0.09, wpb=64783, bsz=128, num_updates=3723, lr=9.99782e-05, gnorm=2.28, loss_scale=4, train_wall=11, gb_free=2.8, wall=42571
2021-06-19 06:28:28 | INFO | train_inner | epoch 002: 748 / 3002 loss=2.848, ppl=7.2, wps=5847.7, ups=0.09, wpb=64837, bsz=128, num_updates=3724, lr=9.99782e-05, gnorm=2.403, loss_scale=4, train_wall=11, gb_free=2.8, wall=42583
2021-06-19 06:28:39 | INFO | train_inner | epoch 002: 749 / 3002 loss=3.065, ppl=8.37, wps=5905.6, ups=0.09, wpb=64831, bsz=128, num_updates=3725, lr=9.99782e-05, gnorm=2.37, loss_scale=4, train_wall=11, gb_free=2.8, wall=42594
2021-06-19 06:28:50 | INFO | train_inner | epoch 002: 750 / 3002 loss=2.716, ppl=6.57, wps=5820.3, ups=0.09, wpb=64788, bsz=128, num_updates=3726, lr=9.99782e-05, gnorm=2.305, loss_scale=4, train_wall=11, gb_free=2.8, wall=42605
2021-06-19 06:29:01 | INFO | train_inner | epoch 002: 751 / 3002 loss=2.935, ppl=7.65, wps=5884.9, ups=0.09, wpb=64884, bsz=128, num_updates=3727, lr=9.99782e-05, gnorm=2.275, loss_scale=4, train_wall=11, gb_free=2.8, wall=42616
2021-06-19 06:29:12 | INFO | train_inner | epoch 002: 752 / 3002 loss=2.842, ppl=7.17, wps=5829.7, ups=0.09, wpb=64804, bsz=128, num_updates=3728, lr=9.99782e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=42627
2021-06-19 06:29:24 | INFO | train_inner | epoch 002: 753 / 3002 loss=2.858, ppl=7.25, wps=5845.9, ups=0.09, wpb=64868, bsz=128, num_updates=3729, lr=9.99782e-05, gnorm=2.233, loss_scale=4, train_wall=11, gb_free=2.8, wall=42638
2021-06-19 06:29:34 | INFO | train_inner | epoch 002: 754 / 3002 loss=2.896, ppl=7.45, wps=5982.9, ups=0.09, wpb=64828, bsz=128, num_updates=3730, lr=9.99782e-05, gnorm=2.393, loss_scale=4, train_wall=10, gb_free=2.8, wall=42649
2021-06-19 06:29:46 | INFO | train_inner | epoch 002: 755 / 3002 loss=2.811, ppl=7.02, wps=5834.3, ups=0.09, wpb=64794, bsz=128, num_updates=3731, lr=9.99782e-05, gnorm=2.182, loss_scale=4, train_wall=11, gb_free=2.8, wall=42660
2021-06-19 06:29:57 | INFO | train_inner | epoch 002: 756 / 3002 loss=2.807, ppl=7, wps=5893.3, ups=0.09, wpb=64812, bsz=128, num_updates=3732, lr=9.99781e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=42671
2021-06-19 06:30:07 | INFO | train_inner | epoch 002: 757 / 3002 loss=2.875, ppl=7.34, wps=5908.4, ups=0.09, wpb=64852, bsz=128, num_updates=3733, lr=9.99781e-05, gnorm=2.423, loss_scale=4, train_wall=11, gb_free=2.8, wall=42682
2021-06-19 06:30:19 | INFO | train_inner | epoch 002: 758 / 3002 loss=2.885, ppl=7.39, wps=5805.8, ups=0.09, wpb=64787, bsz=128, num_updates=3734, lr=9.99781e-05, gnorm=2.212, loss_scale=4, train_wall=11, gb_free=2.8, wall=42693
2021-06-19 06:30:30 | INFO | train_inner | epoch 002: 759 / 3002 loss=2.811, ppl=7.02, wps=5848.2, ups=0.09, wpb=64763, bsz=128, num_updates=3735, lr=9.99781e-05, gnorm=2.25, loss_scale=4, train_wall=11, gb_free=2.8, wall=42704
2021-06-19 06:30:41 | INFO | train_inner | epoch 002: 760 / 3002 loss=2.677, ppl=6.4, wps=5852.5, ups=0.09, wpb=64874, bsz=128, num_updates=3736, lr=9.99781e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=42715
2021-06-19 06:30:52 | INFO | train_inner | epoch 002: 761 / 3002 loss=2.819, ppl=7.06, wps=5713.1, ups=0.09, wpb=64794, bsz=128, num_updates=3737, lr=9.99781e-05, gnorm=2.313, loss_scale=4, train_wall=11, gb_free=2.8, wall=42726
2021-06-19 06:31:03 | INFO | train_inner | epoch 002: 762 / 3002 loss=2.826, ppl=7.09, wps=5819.7, ups=0.09, wpb=64881, bsz=128, num_updates=3738, lr=9.99781e-05, gnorm=2.27, loss_scale=4, train_wall=11, gb_free=2.8, wall=42738
2021-06-19 06:31:14 | INFO | train_inner | epoch 002: 763 / 3002 loss=2.707, ppl=6.53, wps=5850.5, ups=0.09, wpb=64820, bsz=128, num_updates=3739, lr=9.99781e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=42749
2021-06-19 06:31:25 | INFO | train_inner | epoch 002: 764 / 3002 loss=2.748, ppl=6.72, wps=5892.6, ups=0.09, wpb=64832, bsz=128, num_updates=3740, lr=9.99781e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=42760
2021-06-19 06:31:36 | INFO | train_inner | epoch 002: 765 / 3002 loss=2.854, ppl=7.23, wps=5934.7, ups=0.09, wpb=64801, bsz=128, num_updates=3741, lr=9.99781e-05, gnorm=2.419, loss_scale=4, train_wall=10, gb_free=2.8, wall=42771
2021-06-19 06:31:47 | INFO | train_inner | epoch 002: 766 / 3002 loss=2.773, ppl=6.83, wps=5820.6, ups=0.09, wpb=64828, bsz=128, num_updates=3742, lr=9.99781e-05, gnorm=2.484, loss_scale=4, train_wall=11, gb_free=2.8, wall=42782
2021-06-19 06:31:58 | INFO | train_inner | epoch 002: 767 / 3002 loss=2.812, ppl=7.02, wps=5885.5, ups=0.09, wpb=64821, bsz=128, num_updates=3743, lr=9.99781e-05, gnorm=2.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=42793
2021-06-19 06:32:10 | INFO | train_inner | epoch 002: 768 / 3002 loss=2.789, ppl=6.91, wps=5868.5, ups=0.09, wpb=64919, bsz=128, num_updates=3744, lr=9.9978e-05, gnorm=2.439, loss_scale=4, train_wall=11, gb_free=2.8, wall=42804
2021-06-19 06:32:21 | INFO | train_inner | epoch 002: 769 / 3002 loss=2.709, ppl=6.54, wps=5842.3, ups=0.09, wpb=64844, bsz=128, num_updates=3745, lr=9.9978e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=42815
2021-06-19 06:32:32 | INFO | train_inner | epoch 002: 770 / 3002 loss=2.777, ppl=6.85, wps=5820.9, ups=0.09, wpb=64814, bsz=128, num_updates=3746, lr=9.9978e-05, gnorm=2.18, loss_scale=4, train_wall=11, gb_free=2.8, wall=42826
2021-06-19 06:32:43 | INFO | train_inner | epoch 002: 771 / 3002 loss=2.783, ppl=6.88, wps=5933.6, ups=0.09, wpb=64860, bsz=128, num_updates=3747, lr=9.9978e-05, gnorm=2.375, loss_scale=4, train_wall=10, gb_free=2.8, wall=42837
2021-06-19 06:32:54 | INFO | train_inner | epoch 002: 772 / 3002 loss=2.681, ppl=6.41, wps=5839.1, ups=0.09, wpb=64885, bsz=128, num_updates=3748, lr=9.9978e-05, gnorm=2.269, loss_scale=4, train_wall=11, gb_free=2.8, wall=42848
2021-06-19 06:33:05 | INFO | train_inner | epoch 002: 773 / 3002 loss=2.804, ppl=6.98, wps=5782.9, ups=0.09, wpb=64838, bsz=128, num_updates=3749, lr=9.9978e-05, gnorm=3.044, loss_scale=4, train_wall=11, gb_free=2.8, wall=42859
2021-06-19 06:33:16 | INFO | train_inner | epoch 002: 774 / 3002 loss=2.746, ppl=6.71, wps=5904.8, ups=0.09, wpb=64778, bsz=128, num_updates=3750, lr=9.9978e-05, gnorm=2.427, loss_scale=4, train_wall=11, gb_free=2.8, wall=42870
2021-06-19 06:33:27 | INFO | train_inner | epoch 002: 775 / 3002 loss=2.748, ppl=6.72, wps=5774.2, ups=0.09, wpb=64880, bsz=128, num_updates=3751, lr=9.9978e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=42882
2021-06-19 06:33:38 | INFO | train_inner | epoch 002: 776 / 3002 loss=2.88, ppl=7.36, wps=5923.4, ups=0.09, wpb=64746, bsz=128, num_updates=3752, lr=9.9978e-05, gnorm=2.232, loss_scale=4, train_wall=10, gb_free=2.8, wall=42892
2021-06-19 06:33:49 | INFO | train_inner | epoch 002: 777 / 3002 loss=2.923, ppl=7.58, wps=5851.3, ups=0.09, wpb=64809, bsz=128, num_updates=3753, lr=9.9978e-05, gnorm=2.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=42904
2021-06-19 06:34:00 | INFO | train_inner | epoch 002: 778 / 3002 loss=2.807, ppl=7, wps=5790.4, ups=0.09, wpb=64806, bsz=128, num_updates=3754, lr=9.9978e-05, gnorm=2.341, loss_scale=4, train_wall=11, gb_free=2.8, wall=42915
2021-06-19 06:34:12 | INFO | train_inner | epoch 002: 779 / 3002 loss=2.95, ppl=7.73, wps=5698.3, ups=0.09, wpb=64749, bsz=128, num_updates=3755, lr=9.9978e-05, gnorm=2.337, loss_scale=4, train_wall=11, gb_free=2.8, wall=42926
2021-06-19 06:34:23 | INFO | train_inner | epoch 002: 780 / 3002 loss=2.557, ppl=5.89, wps=5756.2, ups=0.09, wpb=64846, bsz=128, num_updates=3756, lr=9.9978e-05, gnorm=2.159, loss_scale=4, train_wall=11, gb_free=2.8, wall=42937
2021-06-19 06:34:34 | INFO | train_inner | epoch 002: 781 / 3002 loss=2.758, ppl=6.76, wps=5782.7, ups=0.09, wpb=64871, bsz=128, num_updates=3757, lr=9.99779e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=42949
2021-06-19 06:34:46 | INFO | train_inner | epoch 002: 782 / 3002 loss=2.939, ppl=7.67, wps=5709, ups=0.09, wpb=64760, bsz=128, num_updates=3758, lr=9.99779e-05, gnorm=2.359, loss_scale=4, train_wall=11, gb_free=2.8, wall=42960
2021-06-19 06:34:57 | INFO | train_inner | epoch 002: 783 / 3002 loss=2.754, ppl=6.74, wps=5890.5, ups=0.09, wpb=64869, bsz=128, num_updates=3759, lr=9.99779e-05, gnorm=3.447, loss_scale=4, train_wall=11, gb_free=2.8, wall=42971
2021-06-19 06:35:08 | INFO | train_inner | epoch 002: 784 / 3002 loss=2.831, ppl=7.12, wps=5927.4, ups=0.09, wpb=64848, bsz=128, num_updates=3760, lr=9.99779e-05, gnorm=2.26, loss_scale=4, train_wall=10, gb_free=2.8, wall=42982
2021-06-19 06:35:19 | INFO | train_inner | epoch 002: 785 / 3002 loss=2.715, ppl=6.56, wps=5877.2, ups=0.09, wpb=64795, bsz=128, num_updates=3761, lr=9.99779e-05, gnorm=2.409, loss_scale=4, train_wall=11, gb_free=2.8, wall=42993
2021-06-19 06:35:30 | INFO | train_inner | epoch 002: 786 / 3002 loss=2.978, ppl=7.88, wps=5808.5, ups=0.09, wpb=64814, bsz=128, num_updates=3762, lr=9.99779e-05, gnorm=2.733, loss_scale=4, train_wall=11, gb_free=2.8, wall=43004
2021-06-19 06:35:41 | INFO | train_inner | epoch 002: 787 / 3002 loss=2.777, ppl=6.86, wps=5878.9, ups=0.09, wpb=64865, bsz=128, num_updates=3763, lr=9.99779e-05, gnorm=2.32, loss_scale=4, train_wall=11, gb_free=2.8, wall=43015
2021-06-19 06:35:52 | INFO | train_inner | epoch 002: 788 / 3002 loss=2.861, ppl=7.26, wps=5808, ups=0.09, wpb=64692, bsz=128, num_updates=3764, lr=9.99779e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=43026
2021-06-19 06:36:03 | INFO | train_inner | epoch 002: 789 / 3002 loss=2.839, ppl=7.16, wps=5727.3, ups=0.09, wpb=64838, bsz=128, num_updates=3765, lr=9.99779e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=43038
2021-06-19 06:36:14 | INFO | train_inner | epoch 002: 790 / 3002 loss=2.949, ppl=7.72, wps=5950.8, ups=0.09, wpb=64827, bsz=128, num_updates=3766, lr=9.99779e-05, gnorm=2.346, loss_scale=4, train_wall=10, gb_free=2.8, wall=43048
2021-06-19 06:36:25 | INFO | train_inner | epoch 002: 791 / 3002 loss=2.79, ppl=6.92, wps=5807.6, ups=0.09, wpb=64838, bsz=128, num_updates=3767, lr=9.99779e-05, gnorm=2.344, loss_scale=4, train_wall=11, gb_free=2.8, wall=43060
2021-06-19 06:36:36 | INFO | train_inner | epoch 002: 792 / 3002 loss=2.742, ppl=6.69, wps=5908.2, ups=0.09, wpb=64874, bsz=128, num_updates=3768, lr=9.99779e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=43071
2021-06-19 06:36:47 | INFO | train_inner | epoch 002: 793 / 3002 loss=2.847, ppl=7.19, wps=5954.8, ups=0.09, wpb=64775, bsz=128, num_updates=3769, lr=9.99778e-05, gnorm=2.249, loss_scale=4, train_wall=10, gb_free=2.8, wall=43081
2021-06-19 06:36:58 | INFO | train_inner | epoch 002: 794 / 3002 loss=2.685, ppl=6.43, wps=5873.5, ups=0.09, wpb=64859, bsz=128, num_updates=3770, lr=9.99778e-05, gnorm=2.527, loss_scale=4, train_wall=11, gb_free=2.8, wall=43093
2021-06-19 06:37:09 | INFO | train_inner | epoch 002: 795 / 3002 loss=2.846, ppl=7.19, wps=5844.9, ups=0.09, wpb=64897, bsz=128, num_updates=3771, lr=9.99778e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=43104
2021-06-19 06:37:20 | INFO | train_inner | epoch 002: 796 / 3002 loss=2.799, ppl=6.96, wps=5847.4, ups=0.09, wpb=64813, bsz=128, num_updates=3772, lr=9.99778e-05, gnorm=2.324, loss_scale=4, train_wall=11, gb_free=2.8, wall=43115
2021-06-19 06:37:31 | INFO | train_inner | epoch 002: 797 / 3002 loss=2.816, ppl=7.04, wps=5978.7, ups=0.09, wpb=64852, bsz=128, num_updates=3773, lr=9.99778e-05, gnorm=2.241, loss_scale=4, train_wall=10, gb_free=2.8, wall=43126
2021-06-19 06:37:42 | INFO | train_inner | epoch 002: 798 / 3002 loss=2.807, ppl=7, wps=5849.4, ups=0.09, wpb=64847, bsz=128, num_updates=3774, lr=9.99778e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=43137
2021-06-19 06:37:54 | INFO | train_inner | epoch 002: 799 / 3002 loss=2.832, ppl=7.12, wps=5775.3, ups=0.09, wpb=64765, bsz=128, num_updates=3775, lr=9.99778e-05, gnorm=2.294, loss_scale=4, train_wall=11, gb_free=2.8, wall=43148
2021-06-19 06:38:05 | INFO | train_inner | epoch 002: 800 / 3002 loss=2.768, ppl=6.81, wps=5766.4, ups=0.09, wpb=64815, bsz=128, num_updates=3776, lr=9.99778e-05, gnorm=2.182, loss_scale=8, train_wall=11, gb_free=2.8, wall=43159
2021-06-19 06:38:16 | INFO | train_inner | epoch 002: 801 / 3002 loss=2.663, ppl=6.33, wps=5880.5, ups=0.09, wpb=64846, bsz=128, num_updates=3777, lr=9.99778e-05, gnorm=2.511, loss_scale=8, train_wall=11, gb_free=2.8, wall=43170
2021-06-19 06:38:27 | INFO | train_inner | epoch 002: 802 / 3002 loss=2.859, ppl=7.26, wps=5895.3, ups=0.09, wpb=64841, bsz=128, num_updates=3778, lr=9.99778e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=43181
2021-06-19 06:38:38 | INFO | train_inner | epoch 002: 803 / 3002 loss=2.828, ppl=7.1, wps=5778.4, ups=0.09, wpb=64882, bsz=128, num_updates=3779, lr=9.99778e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=43192
2021-06-19 06:38:49 | INFO | train_inner | epoch 002: 804 / 3002 loss=2.742, ppl=6.69, wps=5946.9, ups=0.09, wpb=64758, bsz=128, num_updates=3780, lr=9.99778e-05, gnorm=3.387, loss_scale=8, train_wall=10, gb_free=2.8, wall=43203
2021-06-19 06:39:00 | INFO | train_inner | epoch 002: 805 / 3002 loss=2.849, ppl=7.2, wps=5962.8, ups=0.09, wpb=64832, bsz=128, num_updates=3781, lr=9.99778e-05, gnorm=2.17, loss_scale=8, train_wall=10, gb_free=2.8, wall=43214
2021-06-19 06:39:11 | INFO | train_inner | epoch 002: 806 / 3002 loss=2.808, ppl=7.01, wps=5742.2, ups=0.09, wpb=64827, bsz=128, num_updates=3782, lr=9.99777e-05, gnorm=2.224, loss_scale=8, train_wall=11, gb_free=2.8, wall=43225
2021-06-19 06:39:22 | INFO | train_inner | epoch 002: 807 / 3002 loss=2.811, ppl=7.02, wps=5888.3, ups=0.09, wpb=64880, bsz=128, num_updates=3783, lr=9.99777e-05, gnorm=2.177, loss_scale=8, train_wall=11, gb_free=2.8, wall=43236
2021-06-19 06:39:33 | INFO | train_inner | epoch 002: 808 / 3002 loss=2.642, ppl=6.24, wps=5845.9, ups=0.09, wpb=64772, bsz=128, num_updates=3784, lr=9.99777e-05, gnorm=2.344, loss_scale=8, train_wall=11, gb_free=2.8, wall=43248
2021-06-19 06:39:44 | INFO | train_inner | epoch 002: 809 / 3002 loss=2.77, ppl=6.82, wps=5805.4, ups=0.09, wpb=64811, bsz=128, num_updates=3785, lr=9.99777e-05, gnorm=2.328, loss_scale=8, train_wall=11, gb_free=2.8, wall=43259
2021-06-19 06:39:55 | INFO | train_inner | epoch 002: 810 / 3002 loss=2.678, ppl=6.4, wps=5899.8, ups=0.09, wpb=64864, bsz=128, num_updates=3786, lr=9.99777e-05, gnorm=2.348, loss_scale=8, train_wall=11, gb_free=2.8, wall=43270
2021-06-19 06:40:07 | INFO | train_inner | epoch 002: 811 / 3002 loss=2.821, ppl=7.07, wps=5788.7, ups=0.09, wpb=64829, bsz=128, num_updates=3787, lr=9.99777e-05, gnorm=2.377, loss_scale=8, train_wall=11, gb_free=2.8, wall=43281
2021-06-19 06:40:18 | INFO | train_inner | epoch 002: 812 / 3002 loss=2.641, ppl=6.24, wps=5804.8, ups=0.09, wpb=64795, bsz=128, num_updates=3788, lr=9.99777e-05, gnorm=2.218, loss_scale=8, train_wall=11, gb_free=2.8, wall=43292
2021-06-19 06:40:29 | INFO | train_inner | epoch 002: 813 / 3002 loss=2.777, ppl=6.85, wps=5869.9, ups=0.09, wpb=64881, bsz=128, num_updates=3789, lr=9.99777e-05, gnorm=2.197, loss_scale=8, train_wall=11, gb_free=2.8, wall=43303
2021-06-19 06:40:40 | INFO | train_inner | epoch 002: 814 / 3002 loss=2.815, ppl=7.04, wps=5900.5, ups=0.09, wpb=64792, bsz=128, num_updates=3790, lr=9.99777e-05, gnorm=2.564, loss_scale=8, train_wall=11, gb_free=2.8, wall=43314
2021-06-19 06:40:51 | INFO | train_inner | epoch 002: 815 / 3002 loss=2.777, ppl=6.86, wps=5777.4, ups=0.09, wpb=64800, bsz=128, num_updates=3791, lr=9.99777e-05, gnorm=2.384, loss_scale=8, train_wall=11, gb_free=2.8, wall=43325
2021-06-19 06:41:02 | INFO | train_inner | epoch 002: 816 / 3002 loss=2.822, ppl=7.07, wps=5832.8, ups=0.09, wpb=64877, bsz=128, num_updates=3792, lr=9.99777e-05, gnorm=2.261, loss_scale=8, train_wall=11, gb_free=2.8, wall=43336
2021-06-19 06:41:13 | INFO | train_inner | epoch 002: 817 / 3002 loss=2.809, ppl=7.01, wps=5863.7, ups=0.09, wpb=64793, bsz=128, num_updates=3793, lr=9.99777e-05, gnorm=2.252, loss_scale=8, train_wall=11, gb_free=2.8, wall=43347
2021-06-19 06:41:24 | INFO | train_inner | epoch 002: 818 / 3002 loss=2.815, ppl=7.04, wps=5892.5, ups=0.09, wpb=64915, bsz=128, num_updates=3794, lr=9.99776e-05, gnorm=2.351, loss_scale=8, train_wall=11, gb_free=2.8, wall=43358
2021-06-19 06:41:35 | INFO | train_inner | epoch 002: 819 / 3002 loss=2.884, ppl=7.38, wps=5874, ups=0.09, wpb=64824, bsz=128, num_updates=3795, lr=9.99776e-05, gnorm=2.503, loss_scale=8, train_wall=11, gb_free=2.8, wall=43370
2021-06-19 06:41:47 | INFO | train_inner | epoch 002: 820 / 3002 loss=2.71, ppl=6.54, wps=5707, ups=0.09, wpb=64923, bsz=128, num_updates=3796, lr=9.99776e-05, gnorm=2.345, loss_scale=8, train_wall=11, gb_free=2.8, wall=43381
2021-06-19 06:41:58 | INFO | train_inner | epoch 002: 821 / 3002 loss=2.679, ppl=6.41, wps=5837.9, ups=0.09, wpb=64860, bsz=128, num_updates=3797, lr=9.99776e-05, gnorm=2.289, loss_scale=8, train_wall=11, gb_free=2.8, wall=43392
2021-06-19 06:42:09 | INFO | train_inner | epoch 002: 822 / 3002 loss=2.846, ppl=7.19, wps=5828.6, ups=0.09, wpb=64756, bsz=128, num_updates=3798, lr=9.99776e-05, gnorm=2.366, loss_scale=8, train_wall=11, gb_free=2.8, wall=43403
2021-06-19 06:42:20 | INFO | train_inner | epoch 002: 823 / 3002 loss=2.792, ppl=6.93, wps=5799.5, ups=0.09, wpb=64854, bsz=128, num_updates=3799, lr=9.99776e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=43414
2021-06-19 06:42:31 | INFO | train_inner | epoch 002: 824 / 3002 loss=2.823, ppl=7.08, wps=5790.5, ups=0.09, wpb=64947, bsz=128, num_updates=3800, lr=9.99776e-05, gnorm=2.218, loss_scale=8, train_wall=11, gb_free=2.8, wall=43426
2021-06-19 06:42:42 | INFO | train_inner | epoch 002: 825 / 3002 loss=2.805, ppl=6.99, wps=5778.1, ups=0.09, wpb=64726, bsz=128, num_updates=3801, lr=9.99776e-05, gnorm=2.183, loss_scale=8, train_wall=11, gb_free=2.8, wall=43437
2021-06-19 06:42:53 | INFO | train_inner | epoch 002: 826 / 3002 loss=2.828, ppl=7.1, wps=5845.3, ups=0.09, wpb=64823, bsz=128, num_updates=3802, lr=9.99776e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=43448
2021-06-19 06:43:05 | INFO | train_inner | epoch 002: 827 / 3002 loss=2.733, ppl=6.65, wps=5782.3, ups=0.09, wpb=64836, bsz=128, num_updates=3803, lr=9.99776e-05, gnorm=2.344, loss_scale=8, train_wall=11, gb_free=2.8, wall=43459
2021-06-19 06:43:16 | INFO | train_inner | epoch 002: 828 / 3002 loss=2.679, ppl=6.4, wps=5845.4, ups=0.09, wpb=64777, bsz=128, num_updates=3804, lr=9.99776e-05, gnorm=2.51, loss_scale=8, train_wall=11, gb_free=2.8, wall=43470
2021-06-19 06:43:27 | INFO | train_inner | epoch 002: 829 / 3002 loss=2.902, ppl=7.48, wps=5806.1, ups=0.09, wpb=64817, bsz=128, num_updates=3805, lr=9.99776e-05, gnorm=2.357, loss_scale=8, train_wall=11, gb_free=2.8, wall=43481
2021-06-19 06:43:38 | INFO | train_inner | epoch 002: 830 / 3002 loss=2.823, ppl=7.07, wps=5790.5, ups=0.09, wpb=64783, bsz=128, num_updates=3806, lr=9.99776e-05, gnorm=2.384, loss_scale=8, train_wall=11, gb_free=2.8, wall=43492
2021-06-19 06:43:49 | INFO | train_inner | epoch 002: 831 / 3002 loss=2.683, ppl=6.42, wps=5869.6, ups=0.09, wpb=64779, bsz=128, num_updates=3807, lr=9.99775e-05, gnorm=2.479, loss_scale=8, train_wall=11, gb_free=2.8, wall=43503
2021-06-19 06:44:00 | INFO | train_inner | epoch 002: 832 / 3002 loss=2.833, ppl=7.13, wps=5835.5, ups=0.09, wpb=64812, bsz=128, num_updates=3808, lr=9.99775e-05, gnorm=2.39, loss_scale=8, train_wall=11, gb_free=2.8, wall=43515
2021-06-19 06:44:11 | INFO | train_inner | epoch 002: 833 / 3002 loss=2.785, ppl=6.89, wps=5858.2, ups=0.09, wpb=64876, bsz=128, num_updates=3809, lr=9.99775e-05, gnorm=2.28, loss_scale=8, train_wall=11, gb_free=2.8, wall=43526
2021-06-19 06:44:22 | INFO | train_inner | epoch 002: 834 / 3002 loss=3.089, ppl=8.51, wps=5941.8, ups=0.09, wpb=64788, bsz=128, num_updates=3810, lr=9.99775e-05, gnorm=2.385, loss_scale=8, train_wall=10, gb_free=2.8, wall=43537
2021-06-19 06:44:33 | INFO | train_inner | epoch 002: 835 / 3002 loss=2.767, ppl=6.81, wps=5831.2, ups=0.09, wpb=64747, bsz=128, num_updates=3811, lr=9.99775e-05, gnorm=2.734, loss_scale=8, train_wall=11, gb_free=2.8, wall=43548
2021-06-19 06:44:44 | INFO | train_inner | epoch 002: 836 / 3002 loss=2.666, ppl=6.35, wps=5866.9, ups=0.09, wpb=64871, bsz=128, num_updates=3812, lr=9.99775e-05, gnorm=2.394, loss_scale=8, train_wall=11, gb_free=2.8, wall=43559
2021-06-19 06:44:56 | INFO | train_inner | epoch 002: 837 / 3002 loss=2.732, ppl=6.64, wps=5756, ups=0.09, wpb=64837, bsz=128, num_updates=3813, lr=9.99775e-05, gnorm=3.624, loss_scale=8, train_wall=11, gb_free=2.8, wall=43570
2021-06-19 06:45:07 | INFO | train_inner | epoch 002: 838 / 3002 loss=2.897, ppl=7.45, wps=5883.2, ups=0.09, wpb=64915, bsz=128, num_updates=3814, lr=9.99775e-05, gnorm=2.382, loss_scale=8, train_wall=11, gb_free=2.8, wall=43581
2021-06-19 06:45:18 | INFO | train_inner | epoch 002: 839 / 3002 loss=3.018, ppl=8.1, wps=5740, ups=0.09, wpb=64833, bsz=128, num_updates=3815, lr=9.99775e-05, gnorm=2.283, loss_scale=8, train_wall=11, gb_free=2.8, wall=43592
2021-06-19 06:45:29 | INFO | train_inner | epoch 002: 840 / 3002 loss=2.706, ppl=6.53, wps=5854.1, ups=0.09, wpb=64925, bsz=128, num_updates=3816, lr=9.99775e-05, gnorm=2.307, loss_scale=8, train_wall=11, gb_free=2.8, wall=43603
2021-06-19 06:45:40 | INFO | train_inner | epoch 002: 841 / 3002 loss=2.834, ppl=7.13, wps=5749.6, ups=0.09, wpb=64737, bsz=128, num_updates=3817, lr=9.99775e-05, gnorm=2.21, loss_scale=8, train_wall=11, gb_free=2.8, wall=43615
2021-06-19 06:45:52 | INFO | train_inner | epoch 002: 842 / 3002 loss=2.82, ppl=7.06, wps=5712.9, ups=0.09, wpb=64816, bsz=128, num_updates=3818, lr=9.99775e-05, gnorm=2.848, loss_scale=8, train_wall=11, gb_free=2.8, wall=43626
2021-06-19 06:46:03 | INFO | train_inner | epoch 002: 843 / 3002 loss=2.756, ppl=6.75, wps=5953.2, ups=0.09, wpb=64903, bsz=128, num_updates=3819, lr=9.99774e-05, gnorm=2.385, loss_scale=8, train_wall=10, gb_free=2.8, wall=43637
2021-06-19 06:46:13 | INFO | train_inner | epoch 002: 844 / 3002 loss=2.685, ppl=6.43, wps=6003.4, ups=0.09, wpb=64756, bsz=128, num_updates=3820, lr=9.99774e-05, gnorm=2.313, loss_scale=8, train_wall=10, gb_free=2.8, wall=43648
2021-06-19 06:46:25 | INFO | train_inner | epoch 002: 845 / 3002 loss=2.743, ppl=6.69, wps=5763.7, ups=0.09, wpb=64827, bsz=128, num_updates=3821, lr=9.99774e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=43659
2021-06-19 06:46:36 | INFO | train_inner | epoch 002: 846 / 3002 loss=2.729, ppl=6.63, wps=5861.5, ups=0.09, wpb=64823, bsz=128, num_updates=3822, lr=9.99774e-05, gnorm=2.566, loss_scale=8, train_wall=11, gb_free=2.8, wall=43670
2021-06-19 06:46:47 | INFO | train_inner | epoch 002: 847 / 3002 loss=2.625, ppl=6.17, wps=5904.2, ups=0.09, wpb=64866, bsz=128, num_updates=3823, lr=9.99774e-05, gnorm=2.192, loss_scale=8, train_wall=11, gb_free=2.8, wall=43681
2021-06-19 06:46:58 | INFO | train_inner | epoch 002: 848 / 3002 loss=2.709, ppl=6.54, wps=5895.1, ups=0.09, wpb=64898, bsz=128, num_updates=3824, lr=9.99774e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=43692
2021-06-19 06:47:09 | INFO | train_inner | epoch 002: 849 / 3002 loss=2.763, ppl=6.79, wps=5778.3, ups=0.09, wpb=64849, bsz=128, num_updates=3825, lr=9.99774e-05, gnorm=2.277, loss_scale=8, train_wall=11, gb_free=2.8, wall=43703
2021-06-19 06:47:20 | INFO | train_inner | epoch 002: 850 / 3002 loss=2.694, ppl=6.47, wps=5793.3, ups=0.09, wpb=64856, bsz=128, num_updates=3826, lr=9.99774e-05, gnorm=2.536, loss_scale=8, train_wall=11, gb_free=2.8, wall=43714
2021-06-19 06:47:31 | INFO | train_inner | epoch 002: 851 / 3002 loss=2.794, ppl=6.93, wps=5727.5, ups=0.09, wpb=64779, bsz=128, num_updates=3827, lr=9.99774e-05, gnorm=2.334, loss_scale=8, train_wall=11, gb_free=2.8, wall=43726
2021-06-19 06:47:43 | INFO | train_inner | epoch 002: 852 / 3002 loss=2.924, ppl=7.59, wps=5830.2, ups=0.09, wpb=64845, bsz=128, num_updates=3828, lr=9.99774e-05, gnorm=2.322, loss_scale=8, train_wall=11, gb_free=2.8, wall=43737
2021-06-19 06:47:54 | INFO | train_inner | epoch 002: 853 / 3002 loss=2.97, ppl=7.83, wps=5865, ups=0.09, wpb=64840, bsz=128, num_updates=3829, lr=9.99774e-05, gnorm=2.353, loss_scale=8, train_wall=11, gb_free=2.8, wall=43748
2021-06-19 06:48:05 | INFO | train_inner | epoch 002: 854 / 3002 loss=2.857, ppl=7.25, wps=5760.3, ups=0.09, wpb=64807, bsz=128, num_updates=3830, lr=9.99774e-05, gnorm=2.365, loss_scale=8, train_wall=11, gb_free=2.8, wall=43759
2021-06-19 06:48:16 | INFO | train_inner | epoch 002: 855 / 3002 loss=2.754, ppl=6.75, wps=5829.6, ups=0.09, wpb=64829, bsz=128, num_updates=3831, lr=9.99774e-05, gnorm=2.584, loss_scale=8, train_wall=11, gb_free=2.8, wall=43770
2021-06-19 06:48:27 | INFO | train_inner | epoch 002: 856 / 3002 loss=2.977, ppl=7.88, wps=5883, ups=0.09, wpb=64895, bsz=128, num_updates=3832, lr=9.99773e-05, gnorm=2.86, loss_scale=8, train_wall=11, gb_free=2.8, wall=43781
2021-06-19 06:48:38 | INFO | train_inner | epoch 002: 857 / 3002 loss=2.87, ppl=7.31, wps=5841.1, ups=0.09, wpb=64809, bsz=128, num_updates=3833, lr=9.99773e-05, gnorm=2.3, loss_scale=8, train_wall=11, gb_free=2.8, wall=43792
2021-06-19 06:48:49 | INFO | train_inner | epoch 002: 858 / 3002 loss=2.808, ppl=7, wps=5853.6, ups=0.09, wpb=64817, bsz=128, num_updates=3834, lr=9.99773e-05, gnorm=2.295, loss_scale=8, train_wall=11, gb_free=2.8, wall=43803
2021-06-19 06:49:00 | INFO | train_inner | epoch 002: 859 / 3002 loss=2.839, ppl=7.16, wps=5967, ups=0.09, wpb=64911, bsz=128, num_updates=3835, lr=9.99773e-05, gnorm=2.241, loss_scale=8, train_wall=10, gb_free=2.8, wall=43814
2021-06-19 06:49:11 | INFO | train_inner | epoch 002: 860 / 3002 loss=2.847, ppl=7.19, wps=5688.8, ups=0.09, wpb=64856, bsz=128, num_updates=3836, lr=9.99773e-05, gnorm=2.328, loss_scale=8, train_wall=11, gb_free=2.8, wall=43826
2021-06-19 06:49:23 | INFO | train_inner | epoch 002: 861 / 3002 loss=2.702, ppl=6.51, wps=5827.7, ups=0.09, wpb=64885, bsz=128, num_updates=3837, lr=9.99773e-05, gnorm=2.477, loss_scale=8, train_wall=11, gb_free=2.8, wall=43837
2021-06-19 06:49:34 | INFO | train_inner | epoch 002: 862 / 3002 loss=2.862, ppl=7.27, wps=5742.4, ups=0.09, wpb=64873, bsz=128, num_updates=3838, lr=9.99773e-05, gnorm=2.342, loss_scale=8, train_wall=11, gb_free=2.8, wall=43848
2021-06-19 06:49:45 | INFO | train_inner | epoch 002: 863 / 3002 loss=2.808, ppl=7, wps=5890.3, ups=0.09, wpb=64906, bsz=128, num_updates=3839, lr=9.99773e-05, gnorm=2.806, loss_scale=8, train_wall=11, gb_free=2.8, wall=43859
2021-06-19 06:49:56 | INFO | train_inner | epoch 002: 864 / 3002 loss=2.812, ppl=7.02, wps=5781.2, ups=0.09, wpb=64756, bsz=128, num_updates=3840, lr=9.99773e-05, gnorm=2.354, loss_scale=8, train_wall=11, gb_free=2.8, wall=43870
2021-06-19 06:50:07 | INFO | train_inner | epoch 002: 865 / 3002 loss=2.798, ppl=6.96, wps=5706.7, ups=0.09, wpb=64797, bsz=128, num_updates=3841, lr=9.99773e-05, gnorm=2.263, loss_scale=8, train_wall=11, gb_free=2.8, wall=43882
2021-06-19 06:50:18 | INFO | train_inner | epoch 002: 866 / 3002 loss=2.906, ppl=7.49, wps=5901.4, ups=0.09, wpb=64891, bsz=128, num_updates=3842, lr=9.99773e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=43893
2021-06-19 06:50:30 | INFO | train_inner | epoch 002: 867 / 3002 loss=2.752, ppl=6.74, wps=5767.7, ups=0.09, wpb=64835, bsz=128, num_updates=3843, lr=9.99773e-05, gnorm=2.306, loss_scale=8, train_wall=11, gb_free=2.8, wall=43904
2021-06-19 06:50:41 | INFO | train_inner | epoch 002: 868 / 3002 loss=2.786, ppl=6.9, wps=5749.7, ups=0.09, wpb=64806, bsz=128, num_updates=3844, lr=9.99772e-05, gnorm=2.248, loss_scale=8, train_wall=11, gb_free=2.8, wall=43915
2021-06-19 06:50:52 | INFO | train_inner | epoch 002: 869 / 3002 loss=2.732, ppl=6.64, wps=5860.7, ups=0.09, wpb=64810, bsz=128, num_updates=3845, lr=9.99772e-05, gnorm=2.287, loss_scale=8, train_wall=11, gb_free=2.8, wall=43926
2021-06-19 06:51:03 | INFO | train_inner | epoch 002: 870 / 3002 loss=2.705, ppl=6.52, wps=5801.9, ups=0.09, wpb=64836, bsz=128, num_updates=3846, lr=9.99772e-05, gnorm=2.368, loss_scale=8, train_wall=11, gb_free=2.8, wall=43938
2021-06-19 06:51:14 | INFO | train_inner | epoch 002: 871 / 3002 loss=2.915, ppl=7.54, wps=5926.8, ups=0.09, wpb=64757, bsz=128, num_updates=3847, lr=9.99772e-05, gnorm=2.437, loss_scale=8, train_wall=10, gb_free=2.8, wall=43948
2021-06-19 06:51:25 | INFO | train_inner | epoch 002: 872 / 3002 loss=2.881, ppl=7.37, wps=5830.5, ups=0.09, wpb=64862, bsz=128, num_updates=3848, lr=9.99772e-05, gnorm=2.385, loss_scale=8, train_wall=11, gb_free=2.8, wall=43960
2021-06-19 06:51:36 | INFO | train_inner | epoch 002: 873 / 3002 loss=2.774, ppl=6.84, wps=5801.4, ups=0.09, wpb=64740, bsz=128, num_updates=3849, lr=9.99772e-05, gnorm=2.334, loss_scale=8, train_wall=11, gb_free=2.8, wall=43971
2021-06-19 06:51:48 | INFO | train_inner | epoch 002: 874 / 3002 loss=2.767, ppl=6.81, wps=5748.4, ups=0.09, wpb=64796, bsz=128, num_updates=3850, lr=9.99772e-05, gnorm=2.289, loss_scale=8, train_wall=11, gb_free=2.8, wall=43982
2021-06-19 06:51:59 | INFO | train_inner | epoch 002: 875 / 3002 loss=2.784, ppl=6.89, wps=5988, ups=0.09, wpb=64901, bsz=128, num_updates=3851, lr=9.99772e-05, gnorm=2.289, loss_scale=8, train_wall=10, gb_free=2.8, wall=43993
2021-06-19 06:52:10 | INFO | train_inner | epoch 002: 876 / 3002 loss=2.77, ppl=6.82, wps=5749.3, ups=0.09, wpb=64788, bsz=128, num_updates=3852, lr=9.99772e-05, gnorm=2.293, loss_scale=8, train_wall=11, gb_free=2.8, wall=44004
2021-06-19 06:52:21 | INFO | train_inner | epoch 002: 877 / 3002 loss=2.757, ppl=6.76, wps=5898.1, ups=0.09, wpb=64851, bsz=128, num_updates=3853, lr=9.99772e-05, gnorm=2.29, loss_scale=8, train_wall=11, gb_free=2.8, wall=44015
2021-06-19 06:52:32 | INFO | train_inner | epoch 002: 878 / 3002 loss=2.637, ppl=6.22, wps=5945.3, ups=0.09, wpb=64853, bsz=128, num_updates=3854, lr=9.99772e-05, gnorm=2.39, loss_scale=8, train_wall=10, gb_free=2.8, wall=44026
2021-06-19 06:52:43 | INFO | train_inner | epoch 002: 879 / 3002 loss=2.845, ppl=7.19, wps=5925, ups=0.09, wpb=64771, bsz=128, num_updates=3855, lr=9.99772e-05, gnorm=4.433, loss_scale=8, train_wall=10, gb_free=2.8, wall=44037
2021-06-19 06:52:54 | INFO | train_inner | epoch 002: 880 / 3002 loss=2.703, ppl=6.51, wps=5936.1, ups=0.09, wpb=64826, bsz=128, num_updates=3856, lr=9.99772e-05, gnorm=2.316, loss_scale=8, train_wall=10, gb_free=2.8, wall=44048
2021-06-19 06:53:05 | INFO | train_inner | epoch 002: 881 / 3002 loss=2.714, ppl=6.56, wps=5868.6, ups=0.09, wpb=64872, bsz=128, num_updates=3857, lr=9.99771e-05, gnorm=2.209, loss_scale=8, train_wall=11, gb_free=2.8, wall=44059
2021-06-19 06:53:16 | INFO | train_inner | epoch 002: 882 / 3002 loss=2.765, ppl=6.8, wps=5837.1, ups=0.09, wpb=64809, bsz=128, num_updates=3858, lr=9.99771e-05, gnorm=2.335, loss_scale=8, train_wall=11, gb_free=2.8, wall=44070
2021-06-19 06:53:27 | INFO | train_inner | epoch 002: 883 / 3002 loss=2.771, ppl=6.83, wps=5812.5, ups=0.09, wpb=64800, bsz=128, num_updates=3859, lr=9.99771e-05, gnorm=2.273, loss_scale=8, train_wall=11, gb_free=2.8, wall=44081
2021-06-19 06:53:38 | INFO | train_inner | epoch 002: 884 / 3002 loss=2.847, ppl=7.2, wps=5890.9, ups=0.09, wpb=64888, bsz=128, num_updates=3860, lr=9.99771e-05, gnorm=2.369, loss_scale=8, train_wall=11, gb_free=2.8, wall=44092
2021-06-19 06:53:49 | INFO | train_inner | epoch 002: 885 / 3002 loss=2.813, ppl=7.03, wps=5821.1, ups=0.09, wpb=64770, bsz=128, num_updates=3861, lr=9.99771e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=44103
2021-06-19 06:54:00 | INFO | train_inner | epoch 002: 886 / 3002 loss=2.701, ppl=6.5, wps=5850.2, ups=0.09, wpb=64843, bsz=128, num_updates=3862, lr=9.99771e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=44114
2021-06-19 06:54:11 | INFO | train_inner | epoch 002: 887 / 3002 loss=2.756, ppl=6.75, wps=5784.3, ups=0.09, wpb=64802, bsz=128, num_updates=3863, lr=9.99771e-05, gnorm=2.675, loss_scale=8, train_wall=11, gb_free=2.8, wall=44126
2021-06-19 06:54:23 | INFO | train_inner | epoch 002: 888 / 3002 loss=2.665, ppl=6.34, wps=5744.8, ups=0.09, wpb=64871, bsz=128, num_updates=3864, lr=9.99771e-05, gnorm=2.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=44137
2021-06-19 06:54:34 | INFO | train_inner | epoch 002: 889 / 3002 loss=3.107, ppl=8.62, wps=5821.9, ups=0.09, wpb=64781, bsz=128, num_updates=3865, lr=9.99771e-05, gnorm=2.29, loss_scale=8, train_wall=11, gb_free=2.8, wall=44148
2021-06-19 06:54:45 | INFO | train_inner | epoch 002: 890 / 3002 loss=2.914, ppl=7.54, wps=5801.5, ups=0.09, wpb=64863, bsz=128, num_updates=3866, lr=9.99771e-05, gnorm=2.394, loss_scale=8, train_wall=11, gb_free=2.8, wall=44159
2021-06-19 06:54:56 | INFO | train_inner | epoch 002: 891 / 3002 loss=2.755, ppl=6.75, wps=5857.8, ups=0.09, wpb=64865, bsz=128, num_updates=3867, lr=9.99771e-05, gnorm=2.39, loss_scale=8, train_wall=11, gb_free=2.8, wall=44170
2021-06-19 06:55:07 | INFO | train_inner | epoch 002: 892 / 3002 loss=2.819, ppl=7.06, wps=5915.2, ups=0.09, wpb=64826, bsz=128, num_updates=3868, lr=9.99771e-05, gnorm=2.312, loss_scale=8, train_wall=10, gb_free=2.8, wall=44181
2021-06-19 06:55:18 | INFO | train_inner | epoch 002: 893 / 3002 loss=2.652, ppl=6.29, wps=5844.6, ups=0.09, wpb=64879, bsz=128, num_updates=3869, lr=9.9977e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=44192
2021-06-19 06:55:29 | INFO | train_inner | epoch 002: 894 / 3002 loss=2.717, ppl=6.57, wps=5827.9, ups=0.09, wpb=64882, bsz=128, num_updates=3870, lr=9.9977e-05, gnorm=2.313, loss_scale=8, train_wall=11, gb_free=2.8, wall=44203
2021-06-19 06:55:40 | INFO | train_inner | epoch 002: 895 / 3002 loss=2.796, ppl=6.94, wps=5938.3, ups=0.09, wpb=64871, bsz=128, num_updates=3871, lr=9.9977e-05, gnorm=2.282, loss_scale=8, train_wall=10, gb_free=2.8, wall=44214
2021-06-19 06:55:51 | INFO | train_inner | epoch 002: 896 / 3002 loss=2.967, ppl=7.82, wps=5868.8, ups=0.09, wpb=64892, bsz=128, num_updates=3872, lr=9.9977e-05, gnorm=2.334, loss_scale=8, train_wall=11, gb_free=2.8, wall=44225
2021-06-19 06:56:02 | INFO | train_inner | epoch 002: 897 / 3002 loss=2.752, ppl=6.74, wps=5787.5, ups=0.09, wpb=64736, bsz=128, num_updates=3873, lr=9.9977e-05, gnorm=2.262, loss_scale=8, train_wall=11, gb_free=2.8, wall=44237
2021-06-19 06:56:13 | INFO | train_inner | epoch 002: 898 / 3002 loss=2.658, ppl=6.31, wps=5849.1, ups=0.09, wpb=64921, bsz=128, num_updates=3874, lr=9.9977e-05, gnorm=2.496, loss_scale=8, train_wall=11, gb_free=2.8, wall=44248
2021-06-19 06:56:24 | INFO | train_inner | epoch 002: 899 / 3002 loss=2.947, ppl=7.71, wps=5881.7, ups=0.09, wpb=64895, bsz=128, num_updates=3875, lr=9.9977e-05, gnorm=2.32, loss_scale=8, train_wall=11, gb_free=2.8, wall=44259
2021-06-19 06:56:35 | INFO | train_inner | epoch 002: 900 / 3002 loss=2.795, ppl=6.94, wps=5886, ups=0.09, wpb=64766, bsz=128, num_updates=3876, lr=9.9977e-05, gnorm=2.314, loss_scale=8, train_wall=11, gb_free=2.8, wall=44270
2021-06-19 06:56:47 | INFO | train_inner | epoch 002: 901 / 3002 loss=2.716, ppl=6.57, wps=5812.1, ups=0.09, wpb=64799, bsz=128, num_updates=3877, lr=9.9977e-05, gnorm=2.404, loss_scale=8, train_wall=11, gb_free=2.8, wall=44281
2021-06-19 06:56:57 | INFO | train_inner | epoch 002: 902 / 3002 loss=2.725, ppl=6.61, wps=6023.2, ups=0.09, wpb=64818, bsz=128, num_updates=3878, lr=9.9977e-05, gnorm=2.293, loss_scale=8, train_wall=10, gb_free=2.8, wall=44292
2021-06-19 06:57:08 | INFO | train_inner | epoch 002: 903 / 3002 loss=2.849, ppl=7.2, wps=5894.4, ups=0.09, wpb=64818, bsz=128, num_updates=3879, lr=9.9977e-05, gnorm=2.398, loss_scale=8, train_wall=11, gb_free=2.8, wall=44303
2021-06-19 06:57:20 | INFO | train_inner | epoch 002: 904 / 3002 loss=2.863, ppl=7.27, wps=5816.1, ups=0.09, wpb=64849, bsz=128, num_updates=3880, lr=9.9977e-05, gnorm=2.343, loss_scale=8, train_wall=11, gb_free=2.8, wall=44314
2021-06-19 06:57:31 | INFO | train_inner | epoch 002: 905 / 3002 loss=2.779, ppl=6.86, wps=5833.2, ups=0.09, wpb=64804, bsz=128, num_updates=3881, lr=9.9977e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=44325
2021-06-19 06:57:42 | INFO | train_inner | epoch 002: 906 / 3002 loss=2.949, ppl=7.72, wps=5787.7, ups=0.09, wpb=64761, bsz=128, num_updates=3882, lr=9.99769e-05, gnorm=2.298, loss_scale=8, train_wall=11, gb_free=2.8, wall=44336
2021-06-19 06:57:53 | INFO | train_inner | epoch 002: 907 / 3002 loss=2.725, ppl=6.61, wps=5790.3, ups=0.09, wpb=64891, bsz=128, num_updates=3883, lr=9.99769e-05, gnorm=2.297, loss_scale=8, train_wall=11, gb_free=2.8, wall=44347
2021-06-19 06:58:04 | INFO | train_inner | epoch 002: 908 / 3002 loss=2.917, ppl=7.55, wps=5813.8, ups=0.09, wpb=64861, bsz=128, num_updates=3884, lr=9.99769e-05, gnorm=2.47, loss_scale=8, train_wall=11, gb_free=2.8, wall=44359
2021-06-19 06:58:15 | INFO | train_inner | epoch 002: 909 / 3002 loss=2.627, ppl=6.18, wps=5855.6, ups=0.09, wpb=64860, bsz=128, num_updates=3885, lr=9.99769e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=44370
2021-06-19 06:58:26 | INFO | train_inner | epoch 002: 910 / 3002 loss=2.732, ppl=6.64, wps=6000.3, ups=0.09, wpb=64907, bsz=128, num_updates=3886, lr=9.99769e-05, gnorm=2.324, loss_scale=8, train_wall=10, gb_free=2.8, wall=44380
2021-06-19 06:58:37 | INFO | train_inner | epoch 002: 911 / 3002 loss=2.566, ppl=5.92, wps=5762.1, ups=0.09, wpb=64820, bsz=128, num_updates=3887, lr=9.99769e-05, gnorm=2.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=44392
2021-06-19 06:58:48 | INFO | train_inner | epoch 002: 912 / 3002 loss=2.691, ppl=6.46, wps=5867.9, ups=0.09, wpb=64761, bsz=128, num_updates=3888, lr=9.99769e-05, gnorm=2.359, loss_scale=8, train_wall=11, gb_free=2.8, wall=44403
2021-06-19 06:58:59 | INFO | train_inner | epoch 002: 913 / 3002 loss=2.904, ppl=7.49, wps=5836.7, ups=0.09, wpb=64826, bsz=128, num_updates=3889, lr=9.99769e-05, gnorm=2.496, loss_scale=8, train_wall=11, gb_free=2.8, wall=44414
2021-06-19 06:59:10 | INFO | train_inner | epoch 002: 914 / 3002 loss=2.797, ppl=6.95, wps=5921.1, ups=0.09, wpb=64908, bsz=128, num_updates=3890, lr=9.99769e-05, gnorm=2.339, loss_scale=8, train_wall=10, gb_free=2.8, wall=44425
2021-06-19 06:59:21 | INFO | train_inner | epoch 002: 915 / 3002 loss=2.891, ppl=7.42, wps=5923.4, ups=0.09, wpb=64861, bsz=128, num_updates=3891, lr=9.99769e-05, gnorm=2.292, loss_scale=8, train_wall=10, gb_free=2.8, wall=44436
2021-06-19 06:59:33 | INFO | train_inner | epoch 002: 916 / 3002 loss=2.765, ppl=6.8, wps=5790.3, ups=0.09, wpb=64889, bsz=128, num_updates=3892, lr=9.99769e-05, gnorm=2.351, loss_scale=8, train_wall=11, gb_free=2.8, wall=44447
2021-06-19 06:59:44 | INFO | train_inner | epoch 002: 917 / 3002 loss=2.943, ppl=7.69, wps=5714.7, ups=0.09, wpb=64874, bsz=128, num_updates=3893, lr=9.99769e-05, gnorm=2.345, loss_scale=8, train_wall=11, gb_free=2.8, wall=44458
2021-06-19 06:59:55 | INFO | train_inner | epoch 002: 918 / 3002 loss=2.728, ppl=6.62, wps=5898.5, ups=0.09, wpb=64902, bsz=128, num_updates=3894, lr=9.99768e-05, gnorm=2.408, loss_scale=8, train_wall=11, gb_free=2.8, wall=44469
2021-06-19 07:00:06 | INFO | train_inner | epoch 002: 919 / 3002 loss=2.849, ppl=7.2, wps=5903.4, ups=0.09, wpb=64883, bsz=128, num_updates=3895, lr=9.99768e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=44480
2021-06-19 07:00:17 | INFO | train_inner | epoch 002: 920 / 3002 loss=2.751, ppl=6.73, wps=5807.4, ups=0.09, wpb=64867, bsz=128, num_updates=3896, lr=9.99768e-05, gnorm=2.329, loss_scale=8, train_wall=11, gb_free=2.8, wall=44491
2021-06-19 07:00:28 | INFO | train_inner | epoch 002: 921 / 3002 loss=2.973, ppl=7.85, wps=5767.8, ups=0.09, wpb=64853, bsz=128, num_updates=3897, lr=9.99768e-05, gnorm=2.314, loss_scale=8, train_wall=11, gb_free=2.8, wall=44503
2021-06-19 07:00:40 | INFO | train_inner | epoch 002: 922 / 3002 loss=2.949, ppl=7.72, wps=5701.6, ups=0.09, wpb=64814, bsz=128, num_updates=3898, lr=9.99768e-05, gnorm=2.292, loss_scale=8, train_wall=11, gb_free=2.8, wall=44514
2021-06-19 07:00:51 | INFO | train_inner | epoch 002: 923 / 3002 loss=2.787, ppl=6.9, wps=5840.1, ups=0.09, wpb=64774, bsz=128, num_updates=3899, lr=9.99768e-05, gnorm=2.295, loss_scale=8, train_wall=11, gb_free=2.8, wall=44525
2021-06-19 07:01:02 | INFO | train_inner | epoch 002: 924 / 3002 loss=2.704, ppl=6.52, wps=5869.1, ups=0.09, wpb=64901, bsz=128, num_updates=3900, lr=9.99768e-05, gnorm=2.378, loss_scale=8, train_wall=11, gb_free=2.8, wall=44536
2021-06-19 07:01:13 | INFO | train_inner | epoch 002: 925 / 3002 loss=2.797, ppl=6.95, wps=5778.3, ups=0.09, wpb=64762, bsz=128, num_updates=3901, lr=9.99768e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=44547
2021-06-19 07:01:24 | INFO | train_inner | epoch 002: 926 / 3002 loss=2.626, ppl=6.17, wps=5834, ups=0.09, wpb=64831, bsz=128, num_updates=3902, lr=9.99768e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=44559
2021-06-19 07:01:35 | INFO | train_inner | epoch 002: 927 / 3002 loss=2.873, ppl=7.32, wps=5814.5, ups=0.09, wpb=64740, bsz=128, num_updates=3903, lr=9.99768e-05, gnorm=2.31, loss_scale=8, train_wall=11, gb_free=2.8, wall=44570
2021-06-19 07:01:46 | INFO | train_inner | epoch 002: 928 / 3002 loss=2.705, ppl=6.52, wps=5891, ups=0.09, wpb=64941, bsz=128, num_updates=3904, lr=9.99768e-05, gnorm=2.338, loss_scale=16, train_wall=11, gb_free=2.8, wall=44581
2021-06-19 07:01:57 | INFO | train_inner | epoch 002: 929 / 3002 loss=2.742, ppl=6.69, wps=5904.3, ups=0.09, wpb=64879, bsz=128, num_updates=3905, lr=9.99768e-05, gnorm=2.374, loss_scale=16, train_wall=11, gb_free=2.8, wall=44592
2021-06-19 07:02:09 | INFO | train_inner | epoch 002: 930 / 3002 loss=2.78, ppl=6.87, wps=5787.6, ups=0.09, wpb=64773, bsz=128, num_updates=3906, lr=9.99768e-05, gnorm=2.336, loss_scale=16, train_wall=11, gb_free=2.8, wall=44603
2021-06-19 07:02:20 | INFO | train_inner | epoch 002: 931 / 3002 loss=2.821, ppl=7.07, wps=5814.7, ups=0.09, wpb=64831, bsz=128, num_updates=3907, lr=9.99767e-05, gnorm=2.62, loss_scale=16, train_wall=11, gb_free=2.8, wall=44614
2021-06-19 07:02:31 | INFO | train_inner | epoch 002: 932 / 3002 loss=2.944, ppl=7.69, wps=5775.5, ups=0.09, wpb=64845, bsz=128, num_updates=3908, lr=9.99767e-05, gnorm=2.317, loss_scale=16, train_wall=11, gb_free=2.8, wall=44625
2021-06-19 07:02:42 | INFO | train_inner | epoch 002: 933 / 3002 loss=2.79, ppl=6.92, wps=5887.7, ups=0.09, wpb=64785, bsz=128, num_updates=3909, lr=9.99767e-05, gnorm=2.34, loss_scale=16, train_wall=11, gb_free=2.8, wall=44636
2021-06-19 07:02:53 | INFO | train_inner | epoch 002: 934 / 3002 loss=2.734, ppl=6.65, wps=5978.5, ups=0.09, wpb=64828, bsz=128, num_updates=3910, lr=9.99767e-05, gnorm=2.238, loss_scale=16, train_wall=10, gb_free=2.8, wall=44647
2021-06-19 07:03:04 | INFO | train_inner | epoch 002: 935 / 3002 loss=2.869, ppl=7.31, wps=5767.3, ups=0.09, wpb=64823, bsz=128, num_updates=3911, lr=9.99767e-05, gnorm=2.172, loss_scale=16, train_wall=11, gb_free=2.8, wall=44658
2021-06-19 07:03:15 | INFO | train_inner | epoch 002: 936 / 3002 loss=2.918, ppl=7.56, wps=5877.5, ups=0.09, wpb=64891, bsz=128, num_updates=3912, lr=9.99767e-05, gnorm=2.311, loss_scale=16, train_wall=11, gb_free=2.8, wall=44669
2021-06-19 07:03:26 | INFO | train_inner | epoch 002: 937 / 3002 loss=2.852, ppl=7.22, wps=5853.8, ups=0.09, wpb=64817, bsz=128, num_updates=3913, lr=9.99767e-05, gnorm=2.224, loss_scale=16, train_wall=11, gb_free=2.8, wall=44680
2021-06-19 07:03:37 | INFO | train_inner | epoch 002: 938 / 3002 loss=2.772, ppl=6.83, wps=5791.1, ups=0.09, wpb=64786, bsz=128, num_updates=3914, lr=9.99767e-05, gnorm=2.268, loss_scale=16, train_wall=11, gb_free=2.8, wall=44692
2021-06-19 07:03:48 | INFO | train_inner | epoch 002: 939 / 3002 loss=2.569, ppl=5.93, wps=5804.5, ups=0.09, wpb=64796, bsz=128, num_updates=3915, lr=9.99767e-05, gnorm=2.308, loss_scale=16, train_wall=11, gb_free=2.8, wall=44703
2021-06-19 07:03:59 | INFO | train_inner | epoch 002: 940 / 3002 loss=2.841, ppl=7.16, wps=5871.4, ups=0.09, wpb=64779, bsz=128, num_updates=3916, lr=9.99767e-05, gnorm=2.282, loss_scale=16, train_wall=11, gb_free=2.8, wall=44714
2021-06-19 07:04:10 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-19 07:04:21 | INFO | train_inner | epoch 002: 942 / 3002 loss=2.91, ppl=7.52, wps=2961, ups=0.05, wpb=64821, bsz=128, num_updates=3917, lr=9.99767e-05, gnorm=2.284, loss_scale=8, train_wall=21, gb_free=2.8, wall=44736
2021-06-19 07:04:33 | INFO | train_inner | epoch 002: 943 / 3002 loss=2.88, ppl=7.36, wps=5756.8, ups=0.09, wpb=64836, bsz=128, num_updates=3918, lr=9.99767e-05, gnorm=2.35, loss_scale=8, train_wall=11, gb_free=2.8, wall=44747
2021-06-19 07:04:44 | INFO | train_inner | epoch 002: 944 / 3002 loss=2.82, ppl=7.06, wps=5946.6, ups=0.09, wpb=64846, bsz=128, num_updates=3919, lr=9.99766e-05, gnorm=2.343, loss_scale=8, train_wall=10, gb_free=2.8, wall=44758
2021-06-19 07:04:55 | INFO | train_inner | epoch 002: 945 / 3002 loss=2.687, ppl=6.44, wps=5823, ups=0.09, wpb=64822, bsz=128, num_updates=3920, lr=9.99766e-05, gnorm=2.192, loss_scale=8, train_wall=11, gb_free=2.8, wall=44769
2021-06-19 07:05:06 | INFO | train_inner | epoch 002: 946 / 3002 loss=2.791, ppl=6.92, wps=5782.2, ups=0.09, wpb=64775, bsz=128, num_updates=3921, lr=9.99766e-05, gnorm=2.312, loss_scale=8, train_wall=11, gb_free=2.8, wall=44780
2021-06-19 07:05:17 | INFO | train_inner | epoch 002: 947 / 3002 loss=2.7, ppl=6.5, wps=5832.6, ups=0.09, wpb=64827, bsz=128, num_updates=3922, lr=9.99766e-05, gnorm=2.255, loss_scale=8, train_wall=11, gb_free=2.8, wall=44791
2021-06-19 07:05:28 | INFO | train_inner | epoch 002: 948 / 3002 loss=2.681, ppl=6.41, wps=5884.5, ups=0.09, wpb=64858, bsz=128, num_updates=3923, lr=9.99766e-05, gnorm=2.376, loss_scale=8, train_wall=11, gb_free=2.8, wall=44802
2021-06-19 07:05:39 | INFO | train_inner | epoch 002: 949 / 3002 loss=2.754, ppl=6.74, wps=5941, ups=0.09, wpb=64857, bsz=128, num_updates=3924, lr=9.99766e-05, gnorm=2.276, loss_scale=8, train_wall=10, gb_free=2.8, wall=44813
2021-06-19 07:05:50 | INFO | train_inner | epoch 002: 950 / 3002 loss=2.716, ppl=6.57, wps=5705.2, ups=0.09, wpb=64873, bsz=128, num_updates=3925, lr=9.99766e-05, gnorm=2.448, loss_scale=8, train_wall=11, gb_free=2.8, wall=44825
2021-06-19 07:06:02 | INFO | train_inner | epoch 002: 951 / 3002 loss=2.663, ppl=6.33, wps=5788.5, ups=0.09, wpb=64910, bsz=128, num_updates=3926, lr=9.99766e-05, gnorm=2.36, loss_scale=8, train_wall=11, gb_free=2.8, wall=44836
2021-06-19 07:06:12 | INFO | train_inner | epoch 002: 952 / 3002 loss=2.718, ppl=6.58, wps=5904.3, ups=0.09, wpb=64845, bsz=128, num_updates=3927, lr=9.99766e-05, gnorm=2.255, loss_scale=8, train_wall=11, gb_free=2.8, wall=44847
2021-06-19 07:06:24 | INFO | train_inner | epoch 002: 953 / 3002 loss=2.869, ppl=7.31, wps=5745.8, ups=0.09, wpb=64808, bsz=128, num_updates=3928, lr=9.99766e-05, gnorm=2.332, loss_scale=8, train_wall=11, gb_free=2.8, wall=44858
2021-06-19 07:06:35 | INFO | train_inner | epoch 002: 954 / 3002 loss=2.879, ppl=7.36, wps=5802.9, ups=0.09, wpb=64844, bsz=128, num_updates=3929, lr=9.99766e-05, gnorm=2.288, loss_scale=8, train_wall=11, gb_free=2.8, wall=44869
2021-06-19 07:06:46 | INFO | train_inner | epoch 002: 955 / 3002 loss=2.899, ppl=7.46, wps=5846.8, ups=0.09, wpb=64813, bsz=128, num_updates=3930, lr=9.99766e-05, gnorm=2.27, loss_scale=8, train_wall=11, gb_free=2.8, wall=44880
2021-06-19 07:06:57 | INFO | train_inner | epoch 002: 956 / 3002 loss=2.763, ppl=6.79, wps=5806.3, ups=0.09, wpb=64854, bsz=128, num_updates=3931, lr=9.99766e-05, gnorm=2.504, loss_scale=8, train_wall=11, gb_free=2.8, wall=44892
2021-06-19 07:07:08 | INFO | train_inner | epoch 002: 957 / 3002 loss=2.704, ppl=6.52, wps=5968.8, ups=0.09, wpb=64912, bsz=128, num_updates=3932, lr=9.99765e-05, gnorm=2.346, loss_scale=8, train_wall=10, gb_free=2.8, wall=44902
2021-06-19 07:07:19 | INFO | train_inner | epoch 002: 958 / 3002 loss=2.758, ppl=6.77, wps=5826.5, ups=0.09, wpb=64838, bsz=128, num_updates=3933, lr=9.99765e-05, gnorm=2.391, loss_scale=8, train_wall=11, gb_free=2.8, wall=44914
2021-06-19 07:07:30 | INFO | train_inner | epoch 002: 959 / 3002 loss=2.837, ppl=7.15, wps=5800.2, ups=0.09, wpb=64841, bsz=128, num_updates=3934, lr=9.99765e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=44925
2021-06-19 07:07:41 | INFO | train_inner | epoch 002: 960 / 3002 loss=2.827, ppl=7.1, wps=5868.5, ups=0.09, wpb=64847, bsz=128, num_updates=3935, lr=9.99765e-05, gnorm=2.342, loss_scale=8, train_wall=11, gb_free=2.8, wall=44936
2021-06-19 07:07:53 | INFO | train_inner | epoch 002: 961 / 3002 loss=2.697, ppl=6.48, wps=5825.6, ups=0.09, wpb=64757, bsz=128, num_updates=3936, lr=9.99765e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=44947
2021-06-19 07:08:04 | INFO | train_inner | epoch 002: 962 / 3002 loss=2.636, ppl=6.21, wps=5803.9, ups=0.09, wpb=64790, bsz=128, num_updates=3937, lr=9.99765e-05, gnorm=2.392, loss_scale=8, train_wall=11, gb_free=2.8, wall=44958
2021-06-19 07:08:15 | INFO | train_inner | epoch 002: 963 / 3002 loss=2.779, ppl=6.86, wps=5791.8, ups=0.09, wpb=64814, bsz=128, num_updates=3938, lr=9.99765e-05, gnorm=2.397, loss_scale=8, train_wall=11, gb_free=2.8, wall=44969
2021-06-19 07:08:26 | INFO | train_inner | epoch 002: 964 / 3002 loss=2.814, ppl=7.03, wps=5805, ups=0.09, wpb=64772, bsz=128, num_updates=3939, lr=9.99765e-05, gnorm=2.338, loss_scale=8, train_wall=11, gb_free=2.8, wall=44980
2021-06-19 07:08:37 | INFO | train_inner | epoch 002: 965 / 3002 loss=2.797, ppl=6.95, wps=5890.2, ups=0.09, wpb=64771, bsz=128, num_updates=3940, lr=9.99765e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=44991
2021-06-19 07:08:48 | INFO | train_inner | epoch 002: 966 / 3002 loss=2.749, ppl=6.72, wps=5917.8, ups=0.09, wpb=64888, bsz=128, num_updates=3941, lr=9.99765e-05, gnorm=2.273, loss_scale=8, train_wall=11, gb_free=2.8, wall=45002
2021-06-19 07:08:59 | INFO | train_inner | epoch 002: 967 / 3002 loss=2.858, ppl=7.25, wps=5892.4, ups=0.09, wpb=64781, bsz=128, num_updates=3942, lr=9.99765e-05, gnorm=2.518, loss_scale=8, train_wall=11, gb_free=2.8, wall=45013
2021-06-19 07:09:10 | INFO | train_inner | epoch 002: 968 / 3002 loss=2.835, ppl=7.14, wps=5916.4, ups=0.09, wpb=64914, bsz=128, num_updates=3943, lr=9.99765e-05, gnorm=2.27, loss_scale=8, train_wall=11, gb_free=2.8, wall=45024
2021-06-19 07:09:21 | INFO | train_inner | epoch 002: 969 / 3002 loss=2.777, ppl=6.85, wps=5800.3, ups=0.09, wpb=64820, bsz=128, num_updates=3944, lr=9.99764e-05, gnorm=2.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=45036
2021-06-19 07:09:32 | INFO | train_inner | epoch 002: 970 / 3002 loss=2.678, ppl=6.4, wps=5901.5, ups=0.09, wpb=64790, bsz=128, num_updates=3945, lr=9.99764e-05, gnorm=2.265, loss_scale=8, train_wall=11, gb_free=2.8, wall=45046
2021-06-19 07:09:43 | INFO | train_inner | epoch 002: 971 / 3002 loss=2.749, ppl=6.72, wps=5846.6, ups=0.09, wpb=64829, bsz=128, num_updates=3946, lr=9.99764e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=45058
2021-06-19 07:09:54 | INFO | train_inner | epoch 002: 972 / 3002 loss=2.729, ppl=6.63, wps=5918.6, ups=0.09, wpb=64818, bsz=128, num_updates=3947, lr=9.99764e-05, gnorm=2.242, loss_scale=8, train_wall=11, gb_free=2.8, wall=45069
2021-06-19 07:10:05 | INFO | train_inner | epoch 002: 973 / 3002 loss=2.748, ppl=6.72, wps=5759.8, ups=0.09, wpb=64848, bsz=128, num_updates=3948, lr=9.99764e-05, gnorm=2.554, loss_scale=8, train_wall=11, gb_free=2.8, wall=45080
2021-06-19 07:10:16 | INFO | train_inner | epoch 002: 974 / 3002 loss=2.737, ppl=6.67, wps=5892.6, ups=0.09, wpb=64868, bsz=128, num_updates=3949, lr=9.99764e-05, gnorm=2.393, loss_scale=8, train_wall=11, gb_free=2.8, wall=45091
2021-06-19 07:10:28 | INFO | train_inner | epoch 002: 975 / 3002 loss=2.844, ppl=7.18, wps=5830.7, ups=0.09, wpb=64871, bsz=128, num_updates=3950, lr=9.99764e-05, gnorm=2.262, loss_scale=8, train_wall=11, gb_free=2.8, wall=45102
2021-06-19 07:10:39 | INFO | train_inner | epoch 002: 976 / 3002 loss=2.688, ppl=6.44, wps=5721.7, ups=0.09, wpb=64825, bsz=128, num_updates=3951, lr=9.99764e-05, gnorm=2.347, loss_scale=8, train_wall=11, gb_free=2.8, wall=45113
2021-06-19 07:10:50 | INFO | train_inner | epoch 002: 977 / 3002 loss=2.609, ppl=6.1, wps=5902.8, ups=0.09, wpb=64787, bsz=128, num_updates=3952, lr=9.99764e-05, gnorm=2.359, loss_scale=8, train_wall=11, gb_free=2.8, wall=45124
2021-06-19 07:11:01 | INFO | train_inner | epoch 002: 978 / 3002 loss=2.765, ppl=6.8, wps=5908.3, ups=0.09, wpb=64808, bsz=128, num_updates=3953, lr=9.99764e-05, gnorm=2.416, loss_scale=8, train_wall=10, gb_free=2.8, wall=45135
2021-06-19 07:11:12 | INFO | train_inner | epoch 002: 979 / 3002 loss=2.779, ppl=6.86, wps=5805.9, ups=0.09, wpb=64899, bsz=128, num_updates=3954, lr=9.99764e-05, gnorm=2.269, loss_scale=8, train_wall=11, gb_free=2.8, wall=45146
2021-06-19 07:11:23 | INFO | train_inner | epoch 002: 980 / 3002 loss=2.685, ppl=6.43, wps=5881.4, ups=0.09, wpb=64786, bsz=128, num_updates=3955, lr=9.99764e-05, gnorm=2.742, loss_scale=8, train_wall=11, gb_free=2.8, wall=45157
2021-06-19 07:11:34 | INFO | train_inner | epoch 002: 981 / 3002 loss=2.795, ppl=6.94, wps=5819.4, ups=0.09, wpb=64842, bsz=128, num_updates=3956, lr=9.99764e-05, gnorm=2.528, loss_scale=8, train_wall=11, gb_free=2.8, wall=45169
2021-06-19 07:11:45 | INFO | train_inner | epoch 002: 982 / 3002 loss=2.797, ppl=6.95, wps=5786.4, ups=0.09, wpb=64745, bsz=128, num_updates=3957, lr=9.99763e-05, gnorm=3.612, loss_scale=8, train_wall=11, gb_free=2.8, wall=45180
2021-06-19 07:11:56 | INFO | train_inner | epoch 002: 983 / 3002 loss=2.956, ppl=7.76, wps=5973.5, ups=0.09, wpb=64819, bsz=128, num_updates=3958, lr=9.99763e-05, gnorm=2.357, loss_scale=8, train_wall=10, gb_free=2.8, wall=45191
2021-06-19 07:12:07 | INFO | train_inner | epoch 002: 984 / 3002 loss=2.734, ppl=6.65, wps=5948, ups=0.09, wpb=64769, bsz=128, num_updates=3959, lr=9.99763e-05, gnorm=2.261, loss_scale=8, train_wall=10, gb_free=2.8, wall=45201
2021-06-19 07:12:18 | INFO | train_inner | epoch 002: 985 / 3002 loss=2.825, ppl=7.09, wps=5879.2, ups=0.09, wpb=64780, bsz=128, num_updates=3960, lr=9.99763e-05, gnorm=2.199, loss_scale=8, train_wall=11, gb_free=2.8, wall=45212
2021-06-19 07:12:29 | INFO | train_inner | epoch 002: 986 / 3002 loss=2.946, ppl=7.71, wps=5972.9, ups=0.09, wpb=64804, bsz=128, num_updates=3961, lr=9.99763e-05, gnorm=2.916, loss_scale=8, train_wall=10, gb_free=2.8, wall=45223
2021-06-19 07:12:40 | INFO | train_inner | epoch 002: 987 / 3002 loss=2.915, ppl=7.54, wps=5778.3, ups=0.09, wpb=64713, bsz=128, num_updates=3962, lr=9.99763e-05, gnorm=2.183, loss_scale=8, train_wall=11, gb_free=2.8, wall=45235
2021-06-19 07:12:51 | INFO | train_inner | epoch 002: 988 / 3002 loss=2.772, ppl=6.83, wps=5845.3, ups=0.09, wpb=64890, bsz=128, num_updates=3963, lr=9.99763e-05, gnorm=2.335, loss_scale=8, train_wall=11, gb_free=2.8, wall=45246
2021-06-19 07:13:02 | INFO | train_inner | epoch 002: 989 / 3002 loss=2.829, ppl=7.11, wps=5824.3, ups=0.09, wpb=64791, bsz=128, num_updates=3964, lr=9.99763e-05, gnorm=2.229, loss_scale=8, train_wall=11, gb_free=2.8, wall=45257
2021-06-19 07:13:14 | INFO | train_inner | epoch 002: 990 / 3002 loss=2.763, ppl=6.79, wps=5839.3, ups=0.09, wpb=64893, bsz=128, num_updates=3965, lr=9.99763e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=45268
2021-06-19 07:13:25 | INFO | train_inner | epoch 002: 991 / 3002 loss=2.707, ppl=6.53, wps=5831.8, ups=0.09, wpb=64832, bsz=128, num_updates=3966, lr=9.99763e-05, gnorm=2.187, loss_scale=8, train_wall=11, gb_free=2.8, wall=45279
2021-06-19 07:13:36 | INFO | train_inner | epoch 002: 992 / 3002 loss=2.786, ppl=6.9, wps=5912.2, ups=0.09, wpb=64847, bsz=128, num_updates=3967, lr=9.99763e-05, gnorm=2.268, loss_scale=8, train_wall=11, gb_free=2.8, wall=45290
2021-06-19 07:13:47 | INFO | train_inner | epoch 002: 993 / 3002 loss=2.921, ppl=7.58, wps=5729.9, ups=0.09, wpb=64851, bsz=128, num_updates=3968, lr=9.99763e-05, gnorm=2.293, loss_scale=8, train_wall=11, gb_free=2.8, wall=45301
2021-06-19 07:13:58 | INFO | train_inner | epoch 002: 994 / 3002 loss=2.865, ppl=7.29, wps=5788.6, ups=0.09, wpb=64820, bsz=128, num_updates=3969, lr=9.99762e-05, gnorm=2.27, loss_scale=8, train_wall=11, gb_free=2.8, wall=45312
2021-06-19 07:14:09 | INFO | train_inner | epoch 002: 995 / 3002 loss=2.719, ppl=6.58, wps=5910.9, ups=0.09, wpb=64850, bsz=128, num_updates=3970, lr=9.99762e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=45323
2021-06-19 07:14:20 | INFO | train_inner | epoch 002: 996 / 3002 loss=2.631, ppl=6.19, wps=5837.4, ups=0.09, wpb=64779, bsz=128, num_updates=3971, lr=9.99762e-05, gnorm=2.425, loss_scale=8, train_wall=11, gb_free=2.8, wall=45335
2021-06-19 07:14:31 | INFO | train_inner | epoch 002: 997 / 3002 loss=2.887, ppl=7.4, wps=6000.1, ups=0.09, wpb=64883, bsz=128, num_updates=3972, lr=9.99762e-05, gnorm=2.299, loss_scale=8, train_wall=10, gb_free=2.8, wall=45345
2021-06-19 07:14:42 | INFO | train_inner | epoch 002: 998 / 3002 loss=2.808, ppl=7, wps=5911.1, ups=0.09, wpb=64920, bsz=128, num_updates=3973, lr=9.99762e-05, gnorm=2.312, loss_scale=8, train_wall=11, gb_free=2.8, wall=45356
2021-06-19 07:14:53 | INFO | train_inner | epoch 002: 999 / 3002 loss=2.727, ppl=6.62, wps=5868.5, ups=0.09, wpb=64863, bsz=128, num_updates=3974, lr=9.99762e-05, gnorm=2.288, loss_scale=8, train_wall=11, gb_free=2.8, wall=45367
2021-06-19 07:15:04 | INFO | train_inner | epoch 002: 1000 / 3002 loss=2.996, ppl=7.98, wps=5801.7, ups=0.09, wpb=64793, bsz=128, num_updates=3975, lr=9.99762e-05, gnorm=2.289, loss_scale=8, train_wall=11, gb_free=2.8, wall=45379
2021-06-19 07:15:15 | INFO | train_inner | epoch 002: 1001 / 3002 loss=2.713, ppl=6.56, wps=5768.5, ups=0.09, wpb=64879, bsz=128, num_updates=3976, lr=9.99762e-05, gnorm=2.579, loss_scale=8, train_wall=11, gb_free=2.8, wall=45390
2021-06-19 07:15:27 | INFO | train_inner | epoch 002: 1002 / 3002 loss=2.811, ppl=7.02, wps=5768.9, ups=0.09, wpb=64817, bsz=128, num_updates=3977, lr=9.99762e-05, gnorm=2.316, loss_scale=8, train_wall=11, gb_free=2.8, wall=45401
2021-06-19 07:15:38 | INFO | train_inner | epoch 002: 1003 / 3002 loss=2.832, ppl=7.12, wps=5913.2, ups=0.09, wpb=64884, bsz=128, num_updates=3978, lr=9.99762e-05, gnorm=2.291, loss_scale=8, train_wall=11, gb_free=2.8, wall=45412
2021-06-19 07:15:49 | INFO | train_inner | epoch 002: 1004 / 3002 loss=2.836, ppl=7.14, wps=5875.6, ups=0.09, wpb=64894, bsz=128, num_updates=3979, lr=9.99762e-05, gnorm=2.301, loss_scale=8, train_wall=11, gb_free=2.8, wall=45423
2021-06-19 07:16:00 | INFO | train_inner | epoch 002: 1005 / 3002 loss=2.779, ppl=6.87, wps=5871.5, ups=0.09, wpb=64865, bsz=128, num_updates=3980, lr=9.99762e-05, gnorm=2.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=45434
2021-06-19 07:16:11 | INFO | train_inner | epoch 002: 1006 / 3002 loss=2.745, ppl=6.7, wps=5877.7, ups=0.09, wpb=64831, bsz=128, num_updates=3981, lr=9.99762e-05, gnorm=2.293, loss_scale=8, train_wall=11, gb_free=2.8, wall=45445
2021-06-19 07:16:22 | INFO | train_inner | epoch 002: 1007 / 3002 loss=2.712, ppl=6.55, wps=5736, ups=0.09, wpb=64891, bsz=128, num_updates=3982, lr=9.99761e-05, gnorm=2.238, loss_scale=8, train_wall=11, gb_free=2.8, wall=45456
2021-06-19 07:16:33 | INFO | train_inner | epoch 002: 1008 / 3002 loss=2.749, ppl=6.72, wps=5895.9, ups=0.09, wpb=64862, bsz=128, num_updates=3983, lr=9.99761e-05, gnorm=2.249, loss_scale=8, train_wall=11, gb_free=2.8, wall=45467
2021-06-19 07:16:44 | INFO | train_inner | epoch 002: 1009 / 3002 loss=2.917, ppl=7.55, wps=5819.6, ups=0.09, wpb=64710, bsz=128, num_updates=3984, lr=9.99761e-05, gnorm=2.294, loss_scale=8, train_wall=11, gb_free=2.8, wall=45479
2021-06-19 07:16:55 | INFO | train_inner | epoch 002: 1010 / 3002 loss=2.702, ppl=6.51, wps=5946.3, ups=0.09, wpb=64865, bsz=128, num_updates=3985, lr=9.99761e-05, gnorm=2.281, loss_scale=8, train_wall=10, gb_free=2.8, wall=45489
2021-06-19 07:17:06 | INFO | train_inner | epoch 002: 1011 / 3002 loss=2.669, ppl=6.36, wps=5891, ups=0.09, wpb=64798, bsz=128, num_updates=3986, lr=9.99761e-05, gnorm=2.267, loss_scale=8, train_wall=11, gb_free=2.8, wall=45500
2021-06-19 07:17:17 | INFO | train_inner | epoch 002: 1012 / 3002 loss=2.74, ppl=6.68, wps=5812.7, ups=0.09, wpb=64816, bsz=128, num_updates=3987, lr=9.99761e-05, gnorm=2.306, loss_scale=8, train_wall=11, gb_free=2.8, wall=45512
2021-06-19 07:17:28 | INFO | train_inner | epoch 002: 1013 / 3002 loss=2.718, ppl=6.58, wps=5873.5, ups=0.09, wpb=64742, bsz=128, num_updates=3988, lr=9.99761e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=45523
2021-06-19 07:17:39 | INFO | train_inner | epoch 002: 1014 / 3002 loss=2.904, ppl=7.48, wps=5880.5, ups=0.09, wpb=64792, bsz=128, num_updates=3989, lr=9.99761e-05, gnorm=2.235, loss_scale=8, train_wall=11, gb_free=2.8, wall=45534
2021-06-19 07:17:50 | INFO | train_inner | epoch 002: 1015 / 3002 loss=2.816, ppl=7.04, wps=5845.9, ups=0.09, wpb=64810, bsz=128, num_updates=3990, lr=9.99761e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=45545
2021-06-19 07:18:02 | INFO | train_inner | epoch 002: 1016 / 3002 loss=2.606, ppl=6.09, wps=5848, ups=0.09, wpb=64869, bsz=128, num_updates=3991, lr=9.99761e-05, gnorm=2.294, loss_scale=8, train_wall=11, gb_free=2.8, wall=45556
2021-06-19 07:18:12 | INFO | train_inner | epoch 002: 1017 / 3002 loss=2.779, ppl=6.86, wps=5908.6, ups=0.09, wpb=64832, bsz=128, num_updates=3992, lr=9.99761e-05, gnorm=2.317, loss_scale=8, train_wall=11, gb_free=2.8, wall=45567
2021-06-19 07:18:24 | INFO | train_inner | epoch 002: 1018 / 3002 loss=2.907, ppl=7.5, wps=5832.6, ups=0.09, wpb=64876, bsz=128, num_updates=3993, lr=9.99761e-05, gnorm=2.239, loss_scale=8, train_wall=11, gb_free=2.8, wall=45578
2021-06-19 07:18:35 | INFO | train_inner | epoch 002: 1019 / 3002 loss=2.761, ppl=6.78, wps=5867.1, ups=0.09, wpb=64854, bsz=128, num_updates=3994, lr=9.9976e-05, gnorm=2.288, loss_scale=8, train_wall=11, gb_free=2.8, wall=45589
2021-06-19 07:18:46 | INFO | train_inner | epoch 002: 1020 / 3002 loss=2.768, ppl=6.81, wps=5882.4, ups=0.09, wpb=64906, bsz=128, num_updates=3995, lr=9.9976e-05, gnorm=2.298, loss_scale=8, train_wall=11, gb_free=2.8, wall=45600
2021-06-19 07:18:57 | INFO | train_inner | epoch 002: 1021 / 3002 loss=2.808, ppl=7, wps=5830.4, ups=0.09, wpb=64785, bsz=128, num_updates=3996, lr=9.9976e-05, gnorm=2.179, loss_scale=8, train_wall=11, gb_free=2.8, wall=45611
2021-06-19 07:19:08 | INFO | train_inner | epoch 002: 1022 / 3002 loss=2.879, ppl=7.36, wps=5881.4, ups=0.09, wpb=64848, bsz=128, num_updates=3997, lr=9.9976e-05, gnorm=2.38, loss_scale=8, train_wall=11, gb_free=2.8, wall=45622
2021-06-19 07:19:19 | INFO | train_inner | epoch 002: 1023 / 3002 loss=2.94, ppl=7.67, wps=5793.8, ups=0.09, wpb=64857, bsz=128, num_updates=3998, lr=9.9976e-05, gnorm=2.209, loss_scale=8, train_wall=11, gb_free=2.8, wall=45633
2021-06-19 07:19:30 | INFO | train_inner | epoch 002: 1024 / 3002 loss=2.759, ppl=6.77, wps=5880.8, ups=0.09, wpb=64847, bsz=128, num_updates=3999, lr=9.9976e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=45644
2021-06-19 07:19:41 | INFO | train_inner | epoch 002: 1025 / 3002 loss=2.799, ppl=6.96, wps=5906.4, ups=0.09, wpb=64820, bsz=128, num_updates=4000, lr=9.9976e-05, gnorm=2.238, loss_scale=8, train_wall=11, gb_free=2.8, wall=45655
2021-06-19 07:19:52 | INFO | train_inner | epoch 002: 1026 / 3002 loss=2.761, ppl=6.78, wps=5782.1, ups=0.09, wpb=64881, bsz=128, num_updates=4001, lr=9.9976e-05, gnorm=2.361, loss_scale=8, train_wall=11, gb_free=2.8, wall=45667
2021-06-19 07:20:04 | INFO | train_inner | epoch 002: 1027 / 3002 loss=2.789, ppl=6.91, wps=5696, ups=0.09, wpb=64774, bsz=128, num_updates=4002, lr=9.9976e-05, gnorm=2.499, loss_scale=8, train_wall=11, gb_free=2.8, wall=45678
2021-06-19 07:20:15 | INFO | train_inner | epoch 002: 1028 / 3002 loss=2.844, ppl=7.18, wps=5872.3, ups=0.09, wpb=64825, bsz=128, num_updates=4003, lr=9.9976e-05, gnorm=2.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=45689
2021-06-19 07:20:26 | INFO | train_inner | epoch 002: 1029 / 3002 loss=2.884, ppl=7.38, wps=5888, ups=0.09, wpb=64776, bsz=128, num_updates=4004, lr=9.9976e-05, gnorm=2.282, loss_scale=8, train_wall=11, gb_free=2.8, wall=45700
2021-06-19 07:20:37 | INFO | train_inner | epoch 002: 1030 / 3002 loss=2.823, ppl=7.08, wps=5827.7, ups=0.09, wpb=64875, bsz=128, num_updates=4005, lr=9.9976e-05, gnorm=2.291, loss_scale=8, train_wall=11, gb_free=2.8, wall=45711
2021-06-19 07:20:48 | INFO | train_inner | epoch 002: 1031 / 3002 loss=2.841, ppl=7.17, wps=5790, ups=0.09, wpb=64901, bsz=128, num_updates=4006, lr=9.9976e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=45722
2021-06-19 07:20:59 | INFO | train_inner | epoch 002: 1032 / 3002 loss=2.922, ppl=7.58, wps=5790.4, ups=0.09, wpb=64755, bsz=128, num_updates=4007, lr=9.99759e-05, gnorm=2.328, loss_scale=8, train_wall=11, gb_free=2.8, wall=45734
2021-06-19 07:21:10 | INFO | train_inner | epoch 002: 1033 / 3002 loss=2.905, ppl=7.49, wps=5774.3, ups=0.09, wpb=64783, bsz=128, num_updates=4008, lr=9.99759e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=45745
2021-06-19 07:21:22 | INFO | train_inner | epoch 002: 1034 / 3002 loss=2.648, ppl=6.27, wps=5823.2, ups=0.09, wpb=64885, bsz=128, num_updates=4009, lr=9.99759e-05, gnorm=2.2, loss_scale=8, train_wall=11, gb_free=2.8, wall=45756
2021-06-19 07:21:33 | INFO | train_inner | epoch 002: 1035 / 3002 loss=2.682, ppl=6.42, wps=5797.4, ups=0.09, wpb=64769, bsz=128, num_updates=4010, lr=9.99759e-05, gnorm=2.245, loss_scale=8, train_wall=11, gb_free=2.8, wall=45767
2021-06-19 07:21:44 | INFO | train_inner | epoch 002: 1036 / 3002 loss=2.924, ppl=7.59, wps=5838, ups=0.09, wpb=64840, bsz=128, num_updates=4011, lr=9.99759e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=45778
2021-06-19 07:21:55 | INFO | train_inner | epoch 002: 1037 / 3002 loss=2.79, ppl=6.92, wps=5924.7, ups=0.09, wpb=64829, bsz=128, num_updates=4012, lr=9.99759e-05, gnorm=2.417, loss_scale=8, train_wall=10, gb_free=2.8, wall=45789
2021-06-19 07:22:06 | INFO | train_inner | epoch 002: 1038 / 3002 loss=2.762, ppl=6.78, wps=5779.3, ups=0.09, wpb=64864, bsz=128, num_updates=4013, lr=9.99759e-05, gnorm=2.265, loss_scale=8, train_wall=11, gb_free=2.8, wall=45800
2021-06-19 07:22:17 | INFO | train_inner | epoch 002: 1039 / 3002 loss=2.965, ppl=7.81, wps=5852.4, ups=0.09, wpb=64763, bsz=128, num_updates=4014, lr=9.99759e-05, gnorm=3.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=45811
2021-06-19 07:22:28 | INFO | train_inner | epoch 002: 1040 / 3002 loss=2.727, ppl=6.62, wps=5862.6, ups=0.09, wpb=64860, bsz=128, num_updates=4015, lr=9.99759e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=45822
2021-06-19 07:22:39 | INFO | train_inner | epoch 002: 1041 / 3002 loss=2.858, ppl=7.25, wps=5787.6, ups=0.09, wpb=64771, bsz=128, num_updates=4016, lr=9.99759e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=45834
2021-06-19 07:22:50 | INFO | train_inner | epoch 002: 1042 / 3002 loss=2.602, ppl=6.07, wps=5946.5, ups=0.09, wpb=64791, bsz=128, num_updates=4017, lr=9.99759e-05, gnorm=2.75, loss_scale=8, train_wall=10, gb_free=2.8, wall=45845
2021-06-19 07:23:01 | INFO | train_inner | epoch 002: 1043 / 3002 loss=2.761, ppl=6.78, wps=5836.2, ups=0.09, wpb=64800, bsz=128, num_updates=4018, lr=9.99759e-05, gnorm=2.205, loss_scale=8, train_wall=11, gb_free=2.8, wall=45856
2021-06-19 07:23:12 | INFO | train_inner | epoch 002: 1044 / 3002 loss=2.738, ppl=6.67, wps=5851.3, ups=0.09, wpb=64840, bsz=128, num_updates=4019, lr=9.99758e-05, gnorm=2.23, loss_scale=8, train_wall=11, gb_free=2.8, wall=45867
2021-06-19 07:23:24 | INFO | train_inner | epoch 002: 1045 / 3002 loss=2.709, ppl=6.54, wps=5785.7, ups=0.09, wpb=64895, bsz=128, num_updates=4020, lr=9.99758e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=45878
2021-06-19 07:23:35 | INFO | train_inner | epoch 002: 1046 / 3002 loss=2.825, ppl=7.08, wps=5848.8, ups=0.09, wpb=64815, bsz=128, num_updates=4021, lr=9.99758e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=45889
2021-06-19 07:23:46 | INFO | train_inner | epoch 002: 1047 / 3002 loss=2.944, ppl=7.7, wps=5873.4, ups=0.09, wpb=64795, bsz=128, num_updates=4022, lr=9.99758e-05, gnorm=7.503, loss_scale=8, train_wall=11, gb_free=2.8, wall=45900
2021-06-19 07:23:57 | INFO | train_inner | epoch 002: 1048 / 3002 loss=2.799, ppl=6.96, wps=5853.1, ups=0.09, wpb=64861, bsz=128, num_updates=4023, lr=9.99758e-05, gnorm=2.446, loss_scale=8, train_wall=11, gb_free=2.8, wall=45911
2021-06-19 07:24:08 | INFO | train_inner | epoch 002: 1049 / 3002 loss=2.822, ppl=7.07, wps=5822, ups=0.09, wpb=64813, bsz=128, num_updates=4024, lr=9.99758e-05, gnorm=2.461, loss_scale=8, train_wall=11, gb_free=2.8, wall=45922
2021-06-19 07:24:19 | INFO | train_inner | epoch 002: 1050 / 3002 loss=2.616, ppl=6.13, wps=5706.8, ups=0.09, wpb=64743, bsz=128, num_updates=4025, lr=9.99758e-05, gnorm=2.329, loss_scale=8, train_wall=11, gb_free=2.8, wall=45934
2021-06-19 07:24:31 | INFO | train_inner | epoch 002: 1051 / 3002 loss=2.884, ppl=7.38, wps=5778.4, ups=0.09, wpb=64828, bsz=128, num_updates=4026, lr=9.99758e-05, gnorm=3.113, loss_scale=8, train_wall=11, gb_free=2.8, wall=45945
2021-06-19 07:24:41 | INFO | train_inner | epoch 002: 1052 / 3002 loss=2.854, ppl=7.23, wps=6004.6, ups=0.09, wpb=64841, bsz=128, num_updates=4027, lr=9.99758e-05, gnorm=2.764, loss_scale=8, train_wall=10, gb_free=2.8, wall=45956
2021-06-19 07:24:52 | INFO | train_inner | epoch 002: 1053 / 3002 loss=2.72, ppl=6.59, wps=5821.5, ups=0.09, wpb=64823, bsz=128, num_updates=4028, lr=9.99758e-05, gnorm=2.509, loss_scale=8, train_wall=11, gb_free=2.8, wall=45967
2021-06-19 07:25:03 | INFO | train_inner | epoch 002: 1054 / 3002 loss=2.686, ppl=6.43, wps=5942, ups=0.09, wpb=64871, bsz=128, num_updates=4029, lr=9.99758e-05, gnorm=2.505, loss_scale=8, train_wall=10, gb_free=2.8, wall=45978
2021-06-19 07:25:14 | INFO | train_inner | epoch 002: 1055 / 3002 loss=2.75, ppl=6.73, wps=5849.8, ups=0.09, wpb=64864, bsz=128, num_updates=4030, lr=9.99758e-05, gnorm=2.694, loss_scale=8, train_wall=11, gb_free=2.8, wall=45989
2021-06-19 07:25:26 | INFO | train_inner | epoch 002: 1056 / 3002 loss=2.913, ppl=7.53, wps=5806.8, ups=0.09, wpb=64801, bsz=128, num_updates=4031, lr=9.99758e-05, gnorm=2.438, loss_scale=8, train_wall=11, gb_free=2.8, wall=46000
2021-06-19 07:25:37 | INFO | train_inner | epoch 002: 1057 / 3002 loss=2.787, ppl=6.9, wps=5829.3, ups=0.09, wpb=64938, bsz=128, num_updates=4032, lr=9.99757e-05, gnorm=2.319, loss_scale=8, train_wall=11, gb_free=2.8, wall=46011
2021-06-19 07:25:48 | INFO | train_inner | epoch 002: 1058 / 3002 loss=2.639, ppl=6.23, wps=5957.2, ups=0.09, wpb=64837, bsz=128, num_updates=4033, lr=9.99757e-05, gnorm=2.295, loss_scale=8, train_wall=10, gb_free=2.8, wall=46022
2021-06-19 07:25:59 | INFO | train_inner | epoch 002: 1059 / 3002 loss=2.7, ppl=6.5, wps=5875.6, ups=0.09, wpb=64816, bsz=128, num_updates=4034, lr=9.99757e-05, gnorm=2.427, loss_scale=8, train_wall=11, gb_free=2.8, wall=46033
2021-06-19 07:26:10 | INFO | train_inner | epoch 002: 1060 / 3002 loss=2.988, ppl=7.93, wps=5755.5, ups=0.09, wpb=64728, bsz=128, num_updates=4035, lr=9.99757e-05, gnorm=2.57, loss_scale=8, train_wall=11, gb_free=2.8, wall=46044
2021-06-19 07:26:21 | INFO | train_inner | epoch 002: 1061 / 3002 loss=2.681, ppl=6.41, wps=5759.4, ups=0.09, wpb=64695, bsz=128, num_updates=4036, lr=9.99757e-05, gnorm=2.68, loss_scale=8, train_wall=11, gb_free=2.8, wall=46055
2021-06-19 07:26:33 | INFO | train_inner | epoch 002: 1062 / 3002 loss=2.728, ppl=6.63, wps=5677, ups=0.09, wpb=64684, bsz=128, num_updates=4037, lr=9.99757e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=46067
2021-06-19 07:26:44 | INFO | train_inner | epoch 002: 1063 / 3002 loss=2.751, ppl=6.73, wps=5849.3, ups=0.09, wpb=64940, bsz=128, num_updates=4038, lr=9.99757e-05, gnorm=3.687, loss_scale=8, train_wall=11, gb_free=2.8, wall=46078
2021-06-19 07:26:55 | INFO | train_inner | epoch 002: 1064 / 3002 loss=2.781, ppl=6.87, wps=5818, ups=0.09, wpb=64794, bsz=128, num_updates=4039, lr=9.99757e-05, gnorm=2.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=46089
2021-06-19 07:27:06 | INFO | train_inner | epoch 002: 1065 / 3002 loss=2.728, ppl=6.63, wps=5754, ups=0.09, wpb=64774, bsz=128, num_updates=4040, lr=9.99757e-05, gnorm=2.678, loss_scale=8, train_wall=11, gb_free=2.8, wall=46100
2021-06-19 07:27:17 | INFO | train_inner | epoch 002: 1066 / 3002 loss=2.697, ppl=6.48, wps=5847.8, ups=0.09, wpb=64783, bsz=128, num_updates=4041, lr=9.99757e-05, gnorm=2.406, loss_scale=8, train_wall=11, gb_free=2.8, wall=46111
2021-06-19 07:27:28 | INFO | train_inner | epoch 002: 1067 / 3002 loss=2.877, ppl=7.35, wps=5778.3, ups=0.09, wpb=64842, bsz=128, num_updates=4042, lr=9.99757e-05, gnorm=2.373, loss_scale=8, train_wall=11, gb_free=2.8, wall=46123
2021-06-19 07:27:39 | INFO | train_inner | epoch 002: 1068 / 3002 loss=2.786, ppl=6.89, wps=5834.2, ups=0.09, wpb=64813, bsz=128, num_updates=4043, lr=9.99757e-05, gnorm=4.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=46134
2021-06-19 07:27:50 | INFO | train_inner | epoch 002: 1069 / 3002 loss=2.764, ppl=6.79, wps=5890.7, ups=0.09, wpb=64813, bsz=128, num_updates=4044, lr=9.99756e-05, gnorm=2.516, loss_scale=16, train_wall=11, gb_free=2.8, wall=46145
2021-06-19 07:28:02 | INFO | train_inner | epoch 002: 1070 / 3002 loss=2.742, ppl=6.69, wps=5841.3, ups=0.09, wpb=64797, bsz=128, num_updates=4045, lr=9.99756e-05, gnorm=2.499, loss_scale=16, train_wall=11, gb_free=2.8, wall=46156
2021-06-19 07:28:12 | INFO | train_inner | epoch 002: 1071 / 3002 loss=2.715, ppl=6.57, wps=5935.9, ups=0.09, wpb=64863, bsz=128, num_updates=4046, lr=9.99756e-05, gnorm=2.411, loss_scale=16, train_wall=10, gb_free=2.8, wall=46167
2021-06-19 07:28:24 | INFO | train_inner | epoch 002: 1072 / 3002 loss=2.771, ppl=6.83, wps=5812.2, ups=0.09, wpb=64808, bsz=128, num_updates=4047, lr=9.99756e-05, gnorm=2.489, loss_scale=16, train_wall=11, gb_free=2.8, wall=46178
2021-06-19 07:28:35 | INFO | train_inner | epoch 002: 1073 / 3002 loss=2.911, ppl=7.52, wps=5922, ups=0.09, wpb=64800, bsz=128, num_updates=4048, lr=9.99756e-05, gnorm=2.47, loss_scale=16, train_wall=10, gb_free=2.8, wall=46189
2021-06-19 07:28:45 | INFO | train_inner | epoch 002: 1074 / 3002 loss=2.889, ppl=7.41, wps=6028.8, ups=0.09, wpb=64873, bsz=128, num_updates=4049, lr=9.99756e-05, gnorm=2.297, loss_scale=16, train_wall=10, gb_free=2.8, wall=46200
2021-06-19 07:28:56 | INFO | train_inner | epoch 002: 1075 / 3002 loss=2.819, ppl=7.06, wps=5813.6, ups=0.09, wpb=64813, bsz=128, num_updates=4050, lr=9.99756e-05, gnorm=2.253, loss_scale=16, train_wall=11, gb_free=2.8, wall=46211
2021-06-19 07:29:08 | INFO | train_inner | epoch 002: 1076 / 3002 loss=2.647, ppl=6.26, wps=5853.1, ups=0.09, wpb=64741, bsz=128, num_updates=4051, lr=9.99756e-05, gnorm=2.313, loss_scale=16, train_wall=11, gb_free=2.8, wall=46222
2021-06-19 07:29:19 | INFO | train_inner | epoch 002: 1077 / 3002 loss=2.789, ppl=6.91, wps=5862, ups=0.09, wpb=64813, bsz=128, num_updates=4052, lr=9.99756e-05, gnorm=2.259, loss_scale=16, train_wall=11, gb_free=2.8, wall=46233
2021-06-19 07:29:30 | INFO | train_inner | epoch 002: 1078 / 3002 loss=2.709, ppl=6.54, wps=5802.1, ups=0.09, wpb=64779, bsz=128, num_updates=4053, lr=9.99756e-05, gnorm=2.205, loss_scale=16, train_wall=11, gb_free=2.8, wall=46244
2021-06-19 07:29:41 | INFO | train_inner | epoch 002: 1079 / 3002 loss=2.835, ppl=7.14, wps=5975.6, ups=0.09, wpb=64866, bsz=128, num_updates=4054, lr=9.99756e-05, gnorm=2.225, loss_scale=16, train_wall=10, gb_free=2.8, wall=46255
2021-06-19 07:29:52 | INFO | train_inner | epoch 002: 1080 / 3002 loss=2.784, ppl=6.89, wps=5870.8, ups=0.09, wpb=64853, bsz=128, num_updates=4055, lr=9.99756e-05, gnorm=2.342, loss_scale=16, train_wall=11, gb_free=2.8, wall=46266
2021-06-19 07:30:03 | INFO | train_inner | epoch 002: 1081 / 3002 loss=2.754, ppl=6.74, wps=5805.3, ups=0.09, wpb=64782, bsz=128, num_updates=4056, lr=9.99756e-05, gnorm=2.482, loss_scale=16, train_wall=11, gb_free=2.8, wall=46277
2021-06-19 07:30:14 | INFO | train_inner | epoch 002: 1082 / 3002 loss=2.719, ppl=6.59, wps=5847.7, ups=0.09, wpb=64733, bsz=128, num_updates=4057, lr=9.99755e-05, gnorm=2.338, loss_scale=16, train_wall=11, gb_free=2.8, wall=46288
2021-06-19 07:30:25 | INFO | train_inner | epoch 002: 1083 / 3002 loss=2.707, ppl=6.53, wps=5821.3, ups=0.09, wpb=64845, bsz=128, num_updates=4058, lr=9.99755e-05, gnorm=3.172, loss_scale=16, train_wall=11, gb_free=2.8, wall=46299
2021-06-19 07:30:36 | INFO | train_inner | epoch 002: 1084 / 3002 loss=2.766, ppl=6.8, wps=5896.9, ups=0.09, wpb=64782, bsz=128, num_updates=4059, lr=9.99755e-05, gnorm=2.466, loss_scale=16, train_wall=11, gb_free=2.8, wall=46310
2021-06-19 07:30:47 | INFO | train_inner | epoch 002: 1085 / 3002 loss=2.817, ppl=7.05, wps=5698.8, ups=0.09, wpb=64836, bsz=128, num_updates=4060, lr=9.99755e-05, gnorm=2.576, loss_scale=16, train_wall=11, gb_free=2.8, wall=46322
2021-06-19 07:30:59 | INFO | train_inner | epoch 002: 1086 / 3002 loss=2.804, ppl=6.98, wps=5791.2, ups=0.09, wpb=64843, bsz=128, num_updates=4061, lr=9.99755e-05, gnorm=2.467, loss_scale=16, train_wall=11, gb_free=2.8, wall=46333
2021-06-19 07:31:10 | INFO | train_inner | epoch 002: 1087 / 3002 loss=2.883, ppl=7.38, wps=5862, ups=0.09, wpb=64884, bsz=128, num_updates=4062, lr=9.99755e-05, gnorm=2.594, loss_scale=16, train_wall=11, gb_free=2.8, wall=46344
2021-06-19 07:31:21 | INFO | train_inner | epoch 002: 1088 / 3002 loss=2.843, ppl=7.18, wps=5697.4, ups=0.09, wpb=64789, bsz=128, num_updates=4063, lr=9.99755e-05, gnorm=2.358, loss_scale=16, train_wall=11, gb_free=2.8, wall=46355
2021-06-19 07:31:32 | INFO | train_inner | epoch 002: 1089 / 3002 loss=2.847, ppl=7.19, wps=5862.2, ups=0.09, wpb=64828, bsz=128, num_updates=4064, lr=9.99755e-05, gnorm=2.217, loss_scale=16, train_wall=11, gb_free=2.8, wall=46366
2021-06-19 07:31:43 | INFO | train_inner | epoch 002: 1090 / 3002 loss=2.822, ppl=7.07, wps=5765.1, ups=0.09, wpb=64890, bsz=128, num_updates=4065, lr=9.99755e-05, gnorm=2.364, loss_scale=16, train_wall=11, gb_free=2.8, wall=46378
2021-06-19 07:31:54 | INFO | train_inner | epoch 002: 1091 / 3002 loss=2.812, ppl=7.02, wps=5871.1, ups=0.09, wpb=64894, bsz=128, num_updates=4066, lr=9.99755e-05, gnorm=2.358, loss_scale=16, train_wall=11, gb_free=2.8, wall=46389
2021-06-19 07:32:05 | INFO | train_inner | epoch 002: 1092 / 3002 loss=2.825, ppl=7.09, wps=5846.6, ups=0.09, wpb=64799, bsz=128, num_updates=4067, lr=9.99755e-05, gnorm=3.747, loss_scale=16, train_wall=11, gb_free=2.8, wall=46400
2021-06-19 07:32:17 | INFO | train_inner | epoch 002: 1093 / 3002 loss=2.601, ppl=6.07, wps=5849.7, ups=0.09, wpb=64768, bsz=128, num_updates=4068, lr=9.99755e-05, gnorm=2.689, loss_scale=16, train_wall=11, gb_free=2.8, wall=46411
2021-06-19 07:32:28 | INFO | train_inner | epoch 002: 1094 / 3002 loss=2.736, ppl=6.66, wps=5823.2, ups=0.09, wpb=64820, bsz=128, num_updates=4069, lr=9.99754e-05, gnorm=2.21, loss_scale=16, train_wall=11, gb_free=2.8, wall=46422
2021-06-19 07:32:39 | INFO | train_inner | epoch 002: 1095 / 3002 loss=2.876, ppl=7.34, wps=5887.4, ups=0.09, wpb=64913, bsz=128, num_updates=4070, lr=9.99754e-05, gnorm=2.723, loss_scale=16, train_wall=11, gb_free=2.8, wall=46433
2021-06-19 07:32:50 | INFO | train_inner | epoch 002: 1096 / 3002 loss=2.846, ppl=7.19, wps=5724.7, ups=0.09, wpb=64821, bsz=128, num_updates=4071, lr=9.99754e-05, gnorm=2.669, loss_scale=16, train_wall=11, gb_free=2.8, wall=46444
2021-06-19 07:33:01 | INFO | train_inner | epoch 002: 1097 / 3002 loss=2.756, ppl=6.76, wps=5732.6, ups=0.09, wpb=64860, bsz=128, num_updates=4072, lr=9.99754e-05, gnorm=2.477, loss_scale=16, train_wall=11, gb_free=2.8, wall=46456
2021-06-19 07:33:12 | INFO | train_inner | epoch 002: 1098 / 3002 loss=2.751, ppl=6.73, wps=5827.9, ups=0.09, wpb=64802, bsz=128, num_updates=4073, lr=9.99754e-05, gnorm=2.217, loss_scale=16, train_wall=11, gb_free=2.8, wall=46467
2021-06-19 07:33:23 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-19 07:33:35 | INFO | train_inner | epoch 002: 1100 / 3002 loss=2.719, ppl=6.58, wps=2932.4, ups=0.05, wpb=64770, bsz=128, num_updates=4074, lr=9.99754e-05, gnorm=2.312, loss_scale=8, train_wall=21, gb_free=2.8, wall=46489
2021-06-19 07:33:46 | INFO | train_inner | epoch 002: 1101 / 3002 loss=2.618, ppl=6.14, wps=5902.8, ups=0.09, wpb=64837, bsz=128, num_updates=4075, lr=9.99754e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=46500
2021-06-19 07:33:57 | INFO | train_inner | epoch 002: 1102 / 3002 loss=2.605, ppl=6.08, wps=5852.1, ups=0.09, wpb=64819, bsz=128, num_updates=4076, lr=9.99754e-05, gnorm=2.51, loss_scale=8, train_wall=11, gb_free=2.8, wall=46511
2021-06-19 07:34:08 | INFO | train_inner | epoch 002: 1103 / 3002 loss=2.743, ppl=6.69, wps=5833.1, ups=0.09, wpb=64832, bsz=128, num_updates=4077, lr=9.99754e-05, gnorm=2.28, loss_scale=8, train_wall=11, gb_free=2.8, wall=46522
2021-06-19 07:34:19 | INFO | train_inner | epoch 002: 1104 / 3002 loss=2.816, ppl=7.04, wps=5824.9, ups=0.09, wpb=64799, bsz=128, num_updates=4078, lr=9.99754e-05, gnorm=2.278, loss_scale=8, train_wall=11, gb_free=2.8, wall=46533
2021-06-19 07:34:30 | INFO | train_inner | epoch 002: 1105 / 3002 loss=2.792, ppl=6.93, wps=5777.8, ups=0.09, wpb=64732, bsz=128, num_updates=4079, lr=9.99754e-05, gnorm=2.285, loss_scale=8, train_wall=11, gb_free=2.8, wall=46544
2021-06-19 07:34:41 | INFO | train_inner | epoch 002: 1106 / 3002 loss=2.857, ppl=7.25, wps=5836.9, ups=0.09, wpb=64780, bsz=128, num_updates=4080, lr=9.99754e-05, gnorm=2.148, loss_scale=8, train_wall=11, gb_free=2.8, wall=46556
2021-06-19 07:34:52 | INFO | train_inner | epoch 002: 1107 / 3002 loss=2.857, ppl=7.25, wps=5793.8, ups=0.09, wpb=64821, bsz=128, num_updates=4081, lr=9.99754e-05, gnorm=2.191, loss_scale=8, train_wall=11, gb_free=2.8, wall=46567
2021-06-19 07:35:03 | INFO | train_inner | epoch 002: 1108 / 3002 loss=2.848, ppl=7.2, wps=5832.6, ups=0.09, wpb=64913, bsz=128, num_updates=4082, lr=9.99753e-05, gnorm=2.81, loss_scale=8, train_wall=11, gb_free=2.8, wall=46578
2021-06-19 07:35:15 | INFO | train_inner | epoch 002: 1109 / 3002 loss=2.708, ppl=6.53, wps=5872.9, ups=0.09, wpb=64849, bsz=128, num_updates=4083, lr=9.99753e-05, gnorm=2.183, loss_scale=8, train_wall=11, gb_free=2.8, wall=46589
2021-06-19 07:35:25 | INFO | train_inner | epoch 002: 1110 / 3002 loss=2.85, ppl=7.21, wps=5933.3, ups=0.09, wpb=64865, bsz=128, num_updates=4084, lr=9.99753e-05, gnorm=2.312, loss_scale=8, train_wall=10, gb_free=2.8, wall=46600
2021-06-19 07:35:36 | INFO | train_inner | epoch 002: 1111 / 3002 loss=2.694, ppl=6.47, wps=6014.9, ups=0.09, wpb=64792, bsz=128, num_updates=4085, lr=9.99753e-05, gnorm=2.244, loss_scale=8, train_wall=10, gb_free=2.8, wall=46611
2021-06-19 07:35:47 | INFO | train_inner | epoch 002: 1112 / 3002 loss=2.705, ppl=6.52, wps=5805.1, ups=0.09, wpb=64806, bsz=128, num_updates=4086, lr=9.99753e-05, gnorm=2.223, loss_scale=8, train_wall=11, gb_free=2.8, wall=46622
2021-06-19 07:35:59 | INFO | train_inner | epoch 002: 1113 / 3002 loss=2.715, ppl=6.57, wps=5819.1, ups=0.09, wpb=64778, bsz=128, num_updates=4087, lr=9.99753e-05, gnorm=2.229, loss_scale=8, train_wall=11, gb_free=2.8, wall=46633
2021-06-19 07:36:10 | INFO | train_inner | epoch 002: 1114 / 3002 loss=2.963, ppl=7.8, wps=5860.5, ups=0.09, wpb=64813, bsz=128, num_updates=4088, lr=9.99753e-05, gnorm=2.309, loss_scale=8, train_wall=11, gb_free=2.8, wall=46644
2021-06-19 07:36:21 | INFO | train_inner | epoch 002: 1115 / 3002 loss=2.858, ppl=7.25, wps=5754.7, ups=0.09, wpb=64846, bsz=128, num_updates=4089, lr=9.99753e-05, gnorm=2.317, loss_scale=8, train_wall=11, gb_free=2.8, wall=46655
2021-06-19 07:36:32 | INFO | train_inner | epoch 002: 1116 / 3002 loss=2.768, ppl=6.81, wps=5831.4, ups=0.09, wpb=64876, bsz=128, num_updates=4090, lr=9.99753e-05, gnorm=2.288, loss_scale=8, train_wall=11, gb_free=2.8, wall=46666
2021-06-19 07:36:43 | INFO | train_inner | epoch 002: 1117 / 3002 loss=2.747, ppl=6.71, wps=5830.8, ups=0.09, wpb=64770, bsz=128, num_updates=4091, lr=9.99753e-05, gnorm=2.345, loss_scale=8, train_wall=11, gb_free=2.8, wall=46677
2021-06-19 07:36:54 | INFO | train_inner | epoch 002: 1118 / 3002 loss=2.855, ppl=7.24, wps=5772, ups=0.09, wpb=64822, bsz=128, num_updates=4092, lr=9.99753e-05, gnorm=2.483, loss_scale=8, train_wall=11, gb_free=2.8, wall=46689
2021-06-19 07:37:05 | INFO | train_inner | epoch 002: 1119 / 3002 loss=2.875, ppl=7.34, wps=5887.8, ups=0.09, wpb=64808, bsz=128, num_updates=4093, lr=9.99753e-05, gnorm=2.34, loss_scale=8, train_wall=11, gb_free=2.8, wall=46700
2021-06-19 07:37:16 | INFO | train_inner | epoch 002: 1120 / 3002 loss=2.654, ppl=6.29, wps=5907.6, ups=0.09, wpb=64755, bsz=128, num_updates=4094, lr=9.99752e-05, gnorm=2.379, loss_scale=8, train_wall=10, gb_free=2.8, wall=46711
2021-06-19 07:37:27 | INFO | train_inner | epoch 002: 1121 / 3002 loss=2.829, ppl=7.11, wps=5899.9, ups=0.09, wpb=64818, bsz=128, num_updates=4095, lr=9.99752e-05, gnorm=2.355, loss_scale=8, train_wall=11, gb_free=2.8, wall=46722
2021-06-19 07:37:38 | INFO | train_inner | epoch 002: 1122 / 3002 loss=2.803, ppl=6.98, wps=5919.2, ups=0.09, wpb=64840, bsz=128, num_updates=4096, lr=9.99752e-05, gnorm=2.327, loss_scale=8, train_wall=11, gb_free=2.8, wall=46733
2021-06-19 07:37:49 | INFO | train_inner | epoch 002: 1123 / 3002 loss=2.881, ppl=7.37, wps=5854.6, ups=0.09, wpb=64838, bsz=128, num_updates=4097, lr=9.99752e-05, gnorm=2.364, loss_scale=8, train_wall=11, gb_free=2.8, wall=46744
2021-06-19 07:38:00 | INFO | train_inner | epoch 002: 1124 / 3002 loss=2.706, ppl=6.52, wps=5832.6, ups=0.09, wpb=64915, bsz=128, num_updates=4098, lr=9.99752e-05, gnorm=2.31, loss_scale=8, train_wall=11, gb_free=2.8, wall=46755
2021-06-19 07:38:12 | INFO | train_inner | epoch 002: 1125 / 3002 loss=2.718, ppl=6.58, wps=5843.3, ups=0.09, wpb=64870, bsz=128, num_updates=4099, lr=9.99752e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=46766
2021-06-19 07:38:22 | INFO | train_inner | epoch 002: 1126 / 3002 loss=2.708, ppl=6.53, wps=5927.2, ups=0.09, wpb=64826, bsz=128, num_updates=4100, lr=9.99752e-05, gnorm=2.314, loss_scale=8, train_wall=10, gb_free=2.8, wall=46777
2021-06-19 07:38:34 | INFO | train_inner | epoch 002: 1127 / 3002 loss=2.817, ppl=7.05, wps=5864.6, ups=0.09, wpb=64857, bsz=128, num_updates=4101, lr=9.99752e-05, gnorm=2.32, loss_scale=8, train_wall=11, gb_free=2.8, wall=46788
2021-06-19 07:38:45 | INFO | train_inner | epoch 002: 1128 / 3002 loss=2.724, ppl=6.61, wps=5830.9, ups=0.09, wpb=64830, bsz=128, num_updates=4102, lr=9.99752e-05, gnorm=2.229, loss_scale=8, train_wall=11, gb_free=2.8, wall=46799
2021-06-19 07:38:56 | INFO | train_inner | epoch 002: 1129 / 3002 loss=2.894, ppl=7.43, wps=5805.2, ups=0.09, wpb=64780, bsz=128, num_updates=4103, lr=9.99752e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=46810
2021-06-19 07:39:07 | INFO | train_inner | epoch 002: 1130 / 3002 loss=2.77, ppl=6.82, wps=5776.5, ups=0.09, wpb=64855, bsz=128, num_updates=4104, lr=9.99752e-05, gnorm=2.298, loss_scale=8, train_wall=11, gb_free=2.8, wall=46821
2021-06-19 07:39:18 | INFO | train_inner | epoch 002: 1131 / 3002 loss=2.636, ppl=6.22, wps=5812.3, ups=0.09, wpb=64898, bsz=128, num_updates=4105, lr=9.99752e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=46833
2021-06-19 07:39:29 | INFO | train_inner | epoch 002: 1132 / 3002 loss=2.82, ppl=7.06, wps=5873.8, ups=0.09, wpb=64788, bsz=128, num_updates=4106, lr=9.99752e-05, gnorm=2.21, loss_scale=8, train_wall=11, gb_free=2.8, wall=46844
2021-06-19 07:39:40 | INFO | train_inner | epoch 002: 1133 / 3002 loss=2.879, ppl=7.36, wps=5772.1, ups=0.09, wpb=64787, bsz=128, num_updates=4107, lr=9.99751e-05, gnorm=2.382, loss_scale=8, train_wall=11, gb_free=2.8, wall=46855
2021-06-19 07:39:52 | INFO | train_inner | epoch 002: 1134 / 3002 loss=3.042, ppl=8.23, wps=5806.2, ups=0.09, wpb=64756, bsz=128, num_updates=4108, lr=9.99751e-05, gnorm=2.35, loss_scale=8, train_wall=11, gb_free=2.8, wall=46866
2021-06-19 07:40:03 | INFO | train_inner | epoch 002: 1135 / 3002 loss=2.854, ppl=7.23, wps=5797.3, ups=0.09, wpb=64818, bsz=128, num_updates=4109, lr=9.99751e-05, gnorm=2.362, loss_scale=8, train_wall=11, gb_free=2.8, wall=46877
2021-06-19 07:40:14 | INFO | train_inner | epoch 002: 1136 / 3002 loss=2.799, ppl=6.96, wps=5867.8, ups=0.09, wpb=64886, bsz=128, num_updates=4110, lr=9.99751e-05, gnorm=2.295, loss_scale=8, train_wall=11, gb_free=2.8, wall=46888
2021-06-19 07:40:25 | INFO | train_inner | epoch 002: 1137 / 3002 loss=2.753, ppl=6.74, wps=5918.7, ups=0.09, wpb=64796, bsz=128, num_updates=4111, lr=9.99751e-05, gnorm=2.54, loss_scale=8, train_wall=10, gb_free=2.8, wall=46899
2021-06-19 07:40:36 | INFO | train_inner | epoch 002: 1138 / 3002 loss=2.843, ppl=7.17, wps=5888.5, ups=0.09, wpb=64816, bsz=128, num_updates=4112, lr=9.99751e-05, gnorm=2.465, loss_scale=8, train_wall=11, gb_free=2.8, wall=46910
2021-06-19 07:40:47 | INFO | train_inner | epoch 002: 1139 / 3002 loss=2.693, ppl=6.47, wps=5859.7, ups=0.09, wpb=64810, bsz=128, num_updates=4113, lr=9.99751e-05, gnorm=2.117, loss_scale=8, train_wall=11, gb_free=2.8, wall=46921
2021-06-19 07:40:58 | INFO | train_inner | epoch 002: 1140 / 3002 loss=2.678, ppl=6.4, wps=5886.3, ups=0.09, wpb=64810, bsz=128, num_updates=4114, lr=9.99751e-05, gnorm=2.231, loss_scale=8, train_wall=11, gb_free=2.8, wall=46932
2021-06-19 07:41:09 | INFO | train_inner | epoch 002: 1141 / 3002 loss=2.634, ppl=6.21, wps=5825.7, ups=0.09, wpb=64899, bsz=128, num_updates=4115, lr=9.99751e-05, gnorm=2.183, loss_scale=8, train_wall=11, gb_free=2.8, wall=46943
2021-06-19 07:41:20 | INFO | train_inner | epoch 002: 1142 / 3002 loss=2.77, ppl=6.82, wps=5889.6, ups=0.09, wpb=64876, bsz=128, num_updates=4116, lr=9.99751e-05, gnorm=2.17, loss_scale=8, train_wall=11, gb_free=2.8, wall=46954
2021-06-19 07:41:31 | INFO | train_inner | epoch 002: 1143 / 3002 loss=2.852, ppl=7.22, wps=5817.4, ups=0.09, wpb=64849, bsz=128, num_updates=4117, lr=9.99751e-05, gnorm=2.329, loss_scale=8, train_wall=11, gb_free=2.8, wall=46966
2021-06-19 07:41:42 | INFO | train_inner | epoch 002: 1144 / 3002 loss=2.778, ppl=6.86, wps=5778.3, ups=0.09, wpb=64903, bsz=128, num_updates=4118, lr=9.99751e-05, gnorm=2.487, loss_scale=8, train_wall=11, gb_free=2.8, wall=46977
2021-06-19 07:41:54 | INFO | train_inner | epoch 002: 1145 / 3002 loss=2.709, ppl=6.54, wps=5805.8, ups=0.09, wpb=64849, bsz=128, num_updates=4119, lr=9.9975e-05, gnorm=2.275, loss_scale=8, train_wall=11, gb_free=2.8, wall=46988
2021-06-19 07:42:05 | INFO | train_inner | epoch 002: 1146 / 3002 loss=2.729, ppl=6.63, wps=5910.2, ups=0.09, wpb=64860, bsz=128, num_updates=4120, lr=9.9975e-05, gnorm=2.122, loss_scale=8, train_wall=11, gb_free=2.8, wall=46999
2021-06-19 07:42:16 | INFO | train_inner | epoch 002: 1147 / 3002 loss=2.817, ppl=7.05, wps=5847.6, ups=0.09, wpb=64833, bsz=128, num_updates=4121, lr=9.9975e-05, gnorm=2.168, loss_scale=8, train_wall=11, gb_free=2.8, wall=47010
2021-06-19 07:42:27 | INFO | train_inner | epoch 002: 1148 / 3002 loss=2.751, ppl=6.73, wps=5781.1, ups=0.09, wpb=64846, bsz=128, num_updates=4122, lr=9.9975e-05, gnorm=2.254, loss_scale=8, train_wall=11, gb_free=2.8, wall=47021
2021-06-19 07:42:38 | INFO | train_inner | epoch 002: 1149 / 3002 loss=2.604, ppl=6.08, wps=5826.7, ups=0.09, wpb=64812, bsz=128, num_updates=4123, lr=9.9975e-05, gnorm=2.152, loss_scale=8, train_wall=11, gb_free=2.8, wall=47032
2021-06-19 07:42:49 | INFO | train_inner | epoch 002: 1150 / 3002 loss=2.774, ppl=6.84, wps=5772.8, ups=0.09, wpb=64875, bsz=128, num_updates=4124, lr=9.9975e-05, gnorm=2.172, loss_scale=8, train_wall=11, gb_free=2.8, wall=47044
2021-06-19 07:43:00 | INFO | train_inner | epoch 002: 1151 / 3002 loss=2.708, ppl=6.54, wps=5910.5, ups=0.09, wpb=64824, bsz=128, num_updates=4125, lr=9.9975e-05, gnorm=2.236, loss_scale=8, train_wall=11, gb_free=2.8, wall=47055
2021-06-19 07:43:11 | INFO | train_inner | epoch 002: 1152 / 3002 loss=2.685, ppl=6.43, wps=5896.4, ups=0.09, wpb=64807, bsz=128, num_updates=4126, lr=9.9975e-05, gnorm=2.191, loss_scale=8, train_wall=11, gb_free=2.8, wall=47066
2021-06-19 07:43:22 | INFO | train_inner | epoch 002: 1153 / 3002 loss=2.807, ppl=7, wps=5908.5, ups=0.09, wpb=64805, bsz=128, num_updates=4127, lr=9.9975e-05, gnorm=2.263, loss_scale=8, train_wall=11, gb_free=2.8, wall=47076
2021-06-19 07:43:33 | INFO | train_inner | epoch 002: 1154 / 3002 loss=2.791, ppl=6.92, wps=5945.5, ups=0.09, wpb=64781, bsz=128, num_updates=4128, lr=9.9975e-05, gnorm=2.356, loss_scale=8, train_wall=10, gb_free=2.8, wall=47087
2021-06-19 07:43:44 | INFO | train_inner | epoch 002: 1155 / 3002 loss=2.815, ppl=7.03, wps=5972.5, ups=0.09, wpb=64852, bsz=128, num_updates=4129, lr=9.9975e-05, gnorm=2.312, loss_scale=8, train_wall=10, gb_free=2.8, wall=47098
2021-06-19 07:43:55 | INFO | train_inner | epoch 002: 1156 / 3002 loss=2.675, ppl=6.39, wps=5947, ups=0.09, wpb=64838, bsz=128, num_updates=4130, lr=9.9975e-05, gnorm=2.362, loss_scale=8, train_wall=10, gb_free=2.8, wall=47109
2021-06-19 07:44:06 | INFO | train_inner | epoch 002: 1157 / 3002 loss=2.883, ppl=7.37, wps=5832.8, ups=0.09, wpb=64786, bsz=128, num_updates=4131, lr=9.99749e-05, gnorm=2.311, loss_scale=8, train_wall=11, gb_free=2.8, wall=47120
2021-06-19 07:44:17 | INFO | train_inner | epoch 002: 1158 / 3002 loss=2.732, ppl=6.64, wps=5935.1, ups=0.09, wpb=64937, bsz=128, num_updates=4132, lr=9.99749e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=47131
2021-06-19 07:44:28 | INFO | train_inner | epoch 002: 1159 / 3002 loss=2.771, ppl=6.82, wps=5850.7, ups=0.09, wpb=64823, bsz=128, num_updates=4133, lr=9.99749e-05, gnorm=2.172, loss_scale=8, train_wall=11, gb_free=2.8, wall=47142
2021-06-19 07:44:39 | INFO | train_inner | epoch 002: 1160 / 3002 loss=2.762, ppl=6.78, wps=5912.2, ups=0.09, wpb=64817, bsz=128, num_updates=4134, lr=9.99749e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=47153
2021-06-19 07:44:50 | INFO | train_inner | epoch 002: 1161 / 3002 loss=2.808, ppl=7, wps=5849.2, ups=0.09, wpb=64782, bsz=128, num_updates=4135, lr=9.99749e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=47164
2021-06-19 07:45:01 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-19 07:45:12 | INFO | train_inner | epoch 002: 1163 / 3002 loss=2.849, ppl=7.2, wps=2971.6, ups=0.05, wpb=64865, bsz=128, num_updates=4136, lr=9.99749e-05, gnorm=2.266, loss_scale=4, train_wall=21, gb_free=2.8, wall=47186
2021-06-19 07:45:23 | INFO | train_inner | epoch 002: 1164 / 3002 loss=2.744, ppl=6.7, wps=5869.2, ups=0.09, wpb=64836, bsz=128, num_updates=4137, lr=9.99749e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=47197
2021-06-19 07:45:34 | INFO | train_inner | epoch 002: 1165 / 3002 loss=2.838, ppl=7.15, wps=5820.6, ups=0.09, wpb=64833, bsz=128, num_updates=4138, lr=9.99749e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=47208
2021-06-19 07:45:45 | INFO | train_inner | epoch 002: 1166 / 3002 loss=2.785, ppl=6.89, wps=5844.1, ups=0.09, wpb=64839, bsz=128, num_updates=4139, lr=9.99749e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=47219
2021-06-19 07:45:56 | INFO | train_inner | epoch 002: 1167 / 3002 loss=2.809, ppl=7.01, wps=5823.7, ups=0.09, wpb=64816, bsz=128, num_updates=4140, lr=9.99749e-05, gnorm=2.172, loss_scale=4, train_wall=11, gb_free=2.8, wall=47231
2021-06-19 07:46:07 | INFO | train_inner | epoch 002: 1168 / 3002 loss=2.906, ppl=7.5, wps=5850, ups=0.09, wpb=64856, bsz=128, num_updates=4141, lr=9.99749e-05, gnorm=2.127, loss_scale=4, train_wall=11, gb_free=2.8, wall=47242
2021-06-19 07:46:18 | INFO | train_inner | epoch 002: 1169 / 3002 loss=2.797, ppl=6.95, wps=5933.1, ups=0.09, wpb=64844, bsz=128, num_updates=4142, lr=9.99749e-05, gnorm=2.312, loss_scale=4, train_wall=10, gb_free=2.8, wall=47253
2021-06-19 07:46:29 | INFO | train_inner | epoch 002: 1170 / 3002 loss=2.765, ppl=6.8, wps=5929.6, ups=0.09, wpb=64855, bsz=128, num_updates=4143, lr=9.99749e-05, gnorm=2.258, loss_scale=4, train_wall=11, gb_free=2.8, wall=47264
2021-06-19 07:46:40 | INFO | train_inner | epoch 002: 1171 / 3002 loss=2.817, ppl=7.05, wps=5774.8, ups=0.09, wpb=64812, bsz=128, num_updates=4144, lr=9.99748e-05, gnorm=2.255, loss_scale=4, train_wall=11, gb_free=2.8, wall=47275
2021-06-19 07:46:51 | INFO | train_inner | epoch 002: 1172 / 3002 loss=2.671, ppl=6.37, wps=5870.7, ups=0.09, wpb=64893, bsz=128, num_updates=4145, lr=9.99748e-05, gnorm=2.258, loss_scale=4, train_wall=11, gb_free=2.8, wall=47286
2021-06-19 07:47:03 | INFO | train_inner | epoch 002: 1173 / 3002 loss=2.736, ppl=6.66, wps=5848, ups=0.09, wpb=64867, bsz=128, num_updates=4146, lr=9.99748e-05, gnorm=2.284, loss_scale=4, train_wall=11, gb_free=2.8, wall=47297
2021-06-19 07:47:14 | INFO | train_inner | epoch 002: 1174 / 3002 loss=2.907, ppl=7.5, wps=5808.2, ups=0.09, wpb=64794, bsz=128, num_updates=4147, lr=9.99748e-05, gnorm=2.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=47308
2021-06-19 07:47:25 | INFO | train_inner | epoch 002: 1175 / 3002 loss=2.709, ppl=6.54, wps=5837.1, ups=0.09, wpb=64810, bsz=128, num_updates=4148, lr=9.99748e-05, gnorm=2.274, loss_scale=4, train_wall=11, gb_free=2.8, wall=47319
2021-06-19 07:47:36 | INFO | train_inner | epoch 002: 1176 / 3002 loss=2.749, ppl=6.72, wps=5872.1, ups=0.09, wpb=64845, bsz=128, num_updates=4149, lr=9.99748e-05, gnorm=2.214, loss_scale=4, train_wall=11, gb_free=2.8, wall=47330
2021-06-19 07:47:47 | INFO | train_inner | epoch 002: 1177 / 3002 loss=2.726, ppl=6.62, wps=5882, ups=0.09, wpb=64834, bsz=128, num_updates=4150, lr=9.99748e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=47341
2021-06-19 07:47:58 | INFO | train_inner | epoch 002: 1178 / 3002 loss=2.728, ppl=6.63, wps=5928.8, ups=0.09, wpb=64832, bsz=128, num_updates=4151, lr=9.99748e-05, gnorm=2.268, loss_scale=4, train_wall=10, gb_free=2.8, wall=47352
2021-06-19 07:48:09 | INFO | train_inner | epoch 002: 1179 / 3002 loss=2.768, ppl=6.81, wps=5844.4, ups=0.09, wpb=64871, bsz=128, num_updates=4152, lr=9.99748e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=47363
2021-06-19 07:48:20 | INFO | train_inner | epoch 002: 1180 / 3002 loss=2.968, ppl=7.82, wps=5850.4, ups=0.09, wpb=64736, bsz=128, num_updates=4153, lr=9.99748e-05, gnorm=2.331, loss_scale=4, train_wall=11, gb_free=2.8, wall=47374
2021-06-19 07:48:31 | INFO | train_inner | epoch 002: 1181 / 3002 loss=2.691, ppl=6.46, wps=5781.3, ups=0.09, wpb=64831, bsz=128, num_updates=4154, lr=9.99748e-05, gnorm=2.307, loss_scale=4, train_wall=11, gb_free=2.8, wall=47386
2021-06-19 07:48:42 | INFO | train_inner | epoch 002: 1182 / 3002 loss=2.561, ppl=5.9, wps=5837.9, ups=0.09, wpb=64763, bsz=128, num_updates=4155, lr=9.99748e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=47397
2021-06-19 07:48:53 | INFO | train_inner | epoch 002: 1183 / 3002 loss=2.859, ppl=7.26, wps=5821.1, ups=0.09, wpb=64900, bsz=128, num_updates=4156, lr=9.99747e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=47408
2021-06-19 07:49:05 | INFO | train_inner | epoch 002: 1184 / 3002 loss=2.66, ppl=6.32, wps=5819.5, ups=0.09, wpb=64793, bsz=128, num_updates=4157, lr=9.99747e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=47419
2021-06-19 07:49:16 | INFO | train_inner | epoch 002: 1185 / 3002 loss=2.816, ppl=7.04, wps=5865.3, ups=0.09, wpb=64804, bsz=128, num_updates=4158, lr=9.99747e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=47430
2021-06-19 07:49:27 | INFO | train_inner | epoch 002: 1186 / 3002 loss=2.871, ppl=7.32, wps=5885.5, ups=0.09, wpb=64871, bsz=128, num_updates=4159, lr=9.99747e-05, gnorm=2.204, loss_scale=4, train_wall=11, gb_free=2.8, wall=47441
2021-06-19 07:49:38 | INFO | train_inner | epoch 002: 1187 / 3002 loss=2.722, ppl=6.6, wps=5869.9, ups=0.09, wpb=64746, bsz=128, num_updates=4160, lr=9.99747e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=47452
2021-06-19 07:49:49 | INFO | train_inner | epoch 002: 1188 / 3002 loss=2.873, ppl=7.32, wps=5883.1, ups=0.09, wpb=64891, bsz=128, num_updates=4161, lr=9.99747e-05, gnorm=3.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=47463
2021-06-19 07:50:00 | INFO | train_inner | epoch 002: 1189 / 3002 loss=2.757, ppl=6.76, wps=5963, ups=0.09, wpb=64905, bsz=128, num_updates=4162, lr=9.99747e-05, gnorm=2.207, loss_scale=4, train_wall=10, gb_free=2.8, wall=47474
2021-06-19 07:50:11 | INFO | train_inner | epoch 002: 1190 / 3002 loss=2.737, ppl=6.67, wps=5815.7, ups=0.09, wpb=64838, bsz=128, num_updates=4163, lr=9.99747e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=47485
2021-06-19 07:50:22 | INFO | train_inner | epoch 002: 1191 / 3002 loss=2.741, ppl=6.69, wps=5935.6, ups=0.09, wpb=64856, bsz=128, num_updates=4164, lr=9.99747e-05, gnorm=2.254, loss_scale=4, train_wall=10, gb_free=2.8, wall=47496
2021-06-19 07:50:33 | INFO | train_inner | epoch 002: 1192 / 3002 loss=2.742, ppl=6.69, wps=5884.2, ups=0.09, wpb=64806, bsz=128, num_updates=4165, lr=9.99747e-05, gnorm=2.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=47507
2021-06-19 07:50:44 | INFO | train_inner | epoch 002: 1193 / 3002 loss=2.783, ppl=6.88, wps=5881.2, ups=0.09, wpb=64780, bsz=128, num_updates=4166, lr=9.99747e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=47518
2021-06-19 07:50:55 | INFO | train_inner | epoch 002: 1194 / 3002 loss=2.7, ppl=6.5, wps=5874.4, ups=0.09, wpb=64871, bsz=128, num_updates=4167, lr=9.99747e-05, gnorm=2.404, loss_scale=4, train_wall=11, gb_free=2.8, wall=47529
2021-06-19 07:51:06 | INFO | train_inner | epoch 002: 1195 / 3002 loss=2.672, ppl=6.37, wps=5854.6, ups=0.09, wpb=64858, bsz=128, num_updates=4168, lr=9.99747e-05, gnorm=2.29, loss_scale=4, train_wall=11, gb_free=2.8, wall=47540
2021-06-19 07:51:17 | INFO | train_inner | epoch 002: 1196 / 3002 loss=2.849, ppl=7.2, wps=5949.5, ups=0.09, wpb=64955, bsz=128, num_updates=4169, lr=9.99746e-05, gnorm=2.327, loss_scale=4, train_wall=10, gb_free=2.8, wall=47551
2021-06-19 07:51:28 | INFO | train_inner | epoch 002: 1197 / 3002 loss=2.899, ppl=7.46, wps=5817, ups=0.09, wpb=64808, bsz=128, num_updates=4170, lr=9.99746e-05, gnorm=2.202, loss_scale=4, train_wall=11, gb_free=2.8, wall=47562
2021-06-19 07:51:39 | INFO | train_inner | epoch 002: 1198 / 3002 loss=2.858, ppl=7.25, wps=5806.1, ups=0.09, wpb=64916, bsz=128, num_updates=4171, lr=9.99746e-05, gnorm=2.206, loss_scale=4, train_wall=11, gb_free=2.8, wall=47573
2021-06-19 07:51:50 | INFO | train_inner | epoch 002: 1199 / 3002 loss=2.78, ppl=6.87, wps=5818.2, ups=0.09, wpb=64822, bsz=128, num_updates=4172, lr=9.99746e-05, gnorm=2.372, loss_scale=4, train_wall=11, gb_free=2.8, wall=47585
2021-06-19 07:52:01 | INFO | train_inner | epoch 002: 1200 / 3002 loss=2.855, ppl=7.24, wps=5815.3, ups=0.09, wpb=64709, bsz=128, num_updates=4173, lr=9.99746e-05, gnorm=2.759, loss_scale=4, train_wall=11, gb_free=2.8, wall=47596
2021-06-19 07:52:13 | INFO | train_inner | epoch 002: 1201 / 3002 loss=2.75, ppl=6.73, wps=5774.6, ups=0.09, wpb=64805, bsz=128, num_updates=4174, lr=9.99746e-05, gnorm=2.273, loss_scale=4, train_wall=11, gb_free=2.8, wall=47607
2021-06-19 07:52:24 | INFO | train_inner | epoch 002: 1202 / 3002 loss=2.899, ppl=7.46, wps=5795.7, ups=0.09, wpb=64846, bsz=128, num_updates=4175, lr=9.99746e-05, gnorm=2.325, loss_scale=4, train_wall=11, gb_free=2.8, wall=47618
2021-06-19 07:52:35 | INFO | train_inner | epoch 002: 1203 / 3002 loss=2.831, ppl=7.12, wps=5779.4, ups=0.09, wpb=64785, bsz=128, num_updates=4176, lr=9.99746e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=47629
2021-06-19 07:52:46 | INFO | train_inner | epoch 002: 1204 / 3002 loss=2.885, ppl=7.39, wps=5774.2, ups=0.09, wpb=64783, bsz=128, num_updates=4177, lr=9.99746e-05, gnorm=2.247, loss_scale=4, train_wall=11, gb_free=2.8, wall=47640
2021-06-19 07:52:57 | INFO | train_inner | epoch 002: 1205 / 3002 loss=2.808, ppl=7, wps=5863.1, ups=0.09, wpb=64777, bsz=128, num_updates=4178, lr=9.99746e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=47652
2021-06-19 07:53:08 | INFO | train_inner | epoch 002: 1206 / 3002 loss=2.67, ppl=6.37, wps=5835.4, ups=0.09, wpb=64868, bsz=128, num_updates=4179, lr=9.99746e-05, gnorm=2.296, loss_scale=4, train_wall=11, gb_free=2.8, wall=47663
2021-06-19 07:53:19 | INFO | train_inner | epoch 002: 1207 / 3002 loss=2.814, ppl=7.03, wps=5864.7, ups=0.09, wpb=64760, bsz=128, num_updates=4180, lr=9.99746e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=47674
2021-06-19 07:53:30 | INFO | train_inner | epoch 002: 1208 / 3002 loss=2.862, ppl=7.27, wps=5885.8, ups=0.09, wpb=64864, bsz=128, num_updates=4181, lr=9.99745e-05, gnorm=2.467, loss_scale=4, train_wall=11, gb_free=2.8, wall=47685
2021-06-19 07:53:41 | INFO | train_inner | epoch 002: 1209 / 3002 loss=2.871, ppl=7.31, wps=5927.5, ups=0.09, wpb=64823, bsz=128, num_updates=4182, lr=9.99745e-05, gnorm=2.267, loss_scale=4, train_wall=10, gb_free=2.8, wall=47696
2021-06-19 07:53:52 | INFO | train_inner | epoch 002: 1210 / 3002 loss=2.598, ppl=6.06, wps=5960.7, ups=0.09, wpb=64823, bsz=128, num_updates=4183, lr=9.99745e-05, gnorm=2.105, loss_scale=4, train_wall=10, gb_free=2.8, wall=47707
2021-06-19 07:54:03 | INFO | train_inner | epoch 002: 1211 / 3002 loss=2.816, ppl=7.04, wps=5914.8, ups=0.09, wpb=64846, bsz=128, num_updates=4184, lr=9.99745e-05, gnorm=2.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=47718
2021-06-19 07:54:14 | INFO | train_inner | epoch 002: 1212 / 3002 loss=2.827, ppl=7.09, wps=5918.9, ups=0.09, wpb=64838, bsz=128, num_updates=4185, lr=9.99745e-05, gnorm=2.242, loss_scale=4, train_wall=10, gb_free=2.8, wall=47728
2021-06-19 07:54:25 | INFO | train_inner | epoch 002: 1213 / 3002 loss=2.946, ppl=7.71, wps=5908.6, ups=0.09, wpb=64770, bsz=128, num_updates=4186, lr=9.99745e-05, gnorm=2.269, loss_scale=4, train_wall=11, gb_free=2.8, wall=47739
2021-06-19 07:54:36 | INFO | train_inner | epoch 002: 1214 / 3002 loss=2.605, ppl=6.09, wps=5843.1, ups=0.09, wpb=64877, bsz=128, num_updates=4187, lr=9.99745e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=47751
2021-06-19 07:54:47 | INFO | train_inner | epoch 002: 1215 / 3002 loss=2.873, ppl=7.33, wps=5908.9, ups=0.09, wpb=64859, bsz=128, num_updates=4188, lr=9.99745e-05, gnorm=2.272, loss_scale=4, train_wall=11, gb_free=2.8, wall=47761
2021-06-19 07:54:58 | INFO | train_inner | epoch 002: 1216 / 3002 loss=2.674, ppl=6.38, wps=5800.4, ups=0.09, wpb=64827, bsz=128, num_updates=4189, lr=9.99745e-05, gnorm=2.538, loss_scale=4, train_wall=11, gb_free=2.8, wall=47773
2021-06-19 07:55:10 | INFO | train_inner | epoch 002: 1217 / 3002 loss=2.788, ppl=6.91, wps=5810.4, ups=0.09, wpb=64876, bsz=128, num_updates=4190, lr=9.99745e-05, gnorm=2.296, loss_scale=4, train_wall=11, gb_free=2.8, wall=47784
2021-06-19 07:55:21 | INFO | train_inner | epoch 002: 1218 / 3002 loss=2.815, ppl=7.04, wps=5805.4, ups=0.09, wpb=64763, bsz=128, num_updates=4191, lr=9.99745e-05, gnorm=2.281, loss_scale=4, train_wall=11, gb_free=2.8, wall=47795
2021-06-19 07:55:32 | INFO | train_inner | epoch 002: 1219 / 3002 loss=2.833, ppl=7.12, wps=5787.9, ups=0.09, wpb=64858, bsz=128, num_updates=4192, lr=9.99745e-05, gnorm=2.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=47806
2021-06-19 07:55:43 | INFO | train_inner | epoch 002: 1220 / 3002 loss=2.785, ppl=6.89, wps=5877.8, ups=0.09, wpb=64828, bsz=128, num_updates=4193, lr=9.99745e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=47817
2021-06-19 07:55:54 | INFO | train_inner | epoch 002: 1221 / 3002 loss=2.825, ppl=7.09, wps=5812.2, ups=0.09, wpb=64871, bsz=128, num_updates=4194, lr=9.99744e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=47828
2021-06-19 07:56:05 | INFO | train_inner | epoch 002: 1222 / 3002 loss=2.734, ppl=6.65, wps=5828.1, ups=0.09, wpb=64799, bsz=128, num_updates=4195, lr=9.99744e-05, gnorm=2.332, loss_scale=4, train_wall=11, gb_free=2.8, wall=47840
2021-06-19 07:56:16 | INFO | train_inner | epoch 002: 1223 / 3002 loss=2.826, ppl=7.09, wps=5850.2, ups=0.09, wpb=64837, bsz=128, num_updates=4196, lr=9.99744e-05, gnorm=2.447, loss_scale=4, train_wall=11, gb_free=2.8, wall=47851
2021-06-19 07:56:27 | INFO | train_inner | epoch 002: 1224 / 3002 loss=2.785, ppl=6.89, wps=5931.3, ups=0.09, wpb=64835, bsz=128, num_updates=4197, lr=9.99744e-05, gnorm=2.268, loss_scale=4, train_wall=10, gb_free=2.8, wall=47862
2021-06-19 07:56:38 | INFO | train_inner | epoch 002: 1225 / 3002 loss=2.608, ppl=6.1, wps=5865.9, ups=0.09, wpb=64832, bsz=128, num_updates=4198, lr=9.99744e-05, gnorm=2.18, loss_scale=4, train_wall=11, gb_free=2.8, wall=47873
2021-06-19 07:56:49 | INFO | train_inner | epoch 002: 1226 / 3002 loss=2.709, ppl=6.54, wps=5957.1, ups=0.09, wpb=64846, bsz=128, num_updates=4199, lr=9.99744e-05, gnorm=2.346, loss_scale=4, train_wall=10, gb_free=2.8, wall=47883
2021-06-19 07:57:00 | INFO | train_inner | epoch 002: 1227 / 3002 loss=2.859, ppl=7.26, wps=5786.1, ups=0.09, wpb=64800, bsz=128, num_updates=4200, lr=9.99744e-05, gnorm=2.287, loss_scale=4, train_wall=11, gb_free=2.8, wall=47895
2021-06-19 07:57:12 | INFO | train_inner | epoch 002: 1228 / 3002 loss=2.79, ppl=6.92, wps=5787.2, ups=0.09, wpb=64807, bsz=128, num_updates=4201, lr=9.99744e-05, gnorm=2.376, loss_scale=4, train_wall=11, gb_free=2.8, wall=47906
2021-06-19 07:57:22 | INFO | train_inner | epoch 002: 1229 / 3002 loss=2.968, ppl=7.82, wps=5922.5, ups=0.09, wpb=64777, bsz=128, num_updates=4202, lr=9.99744e-05, gnorm=2.256, loss_scale=4, train_wall=11, gb_free=2.8, wall=47917
2021-06-19 07:57:33 | INFO | train_inner | epoch 002: 1230 / 3002 loss=2.641, ppl=6.24, wps=5897.6, ups=0.09, wpb=64894, bsz=128, num_updates=4203, lr=9.99744e-05, gnorm=2.219, loss_scale=4, train_wall=11, gb_free=2.8, wall=47928
2021-06-19 07:57:45 | INFO | train_inner | epoch 002: 1231 / 3002 loss=2.785, ppl=6.89, wps=5865.2, ups=0.09, wpb=64796, bsz=128, num_updates=4204, lr=9.99744e-05, gnorm=2.315, loss_scale=4, train_wall=11, gb_free=2.8, wall=47939
2021-06-19 07:57:56 | INFO | train_inner | epoch 002: 1232 / 3002 loss=2.822, ppl=7.07, wps=5812.6, ups=0.09, wpb=64845, bsz=128, num_updates=4205, lr=9.99744e-05, gnorm=2.279, loss_scale=4, train_wall=11, gb_free=2.8, wall=47950
2021-06-19 07:58:07 | INFO | train_inner | epoch 002: 1233 / 3002 loss=2.795, ppl=6.94, wps=5828.2, ups=0.09, wpb=64853, bsz=128, num_updates=4206, lr=9.99743e-05, gnorm=2.157, loss_scale=4, train_wall=11, gb_free=2.8, wall=47961
2021-06-19 07:58:18 | INFO | train_inner | epoch 002: 1234 / 3002 loss=2.893, ppl=7.43, wps=5959, ups=0.09, wpb=64856, bsz=128, num_updates=4207, lr=9.99743e-05, gnorm=2.204, loss_scale=4, train_wall=10, gb_free=2.8, wall=47972
2021-06-19 07:58:29 | INFO | train_inner | epoch 002: 1235 / 3002 loss=2.621, ppl=6.15, wps=5822.5, ups=0.09, wpb=64819, bsz=128, num_updates=4208, lr=9.99743e-05, gnorm=2.33, loss_scale=4, train_wall=11, gb_free=2.8, wall=47983
2021-06-19 07:58:40 | INFO | train_inner | epoch 002: 1236 / 3002 loss=2.728, ppl=6.63, wps=5789.4, ups=0.09, wpb=64826, bsz=128, num_updates=4209, lr=9.99743e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=47994
2021-06-19 07:58:51 | INFO | train_inner | epoch 002: 1237 / 3002 loss=2.817, ppl=7.05, wps=5806.4, ups=0.09, wpb=64858, bsz=128, num_updates=4210, lr=9.99743e-05, gnorm=2.325, loss_scale=4, train_wall=11, gb_free=2.8, wall=48006
2021-06-19 07:59:02 | INFO | train_inner | epoch 002: 1238 / 3002 loss=2.841, ppl=7.17, wps=5852.1, ups=0.09, wpb=64781, bsz=128, num_updates=4211, lr=9.99743e-05, gnorm=2.319, loss_scale=4, train_wall=11, gb_free=2.8, wall=48017
2021-06-19 07:59:13 | INFO | train_inner | epoch 002: 1239 / 3002 loss=2.64, ppl=6.24, wps=5932.7, ups=0.09, wpb=64898, bsz=128, num_updates=4212, lr=9.99743e-05, gnorm=2.396, loss_scale=4, train_wall=10, gb_free=2.8, wall=48028
2021-06-19 07:59:24 | INFO | train_inner | epoch 002: 1240 / 3002 loss=2.817, ppl=7.05, wps=5933.9, ups=0.09, wpb=64784, bsz=128, num_updates=4213, lr=9.99743e-05, gnorm=2.286, loss_scale=4, train_wall=10, gb_free=2.8, wall=48038
2021-06-19 07:59:35 | INFO | train_inner | epoch 002: 1241 / 3002 loss=2.745, ppl=6.7, wps=5760.2, ups=0.09, wpb=64856, bsz=128, num_updates=4214, lr=9.99743e-05, gnorm=2.369, loss_scale=4, train_wall=11, gb_free=2.8, wall=48050
2021-06-19 07:59:46 | INFO | train_inner | epoch 002: 1242 / 3002 loss=2.747, ppl=6.71, wps=5871.8, ups=0.09, wpb=64833, bsz=128, num_updates=4215, lr=9.99743e-05, gnorm=2.255, loss_scale=4, train_wall=11, gb_free=2.8, wall=48061
2021-06-19 07:59:57 | INFO | train_inner | epoch 002: 1243 / 3002 loss=2.898, ppl=7.46, wps=5845.1, ups=0.09, wpb=64772, bsz=128, num_updates=4216, lr=9.99743e-05, gnorm=2.437, loss_scale=4, train_wall=11, gb_free=2.8, wall=48072
2021-06-19 08:00:08 | INFO | train_inner | epoch 002: 1244 / 3002 loss=2.709, ppl=6.54, wps=5979.9, ups=0.09, wpb=64781, bsz=128, num_updates=4217, lr=9.99743e-05, gnorm=2.321, loss_scale=4, train_wall=10, gb_free=2.8, wall=48083
2021-06-19 08:00:19 | INFO | train_inner | epoch 002: 1245 / 3002 loss=2.9, ppl=7.46, wps=5806.5, ups=0.09, wpb=64773, bsz=128, num_updates=4218, lr=9.99743e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=48094
2021-06-19 08:00:31 | INFO | train_inner | epoch 002: 1246 / 3002 loss=2.822, ppl=7.07, wps=5786.1, ups=0.09, wpb=64745, bsz=128, num_updates=4219, lr=9.99742e-05, gnorm=2.227, loss_scale=4, train_wall=11, gb_free=2.8, wall=48105
2021-06-19 08:00:42 | INFO | train_inner | epoch 002: 1247 / 3002 loss=2.731, ppl=6.64, wps=5761, ups=0.09, wpb=64847, bsz=128, num_updates=4220, lr=9.99742e-05, gnorm=2.339, loss_scale=4, train_wall=11, gb_free=2.8, wall=48116
2021-06-19 08:00:53 | INFO | train_inner | epoch 002: 1248 / 3002 loss=2.815, ppl=7.04, wps=5945.2, ups=0.09, wpb=64847, bsz=128, num_updates=4221, lr=9.99742e-05, gnorm=2.178, loss_scale=4, train_wall=10, gb_free=2.8, wall=48127
2021-06-19 08:01:04 | INFO | train_inner | epoch 002: 1249 / 3002 loss=2.892, ppl=7.43, wps=5890, ups=0.09, wpb=64874, bsz=128, num_updates=4222, lr=9.99742e-05, gnorm=2.318, loss_scale=4, train_wall=11, gb_free=2.8, wall=48138
2021-06-19 08:01:15 | INFO | train_inner | epoch 002: 1250 / 3002 loss=2.769, ppl=6.82, wps=5895.8, ups=0.09, wpb=64801, bsz=128, num_updates=4223, lr=9.99742e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=48149
2021-06-19 08:01:26 | INFO | train_inner | epoch 002: 1251 / 3002 loss=2.755, ppl=6.75, wps=5763.6, ups=0.09, wpb=64836, bsz=128, num_updates=4224, lr=9.99742e-05, gnorm=2.355, loss_scale=4, train_wall=11, gb_free=2.8, wall=48160
2021-06-19 08:01:37 | INFO | train_inner | epoch 002: 1252 / 3002 loss=2.833, ppl=7.13, wps=5786.5, ups=0.09, wpb=64778, bsz=128, num_updates=4225, lr=9.99742e-05, gnorm=2.338, loss_scale=4, train_wall=11, gb_free=2.8, wall=48172
2021-06-19 08:01:48 | INFO | train_inner | epoch 002: 1253 / 3002 loss=2.796, ppl=6.95, wps=5936, ups=0.09, wpb=64846, bsz=128, num_updates=4226, lr=9.99742e-05, gnorm=2.19, loss_scale=4, train_wall=10, gb_free=2.8, wall=48183
2021-06-19 08:01:59 | INFO | train_inner | epoch 002: 1254 / 3002 loss=2.912, ppl=7.53, wps=5857.9, ups=0.09, wpb=64821, bsz=128, num_updates=4227, lr=9.99742e-05, gnorm=2.306, loss_scale=4, train_wall=11, gb_free=2.8, wall=48194
2021-06-19 08:02:10 | INFO | train_inner | epoch 002: 1255 / 3002 loss=2.664, ppl=6.34, wps=5828.4, ups=0.09, wpb=64791, bsz=128, num_updates=4228, lr=9.99742e-05, gnorm=2.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=48205
2021-06-19 08:02:21 | INFO | train_inner | epoch 002: 1256 / 3002 loss=2.728, ppl=6.63, wps=5974.4, ups=0.09, wpb=64814, bsz=128, num_updates=4229, lr=9.99742e-05, gnorm=2.142, loss_scale=4, train_wall=10, gb_free=2.8, wall=48216
2021-06-19 08:02:32 | INFO | train_inner | epoch 002: 1257 / 3002 loss=2.675, ppl=6.39, wps=5939.7, ups=0.09, wpb=64867, bsz=128, num_updates=4230, lr=9.99742e-05, gnorm=2.183, loss_scale=4, train_wall=10, gb_free=2.8, wall=48227
2021-06-19 08:02:43 | INFO | train_inner | epoch 002: 1258 / 3002 loss=2.9, ppl=7.46, wps=5894.4, ups=0.09, wpb=64789, bsz=128, num_updates=4231, lr=9.99741e-05, gnorm=2.252, loss_scale=4, train_wall=11, gb_free=2.8, wall=48237
2021-06-19 08:02:54 | INFO | train_inner | epoch 002: 1259 / 3002 loss=2.667, ppl=6.35, wps=5745, ups=0.09, wpb=64844, bsz=128, num_updates=4232, lr=9.99741e-05, gnorm=2.217, loss_scale=4, train_wall=11, gb_free=2.8, wall=48249
2021-06-19 08:03:06 | INFO | train_inner | epoch 002: 1260 / 3002 loss=2.78, ppl=6.87, wps=5837.8, ups=0.09, wpb=64792, bsz=128, num_updates=4233, lr=9.99741e-05, gnorm=2.256, loss_scale=4, train_wall=11, gb_free=2.8, wall=48260
2021-06-19 08:03:17 | INFO | train_inner | epoch 002: 1261 / 3002 loss=2.799, ppl=6.96, wps=5813.5, ups=0.09, wpb=64852, bsz=128, num_updates=4234, lr=9.99741e-05, gnorm=2.263, loss_scale=4, train_wall=11, gb_free=2.8, wall=48271
2021-06-19 08:03:28 | INFO | train_inner | epoch 002: 1262 / 3002 loss=2.789, ppl=6.91, wps=5870.8, ups=0.09, wpb=64759, bsz=128, num_updates=4235, lr=9.99741e-05, gnorm=2.254, loss_scale=4, train_wall=11, gb_free=2.8, wall=48282
2021-06-19 08:03:39 | INFO | train_inner | epoch 002: 1263 / 3002 loss=2.566, ppl=5.92, wps=5909, ups=0.09, wpb=64926, bsz=128, num_updates=4236, lr=9.99741e-05, gnorm=2.162, loss_scale=4, train_wall=11, gb_free=2.8, wall=48293
2021-06-19 08:03:50 | INFO | train_inner | epoch 002: 1264 / 3002 loss=2.742, ppl=6.69, wps=5852.9, ups=0.09, wpb=64921, bsz=128, num_updates=4237, lr=9.99741e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=48304
2021-06-19 08:04:01 | INFO | train_inner | epoch 002: 1265 / 3002 loss=2.679, ppl=6.41, wps=5860.5, ups=0.09, wpb=64790, bsz=128, num_updates=4238, lr=9.99741e-05, gnorm=2.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=48315
2021-06-19 08:04:12 | INFO | train_inner | epoch 002: 1266 / 3002 loss=2.632, ppl=6.2, wps=5806.4, ups=0.09, wpb=64780, bsz=128, num_updates=4239, lr=9.99741e-05, gnorm=2.214, loss_scale=4, train_wall=11, gb_free=2.8, wall=48326
2021-06-19 08:04:23 | INFO | train_inner | epoch 002: 1267 / 3002 loss=2.607, ppl=6.09, wps=5794, ups=0.09, wpb=64834, bsz=128, num_updates=4240, lr=9.99741e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=48338
2021-06-19 08:04:35 | INFO | train_inner | epoch 002: 1268 / 3002 loss=2.943, ppl=7.69, wps=5743.9, ups=0.09, wpb=64892, bsz=128, num_updates=4241, lr=9.99741e-05, gnorm=2.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=48349
2021-06-19 08:04:45 | INFO | train_inner | epoch 002: 1269 / 3002 loss=2.799, ppl=6.96, wps=5935.2, ups=0.09, wpb=64762, bsz=128, num_updates=4242, lr=9.99741e-05, gnorm=2.301, loss_scale=4, train_wall=10, gb_free=2.8, wall=48360
2021-06-19 08:04:56 | INFO | train_inner | epoch 002: 1270 / 3002 loss=2.682, ppl=6.42, wps=5873.2, ups=0.09, wpb=64824, bsz=128, num_updates=4243, lr=9.99741e-05, gnorm=2.73, loss_scale=4, train_wall=11, gb_free=2.8, wall=48371
2021-06-19 08:05:08 | INFO | train_inner | epoch 002: 1271 / 3002 loss=2.641, ppl=6.24, wps=5847.8, ups=0.09, wpb=64822, bsz=128, num_updates=4244, lr=9.9974e-05, gnorm=2.336, loss_scale=4, train_wall=11, gb_free=2.8, wall=48382
2021-06-19 08:05:19 | INFO | train_inner | epoch 002: 1272 / 3002 loss=2.737, ppl=6.67, wps=5804.8, ups=0.09, wpb=64851, bsz=128, num_updates=4245, lr=9.9974e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=48393
2021-06-19 08:05:30 | INFO | train_inner | epoch 002: 1273 / 3002 loss=2.782, ppl=6.88, wps=5898.5, ups=0.09, wpb=64861, bsz=128, num_updates=4246, lr=9.9974e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=48404
2021-06-19 08:05:41 | INFO | train_inner | epoch 002: 1274 / 3002 loss=2.747, ppl=6.71, wps=5870.6, ups=0.09, wpb=64858, bsz=128, num_updates=4247, lr=9.9974e-05, gnorm=2.273, loss_scale=4, train_wall=11, gb_free=2.8, wall=48415
2021-06-19 08:05:52 | INFO | train_inner | epoch 002: 1275 / 3002 loss=2.857, ppl=7.25, wps=5942.7, ups=0.09, wpb=64878, bsz=128, num_updates=4248, lr=9.9974e-05, gnorm=2.234, loss_scale=4, train_wall=10, gb_free=2.8, wall=48426
2021-06-19 08:06:03 | INFO | train_inner | epoch 002: 1276 / 3002 loss=2.79, ppl=6.92, wps=5766.6, ups=0.09, wpb=64790, bsz=128, num_updates=4249, lr=9.9974e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=48437
2021-06-19 08:06:14 | INFO | train_inner | epoch 002: 1277 / 3002 loss=2.617, ppl=6.13, wps=5910.2, ups=0.09, wpb=64882, bsz=128, num_updates=4250, lr=9.9974e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=48448
2021-06-19 08:06:25 | INFO | train_inner | epoch 002: 1278 / 3002 loss=2.646, ppl=6.26, wps=5847.5, ups=0.09, wpb=64878, bsz=128, num_updates=4251, lr=9.9974e-05, gnorm=2.285, loss_scale=4, train_wall=11, gb_free=2.8, wall=48459
2021-06-19 08:06:36 | INFO | train_inner | epoch 002: 1279 / 3002 loss=2.712, ppl=6.55, wps=5782, ups=0.09, wpb=64816, bsz=128, num_updates=4252, lr=9.9974e-05, gnorm=2.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=48471
2021-06-19 08:06:47 | INFO | train_inner | epoch 002: 1280 / 3002 loss=2.751, ppl=6.73, wps=5885, ups=0.09, wpb=64833, bsz=128, num_updates=4253, lr=9.9974e-05, gnorm=2.31, loss_scale=4, train_wall=11, gb_free=2.8, wall=48482
2021-06-19 08:06:58 | INFO | train_inner | epoch 002: 1281 / 3002 loss=2.833, ppl=7.13, wps=5871.4, ups=0.09, wpb=64810, bsz=128, num_updates=4254, lr=9.9974e-05, gnorm=2.397, loss_scale=4, train_wall=11, gb_free=2.8, wall=48493
2021-06-19 08:07:09 | INFO | train_inner | epoch 002: 1282 / 3002 loss=2.678, ppl=6.4, wps=5829, ups=0.09, wpb=64932, bsz=128, num_updates=4255, lr=9.9974e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=48504
2021-06-19 08:07:21 | INFO | train_inner | epoch 002: 1283 / 3002 loss=2.813, ppl=7.03, wps=5749.7, ups=0.09, wpb=64811, bsz=128, num_updates=4256, lr=9.99739e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=48515
2021-06-19 08:07:32 | INFO | train_inner | epoch 002: 1284 / 3002 loss=2.678, ppl=6.4, wps=5787.2, ups=0.09, wpb=64783, bsz=128, num_updates=4257, lr=9.99739e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=48526
2021-06-19 08:07:43 | INFO | train_inner | epoch 002: 1285 / 3002 loss=2.712, ppl=6.55, wps=5874.2, ups=0.09, wpb=64804, bsz=128, num_updates=4258, lr=9.99739e-05, gnorm=2.24, loss_scale=4, train_wall=11, gb_free=2.8, wall=48537
2021-06-19 08:07:54 | INFO | train_inner | epoch 002: 1286 / 3002 loss=2.746, ppl=6.71, wps=5757.4, ups=0.09, wpb=64883, bsz=128, num_updates=4259, lr=9.99739e-05, gnorm=2.263, loss_scale=4, train_wall=11, gb_free=2.8, wall=48548
2021-06-19 08:08:05 | INFO | train_inner | epoch 002: 1287 / 3002 loss=2.914, ppl=7.54, wps=5865.3, ups=0.09, wpb=64831, bsz=128, num_updates=4260, lr=9.99739e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=48560
2021-06-19 08:08:16 | INFO | train_inner | epoch 002: 1288 / 3002 loss=2.684, ppl=6.43, wps=5816.7, ups=0.09, wpb=64835, bsz=128, num_updates=4261, lr=9.99739e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=48571
2021-06-19 08:08:27 | INFO | train_inner | epoch 002: 1289 / 3002 loss=2.733, ppl=6.65, wps=5829.6, ups=0.09, wpb=64872, bsz=128, num_updates=4262, lr=9.99739e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=48582
2021-06-19 08:08:38 | INFO | train_inner | epoch 002: 1290 / 3002 loss=2.612, ppl=6.11, wps=5896.8, ups=0.09, wpb=64804, bsz=128, num_updates=4263, lr=9.99739e-05, gnorm=2.192, loss_scale=8, train_wall=11, gb_free=2.8, wall=48593
2021-06-19 08:08:49 | INFO | train_inner | epoch 002: 1291 / 3002 loss=2.903, ppl=7.48, wps=6005.2, ups=0.09, wpb=64809, bsz=128, num_updates=4264, lr=9.99739e-05, gnorm=2.499, loss_scale=8, train_wall=10, gb_free=2.8, wall=48604
2021-06-19 08:09:00 | INFO | train_inner | epoch 002: 1292 / 3002 loss=2.7, ppl=6.5, wps=5845.5, ups=0.09, wpb=64801, bsz=128, num_updates=4265, lr=9.99739e-05, gnorm=2.519, loss_scale=8, train_wall=11, gb_free=2.8, wall=48615
2021-06-19 08:09:12 | INFO | train_inner | epoch 002: 1293 / 3002 loss=2.715, ppl=6.57, wps=5807.8, ups=0.09, wpb=64826, bsz=128, num_updates=4266, lr=9.99739e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=48626
2021-06-19 08:09:23 | INFO | train_inner | epoch 002: 1294 / 3002 loss=2.843, ppl=7.17, wps=5782.2, ups=0.09, wpb=64853, bsz=128, num_updates=4267, lr=9.99739e-05, gnorm=2.172, loss_scale=8, train_wall=11, gb_free=2.8, wall=48637
2021-06-19 08:09:34 | INFO | train_inner | epoch 002: 1295 / 3002 loss=2.772, ppl=6.83, wps=5841.7, ups=0.09, wpb=64898, bsz=128, num_updates=4268, lr=9.99739e-05, gnorm=2.242, loss_scale=8, train_wall=11, gb_free=2.8, wall=48648
2021-06-19 08:09:45 | INFO | train_inner | epoch 002: 1296 / 3002 loss=2.732, ppl=6.64, wps=5884.3, ups=0.09, wpb=64813, bsz=128, num_updates=4269, lr=9.99738e-05, gnorm=2.236, loss_scale=8, train_wall=11, gb_free=2.8, wall=48659
2021-06-19 08:09:56 | INFO | train_inner | epoch 002: 1297 / 3002 loss=2.752, ppl=6.74, wps=5827.1, ups=0.09, wpb=64785, bsz=128, num_updates=4270, lr=9.99738e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=48670
2021-06-19 08:10:07 | INFO | train_inner | epoch 002: 1298 / 3002 loss=2.728, ppl=6.62, wps=5891.6, ups=0.09, wpb=64801, bsz=128, num_updates=4271, lr=9.99738e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=48681
2021-06-19 08:10:18 | INFO | train_inner | epoch 002: 1299 / 3002 loss=2.81, ppl=7.01, wps=5863.5, ups=0.09, wpb=64792, bsz=128, num_updates=4272, lr=9.99738e-05, gnorm=2.159, loss_scale=8, train_wall=11, gb_free=2.8, wall=48692
2021-06-19 08:10:29 | INFO | train_inner | epoch 002: 1300 / 3002 loss=2.898, ppl=7.45, wps=5891.1, ups=0.09, wpb=64775, bsz=128, num_updates=4273, lr=9.99738e-05, gnorm=2.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=48703
2021-06-19 08:10:40 | INFO | train_inner | epoch 002: 1301 / 3002 loss=2.947, ppl=7.71, wps=5879.4, ups=0.09, wpb=64734, bsz=128, num_updates=4274, lr=9.99738e-05, gnorm=2.237, loss_scale=8, train_wall=11, gb_free=2.8, wall=48714
2021-06-19 08:10:51 | INFO | train_inner | epoch 002: 1302 / 3002 loss=2.827, ppl=7.1, wps=5815.1, ups=0.09, wpb=64795, bsz=128, num_updates=4275, lr=9.99738e-05, gnorm=2.283, loss_scale=8, train_wall=11, gb_free=2.8, wall=48726
2021-06-19 08:11:02 | INFO | train_inner | epoch 002: 1303 / 3002 loss=3.009, ppl=8.05, wps=5836.7, ups=0.09, wpb=64870, bsz=128, num_updates=4276, lr=9.99738e-05, gnorm=2.431, loss_scale=8, train_wall=11, gb_free=2.8, wall=48737
2021-06-19 08:11:13 | INFO | train_inner | epoch 002: 1304 / 3002 loss=2.666, ppl=6.35, wps=5915.5, ups=0.09, wpb=64906, bsz=128, num_updates=4277, lr=9.99738e-05, gnorm=2.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=48748
2021-06-19 08:11:25 | INFO | train_inner | epoch 002: 1305 / 3002 loss=2.568, ppl=5.93, wps=5758, ups=0.09, wpb=64803, bsz=128, num_updates=4278, lr=9.99738e-05, gnorm=2.13, loss_scale=8, train_wall=11, gb_free=2.8, wall=48759
2021-06-19 08:11:36 | INFO | train_inner | epoch 002: 1306 / 3002 loss=2.893, ppl=7.43, wps=5891.2, ups=0.09, wpb=64840, bsz=128, num_updates=4279, lr=9.99738e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=48770
2021-06-19 08:11:47 | INFO | train_inner | epoch 002: 1307 / 3002 loss=2.667, ppl=6.35, wps=5819.2, ups=0.09, wpb=64902, bsz=128, num_updates=4280, lr=9.99738e-05, gnorm=2.243, loss_scale=8, train_wall=11, gb_free=2.8, wall=48781
2021-06-19 08:11:58 | INFO | train_inner | epoch 002: 1308 / 3002 loss=2.771, ppl=6.83, wps=5900.3, ups=0.09, wpb=64871, bsz=128, num_updates=4281, lr=9.99737e-05, gnorm=2.344, loss_scale=8, train_wall=11, gb_free=2.8, wall=48792
2021-06-19 08:12:09 | INFO | train_inner | epoch 002: 1309 / 3002 loss=2.95, ppl=7.73, wps=5805.7, ups=0.09, wpb=64830, bsz=128, num_updates=4282, lr=9.99737e-05, gnorm=2.179, loss_scale=8, train_wall=11, gb_free=2.8, wall=48803
2021-06-19 08:12:20 | INFO | train_inner | epoch 002: 1310 / 3002 loss=2.613, ppl=6.12, wps=5964.7, ups=0.09, wpb=64863, bsz=128, num_updates=4283, lr=9.99737e-05, gnorm=2.157, loss_scale=8, train_wall=10, gb_free=2.8, wall=48814
2021-06-19 08:12:31 | INFO | train_inner | epoch 002: 1311 / 3002 loss=2.725, ppl=6.61, wps=5835.7, ups=0.09, wpb=64790, bsz=128, num_updates=4284, lr=9.99737e-05, gnorm=2.241, loss_scale=8, train_wall=11, gb_free=2.8, wall=48825
2021-06-19 08:12:42 | INFO | train_inner | epoch 002: 1312 / 3002 loss=2.757, ppl=6.76, wps=5831.4, ups=0.09, wpb=64837, bsz=128, num_updates=4285, lr=9.99737e-05, gnorm=2.208, loss_scale=8, train_wall=11, gb_free=2.8, wall=48836
2021-06-19 08:12:53 | INFO | train_inner | epoch 002: 1313 / 3002 loss=2.73, ppl=6.63, wps=5766.6, ups=0.09, wpb=64766, bsz=128, num_updates=4286, lr=9.99737e-05, gnorm=2.263, loss_scale=8, train_wall=11, gb_free=2.8, wall=48848
2021-06-19 08:13:04 | INFO | train_inner | epoch 002: 1314 / 3002 loss=2.681, ppl=6.41, wps=5827.4, ups=0.09, wpb=64887, bsz=128, num_updates=4287, lr=9.99737e-05, gnorm=2.208, loss_scale=8, train_wall=11, gb_free=2.8, wall=48859
2021-06-19 08:13:16 | INFO | train_inner | epoch 002: 1315 / 3002 loss=2.675, ppl=6.39, wps=5717.4, ups=0.09, wpb=64819, bsz=128, num_updates=4288, lr=9.99737e-05, gnorm=2.252, loss_scale=8, train_wall=11, gb_free=2.8, wall=48870
2021-06-19 08:13:27 | INFO | train_inner | epoch 002: 1316 / 3002 loss=2.707, ppl=6.53, wps=5817.7, ups=0.09, wpb=64588, bsz=128, num_updates=4289, lr=9.99737e-05, gnorm=2.324, loss_scale=8, train_wall=11, gb_free=2.8, wall=48881
2021-06-19 08:13:38 | INFO | train_inner | epoch 002: 1317 / 3002 loss=2.866, ppl=7.29, wps=5906.3, ups=0.09, wpb=64888, bsz=128, num_updates=4290, lr=9.99737e-05, gnorm=2.355, loss_scale=8, train_wall=11, gb_free=2.8, wall=48892
2021-06-19 08:13:49 | INFO | train_inner | epoch 002: 1318 / 3002 loss=2.574, ppl=5.96, wps=5870.1, ups=0.09, wpb=64855, bsz=128, num_updates=4291, lr=9.99737e-05, gnorm=2.193, loss_scale=8, train_wall=11, gb_free=2.8, wall=48903
2021-06-19 08:14:00 | INFO | train_inner | epoch 002: 1319 / 3002 loss=2.808, ppl=7, wps=5909.2, ups=0.09, wpb=64789, bsz=128, num_updates=4292, lr=9.99737e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=48914
2021-06-19 08:14:11 | INFO | train_inner | epoch 002: 1320 / 3002 loss=2.713, ppl=6.56, wps=5788.8, ups=0.09, wpb=64759, bsz=128, num_updates=4293, lr=9.99737e-05, gnorm=2.261, loss_scale=8, train_wall=11, gb_free=2.8, wall=48925
2021-06-19 08:14:22 | INFO | train_inner | epoch 002: 1321 / 3002 loss=3.01, ppl=8.05, wps=5868.1, ups=0.09, wpb=64803, bsz=128, num_updates=4294, lr=9.99736e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=48936
2021-06-19 08:14:33 | INFO | train_inner | epoch 002: 1322 / 3002 loss=2.778, ppl=6.86, wps=5888.8, ups=0.09, wpb=64878, bsz=128, num_updates=4295, lr=9.99736e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=48947
2021-06-19 08:14:44 | INFO | train_inner | epoch 002: 1323 / 3002 loss=2.861, ppl=7.26, wps=5906.3, ups=0.09, wpb=64883, bsz=128, num_updates=4296, lr=9.99736e-05, gnorm=2.355, loss_scale=8, train_wall=11, gb_free=2.8, wall=48958
2021-06-19 08:14:55 | INFO | train_inner | epoch 002: 1324 / 3002 loss=2.745, ppl=6.71, wps=5846.9, ups=0.09, wpb=64908, bsz=128, num_updates=4297, lr=9.99736e-05, gnorm=2.226, loss_scale=8, train_wall=11, gb_free=2.8, wall=48969
2021-06-19 08:15:06 | INFO | train_inner | epoch 002: 1325 / 3002 loss=2.614, ppl=6.12, wps=5810.9, ups=0.09, wpb=64870, bsz=128, num_updates=4298, lr=9.99736e-05, gnorm=2.112, loss_scale=8, train_wall=11, gb_free=2.8, wall=48981
2021-06-19 08:15:17 | INFO | train_inner | epoch 002: 1326 / 3002 loss=2.826, ppl=7.09, wps=5980, ups=0.09, wpb=64903, bsz=128, num_updates=4299, lr=9.99736e-05, gnorm=2.641, loss_scale=8, train_wall=10, gb_free=2.8, wall=48991
2021-06-19 08:15:28 | INFO | train_inner | epoch 002: 1327 / 3002 loss=2.692, ppl=6.46, wps=6159.7, ups=0.1, wpb=64741, bsz=128, num_updates=4300, lr=9.99736e-05, gnorm=2.237, loss_scale=8, train_wall=10, gb_free=2.8, wall=49002
2021-06-19 08:15:39 | INFO | train_inner | epoch 002: 1328 / 3002 loss=2.67, ppl=6.37, wps=5934.3, ups=0.09, wpb=64861, bsz=128, num_updates=4301, lr=9.99736e-05, gnorm=2.208, loss_scale=8, train_wall=10, gb_free=2.8, wall=49013
2021-06-19 08:15:50 | INFO | train_inner | epoch 002: 1329 / 3002 loss=2.756, ppl=6.75, wps=5764.9, ups=0.09, wpb=64768, bsz=128, num_updates=4302, lr=9.99736e-05, gnorm=2.254, loss_scale=8, train_wall=11, gb_free=2.8, wall=49024
2021-06-19 08:16:01 | INFO | train_inner | epoch 002: 1330 / 3002 loss=2.658, ppl=6.31, wps=5860.6, ups=0.09, wpb=64830, bsz=128, num_updates=4303, lr=9.99736e-05, gnorm=2.312, loss_scale=8, train_wall=11, gb_free=2.8, wall=49035
2021-06-19 08:16:12 | INFO | train_inner | epoch 002: 1331 / 3002 loss=2.759, ppl=6.77, wps=5783, ups=0.09, wpb=64883, bsz=128, num_updates=4304, lr=9.99736e-05, gnorm=2.394, loss_scale=8, train_wall=11, gb_free=2.8, wall=49046
2021-06-19 08:16:23 | INFO | train_inner | epoch 002: 1332 / 3002 loss=2.689, ppl=6.45, wps=5873.7, ups=0.09, wpb=64800, bsz=128, num_updates=4305, lr=9.99736e-05, gnorm=2.303, loss_scale=8, train_wall=11, gb_free=2.8, wall=49057
2021-06-19 08:16:34 | INFO | train_inner | epoch 002: 1333 / 3002 loss=2.7, ppl=6.5, wps=5855.7, ups=0.09, wpb=64869, bsz=128, num_updates=4306, lr=9.99735e-05, gnorm=2.218, loss_scale=8, train_wall=11, gb_free=2.8, wall=49069
2021-06-19 08:16:45 | INFO | train_inner | epoch 002: 1334 / 3002 loss=2.695, ppl=6.48, wps=5813.4, ups=0.09, wpb=64852, bsz=128, num_updates=4307, lr=9.99735e-05, gnorm=2.23, loss_scale=8, train_wall=11, gb_free=2.8, wall=49080
2021-06-19 08:16:56 | INFO | train_inner | epoch 002: 1335 / 3002 loss=2.908, ppl=7.5, wps=5805.2, ups=0.09, wpb=64773, bsz=128, num_updates=4308, lr=9.99735e-05, gnorm=2.333, loss_scale=8, train_wall=11, gb_free=2.8, wall=49091
2021-06-19 08:17:08 | INFO | train_inner | epoch 002: 1336 / 3002 loss=2.633, ppl=6.2, wps=5802.3, ups=0.09, wpb=64826, bsz=128, num_updates=4309, lr=9.99735e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=49102
2021-06-19 08:17:19 | INFO | train_inner | epoch 002: 1337 / 3002 loss=2.685, ppl=6.43, wps=5807.7, ups=0.09, wpb=64778, bsz=128, num_updates=4310, lr=9.99735e-05, gnorm=2.168, loss_scale=8, train_wall=11, gb_free=2.8, wall=49113
2021-06-19 08:17:30 | INFO | train_inner | epoch 002: 1338 / 3002 loss=2.695, ppl=6.48, wps=5775.8, ups=0.09, wpb=64873, bsz=128, num_updates=4311, lr=9.99735e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=49124
2021-06-19 08:17:41 | INFO | train_inner | epoch 002: 1339 / 3002 loss=2.711, ppl=6.55, wps=5892.6, ups=0.09, wpb=64864, bsz=128, num_updates=4312, lr=9.99735e-05, gnorm=2.301, loss_scale=8, train_wall=11, gb_free=2.8, wall=49135
2021-06-19 08:17:52 | INFO | train_inner | epoch 002: 1340 / 3002 loss=2.697, ppl=6.49, wps=5792.6, ups=0.09, wpb=64845, bsz=128, num_updates=4313, lr=9.99735e-05, gnorm=2.237, loss_scale=8, train_wall=11, gb_free=2.8, wall=49147
2021-06-19 08:18:04 | INFO | train_inner | epoch 002: 1341 / 3002 loss=2.797, ppl=6.95, wps=5744.2, ups=0.09, wpb=64899, bsz=128, num_updates=4314, lr=9.99735e-05, gnorm=2.309, loss_scale=8, train_wall=11, gb_free=2.8, wall=49158
2021-06-19 08:18:15 | INFO | train_inner | epoch 002: 1342 / 3002 loss=2.714, ppl=6.56, wps=5842, ups=0.09, wpb=64931, bsz=128, num_updates=4315, lr=9.99735e-05, gnorm=2.376, loss_scale=8, train_wall=11, gb_free=2.8, wall=49169
2021-06-19 08:18:26 | INFO | train_inner | epoch 002: 1343 / 3002 loss=2.64, ppl=6.23, wps=5892.7, ups=0.09, wpb=64846, bsz=128, num_updates=4316, lr=9.99735e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=49180
2021-06-19 08:18:37 | INFO | train_inner | epoch 002: 1344 / 3002 loss=2.611, ppl=6.11, wps=5772.5, ups=0.09, wpb=64839, bsz=128, num_updates=4317, lr=9.99735e-05, gnorm=2.435, loss_scale=8, train_wall=11, gb_free=2.8, wall=49191
2021-06-19 08:18:48 | INFO | train_inner | epoch 002: 1345 / 3002 loss=2.68, ppl=6.41, wps=5844.7, ups=0.09, wpb=64848, bsz=128, num_updates=4318, lr=9.99735e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=49202
2021-06-19 08:18:59 | INFO | train_inner | epoch 002: 1346 / 3002 loss=2.676, ppl=6.39, wps=5881.7, ups=0.09, wpb=64888, bsz=128, num_updates=4319, lr=9.99734e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=49213
2021-06-19 08:19:10 | INFO | train_inner | epoch 002: 1347 / 3002 loss=2.776, ppl=6.85, wps=5848.8, ups=0.09, wpb=64779, bsz=128, num_updates=4320, lr=9.99734e-05, gnorm=2.263, loss_scale=8, train_wall=11, gb_free=2.8, wall=49224
2021-06-19 08:19:21 | INFO | train_inner | epoch 002: 1348 / 3002 loss=2.793, ppl=6.93, wps=5802.8, ups=0.09, wpb=64826, bsz=128, num_updates=4321, lr=9.99734e-05, gnorm=2.289, loss_scale=8, train_wall=11, gb_free=2.8, wall=49236
2021-06-19 08:19:33 | INFO | train_inner | epoch 002: 1349 / 3002 loss=2.756, ppl=6.76, wps=5754.5, ups=0.09, wpb=64802, bsz=128, num_updates=4322, lr=9.99734e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=49247
2021-06-19 08:19:44 | INFO | train_inner | epoch 002: 1350 / 3002 loss=2.863, ppl=7.27, wps=5829.5, ups=0.09, wpb=64770, bsz=128, num_updates=4323, lr=9.99734e-05, gnorm=2.318, loss_scale=8, train_wall=11, gb_free=2.8, wall=49258
2021-06-19 08:19:55 | INFO | train_inner | epoch 002: 1351 / 3002 loss=2.804, ppl=6.98, wps=5826.8, ups=0.09, wpb=64769, bsz=128, num_updates=4324, lr=9.99734e-05, gnorm=2.163, loss_scale=8, train_wall=11, gb_free=2.8, wall=49269
2021-06-19 08:20:06 | INFO | train_inner | epoch 002: 1352 / 3002 loss=2.712, ppl=6.55, wps=5811.2, ups=0.09, wpb=64835, bsz=128, num_updates=4325, lr=9.99734e-05, gnorm=2.246, loss_scale=8, train_wall=11, gb_free=2.8, wall=49280
2021-06-19 08:20:17 | INFO | train_inner | epoch 002: 1353 / 3002 loss=2.803, ppl=6.98, wps=5779.8, ups=0.09, wpb=64885, bsz=128, num_updates=4326, lr=9.99734e-05, gnorm=2.171, loss_scale=8, train_wall=11, gb_free=2.8, wall=49291
2021-06-19 08:20:28 | INFO | train_inner | epoch 002: 1354 / 3002 loss=2.67, ppl=6.36, wps=5846.7, ups=0.09, wpb=64692, bsz=128, num_updates=4327, lr=9.99734e-05, gnorm=2.298, loss_scale=8, train_wall=11, gb_free=2.8, wall=49303
2021-06-19 08:20:39 | INFO | train_inner | epoch 002: 1355 / 3002 loss=2.859, ppl=7.25, wps=5822.8, ups=0.09, wpb=64879, bsz=128, num_updates=4328, lr=9.99734e-05, gnorm=2.39, loss_scale=8, train_wall=11, gb_free=2.8, wall=49314
2021-06-19 08:20:50 | INFO | train_inner | epoch 002: 1356 / 3002 loss=2.788, ppl=6.9, wps=5903, ups=0.09, wpb=64847, bsz=128, num_updates=4329, lr=9.99734e-05, gnorm=2.324, loss_scale=8, train_wall=11, gb_free=2.8, wall=49325
2021-06-19 08:21:01 | INFO | train_inner | epoch 002: 1357 / 3002 loss=2.838, ppl=7.15, wps=5825.3, ups=0.09, wpb=64789, bsz=128, num_updates=4330, lr=9.99734e-05, gnorm=2.329, loss_scale=8, train_wall=11, gb_free=2.8, wall=49336
2021-06-19 08:21:12 | INFO | train_inner | epoch 002: 1358 / 3002 loss=2.861, ppl=7.27, wps=5889.8, ups=0.09, wpb=64839, bsz=128, num_updates=4331, lr=9.99733e-05, gnorm=2.233, loss_scale=8, train_wall=11, gb_free=2.8, wall=49347
2021-06-19 08:21:24 | INFO | train_inner | epoch 002: 1359 / 3002 loss=2.766, ppl=6.8, wps=5774.1, ups=0.09, wpb=64695, bsz=128, num_updates=4332, lr=9.99733e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=49358
2021-06-19 08:21:35 | INFO | train_inner | epoch 002: 1360 / 3002 loss=2.751, ppl=6.73, wps=5809.9, ups=0.09, wpb=64814, bsz=128, num_updates=4333, lr=9.99733e-05, gnorm=2.163, loss_scale=8, train_wall=11, gb_free=2.8, wall=49369
2021-06-19 08:21:46 | INFO | train_inner | epoch 002: 1361 / 3002 loss=2.694, ppl=6.47, wps=5741.1, ups=0.09, wpb=64808, bsz=128, num_updates=4334, lr=9.99733e-05, gnorm=2.17, loss_scale=8, train_wall=11, gb_free=2.8, wall=49380
2021-06-19 08:21:57 | INFO | train_inner | epoch 002: 1362 / 3002 loss=2.812, ppl=7.02, wps=5857.3, ups=0.09, wpb=64772, bsz=128, num_updates=4335, lr=9.99733e-05, gnorm=2.231, loss_scale=8, train_wall=11, gb_free=2.8, wall=49392
2021-06-19 08:22:08 | INFO | train_inner | epoch 002: 1363 / 3002 loss=2.791, ppl=6.92, wps=5849, ups=0.09, wpb=64770, bsz=128, num_updates=4336, lr=9.99733e-05, gnorm=2.226, loss_scale=8, train_wall=11, gb_free=2.8, wall=49403
2021-06-19 08:22:19 | INFO | train_inner | epoch 002: 1364 / 3002 loss=2.809, ppl=7.01, wps=5770.8, ups=0.09, wpb=64737, bsz=128, num_updates=4337, lr=9.99733e-05, gnorm=2.379, loss_scale=8, train_wall=11, gb_free=2.8, wall=49414
2021-06-19 08:22:31 | INFO | train_inner | epoch 002: 1365 / 3002 loss=2.698, ppl=6.49, wps=5843.3, ups=0.09, wpb=64847, bsz=128, num_updates=4338, lr=9.99733e-05, gnorm=2.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=49425
2021-06-19 08:22:42 | INFO | train_inner | epoch 002: 1366 / 3002 loss=2.847, ppl=7.2, wps=5793.5, ups=0.09, wpb=64810, bsz=128, num_updates=4339, lr=9.99733e-05, gnorm=2.303, loss_scale=8, train_wall=11, gb_free=2.8, wall=49436
2021-06-19 08:22:53 | INFO | train_inner | epoch 002: 1367 / 3002 loss=2.717, ppl=6.58, wps=5790.1, ups=0.09, wpb=64717, bsz=128, num_updates=4340, lr=9.99733e-05, gnorm=2.29, loss_scale=8, train_wall=11, gb_free=2.8, wall=49447
2021-06-19 08:23:04 | INFO | train_inner | epoch 002: 1368 / 3002 loss=2.801, ppl=6.97, wps=5860.4, ups=0.09, wpb=64843, bsz=128, num_updates=4341, lr=9.99733e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=49458
2021-06-19 08:23:15 | INFO | train_inner | epoch 002: 1369 / 3002 loss=2.747, ppl=6.71, wps=5987.5, ups=0.09, wpb=64829, bsz=128, num_updates=4342, lr=9.99733e-05, gnorm=2.175, loss_scale=8, train_wall=10, gb_free=2.8, wall=49469
2021-06-19 08:23:26 | INFO | train_inner | epoch 002: 1370 / 3002 loss=2.646, ppl=6.26, wps=5787.8, ups=0.09, wpb=64831, bsz=128, num_updates=4343, lr=9.99733e-05, gnorm=2.168, loss_scale=8, train_wall=11, gb_free=2.8, wall=49480
2021-06-19 08:23:37 | INFO | train_inner | epoch 002: 1371 / 3002 loss=2.772, ppl=6.83, wps=5864.9, ups=0.09, wpb=64834, bsz=128, num_updates=4344, lr=9.99732e-05, gnorm=2.174, loss_scale=8, train_wall=11, gb_free=2.8, wall=49491
2021-06-19 08:23:48 | INFO | train_inner | epoch 002: 1372 / 3002 loss=2.797, ppl=6.95, wps=5888.3, ups=0.09, wpb=64893, bsz=128, num_updates=4345, lr=9.99732e-05, gnorm=2.267, loss_scale=8, train_wall=11, gb_free=2.8, wall=49502
2021-06-19 08:23:59 | INFO | train_inner | epoch 002: 1373 / 3002 loss=2.754, ppl=6.75, wps=5855.3, ups=0.09, wpb=64869, bsz=128, num_updates=4346, lr=9.99732e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=49514
2021-06-19 08:24:10 | INFO | train_inner | epoch 002: 1374 / 3002 loss=2.525, ppl=5.76, wps=5875.7, ups=0.09, wpb=64928, bsz=128, num_updates=4347, lr=9.99732e-05, gnorm=2.187, loss_scale=8, train_wall=11, gb_free=2.8, wall=49525
2021-06-19 08:24:21 | INFO | train_inner | epoch 002: 1375 / 3002 loss=2.785, ppl=6.89, wps=5885.6, ups=0.09, wpb=64822, bsz=128, num_updates=4348, lr=9.99732e-05, gnorm=2.404, loss_scale=8, train_wall=11, gb_free=2.8, wall=49536
2021-06-19 08:24:32 | INFO | train_inner | epoch 002: 1376 / 3002 loss=2.788, ppl=6.91, wps=5881.8, ups=0.09, wpb=64794, bsz=128, num_updates=4349, lr=9.99732e-05, gnorm=2.336, loss_scale=8, train_wall=11, gb_free=2.8, wall=49547
2021-06-19 08:24:43 | INFO | train_inner | epoch 002: 1377 / 3002 loss=2.6, ppl=6.06, wps=5837.9, ups=0.09, wpb=64863, bsz=128, num_updates=4350, lr=9.99732e-05, gnorm=2.422, loss_scale=8, train_wall=11, gb_free=2.8, wall=49558
2021-06-19 08:24:54 | INFO | train_inner | epoch 002: 1378 / 3002 loss=2.642, ppl=6.24, wps=5897.6, ups=0.09, wpb=64886, bsz=128, num_updates=4351, lr=9.99732e-05, gnorm=2.363, loss_scale=8, train_wall=11, gb_free=2.8, wall=49569
2021-06-19 08:25:06 | INFO | train_inner | epoch 002: 1379 / 3002 loss=2.566, ppl=5.92, wps=5760.1, ups=0.09, wpb=64886, bsz=128, num_updates=4352, lr=9.99732e-05, gnorm=2.33, loss_scale=8, train_wall=11, gb_free=2.8, wall=49580
2021-06-19 08:25:17 | INFO | train_inner | epoch 002: 1380 / 3002 loss=2.648, ppl=6.27, wps=5839.1, ups=0.09, wpb=64863, bsz=128, num_updates=4353, lr=9.99732e-05, gnorm=2.202, loss_scale=8, train_wall=11, gb_free=2.8, wall=49591
2021-06-19 08:25:28 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-19 08:25:39 | INFO | train_inner | epoch 002: 1382 / 3002 loss=2.75, ppl=6.73, wps=2898.4, ups=0.04, wpb=64879, bsz=128, num_updates=4354, lr=9.99732e-05, gnorm=2.28, loss_scale=4, train_wall=21, gb_free=2.8, wall=49613
2021-06-19 08:25:50 | INFO | train_inner | epoch 002: 1383 / 3002 loss=2.854, ppl=7.23, wps=5817.6, ups=0.09, wpb=64770, bsz=128, num_updates=4355, lr=9.99732e-05, gnorm=2.322, loss_scale=4, train_wall=11, gb_free=2.8, wall=49625
2021-06-19 08:26:01 | INFO | train_inner | epoch 002: 1384 / 3002 loss=2.728, ppl=6.62, wps=5805.8, ups=0.09, wpb=64798, bsz=128, num_updates=4356, lr=9.99731e-05, gnorm=2.484, loss_scale=4, train_wall=11, gb_free=2.8, wall=49636
2021-06-19 08:26:13 | INFO | train_inner | epoch 002: 1385 / 3002 loss=2.811, ppl=7.02, wps=5836.9, ups=0.09, wpb=64802, bsz=128, num_updates=4357, lr=9.99731e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=49647
2021-06-19 08:26:24 | INFO | train_inner | epoch 002: 1386 / 3002 loss=2.706, ppl=6.53, wps=5893, ups=0.09, wpb=64841, bsz=128, num_updates=4358, lr=9.99731e-05, gnorm=2.241, loss_scale=4, train_wall=11, gb_free=2.8, wall=49658
2021-06-19 08:26:35 | INFO | train_inner | epoch 002: 1387 / 3002 loss=2.771, ppl=6.83, wps=5772.7, ups=0.09, wpb=64835, bsz=128, num_updates=4359, lr=9.99731e-05, gnorm=2.308, loss_scale=4, train_wall=11, gb_free=2.8, wall=49669
2021-06-19 08:26:46 | INFO | train_inner | epoch 002: 1388 / 3002 loss=2.702, ppl=6.51, wps=5889.4, ups=0.09, wpb=64861, bsz=128, num_updates=4360, lr=9.99731e-05, gnorm=2.251, loss_scale=4, train_wall=11, gb_free=2.8, wall=49680
2021-06-19 08:26:57 | INFO | train_inner | epoch 002: 1389 / 3002 loss=2.696, ppl=6.48, wps=5914.8, ups=0.09, wpb=64853, bsz=128, num_updates=4361, lr=9.99731e-05, gnorm=2.281, loss_scale=4, train_wall=11, gb_free=2.8, wall=49691
2021-06-19 08:27:08 | INFO | train_inner | epoch 002: 1390 / 3002 loss=2.625, ppl=6.17, wps=5766.9, ups=0.09, wpb=64840, bsz=128, num_updates=4362, lr=9.99731e-05, gnorm=2.344, loss_scale=4, train_wall=11, gb_free=2.8, wall=49702
2021-06-19 08:27:19 | INFO | train_inner | epoch 002: 1391 / 3002 loss=2.782, ppl=6.88, wps=5779.3, ups=0.09, wpb=64868, bsz=128, num_updates=4363, lr=9.99731e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=49714
2021-06-19 08:27:30 | INFO | train_inner | epoch 002: 1392 / 3002 loss=2.9, ppl=7.46, wps=5792.8, ups=0.09, wpb=64772, bsz=128, num_updates=4364, lr=9.99731e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=49725
2021-06-19 08:27:41 | INFO | train_inner | epoch 002: 1393 / 3002 loss=2.789, ppl=6.91, wps=5924.6, ups=0.09, wpb=64885, bsz=128, num_updates=4365, lr=9.99731e-05, gnorm=2.305, loss_scale=4, train_wall=10, gb_free=2.8, wall=49736
2021-06-19 08:27:53 | INFO | train_inner | epoch 002: 1394 / 3002 loss=2.629, ppl=6.19, wps=5725.3, ups=0.09, wpb=64804, bsz=128, num_updates=4366, lr=9.99731e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=49747
2021-06-19 08:28:04 | INFO | train_inner | epoch 002: 1395 / 3002 loss=2.763, ppl=6.79, wps=5759.2, ups=0.09, wpb=64811, bsz=128, num_updates=4367, lr=9.99731e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=49758
2021-06-19 08:28:15 | INFO | train_inner | epoch 002: 1396 / 3002 loss=3.079, ppl=8.45, wps=5879.3, ups=0.09, wpb=64805, bsz=128, num_updates=4368, lr=9.99731e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=49769
2021-06-19 08:28:26 | INFO | train_inner | epoch 002: 1397 / 3002 loss=2.769, ppl=6.82, wps=5832.5, ups=0.09, wpb=64856, bsz=128, num_updates=4369, lr=9.9973e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=49780
2021-06-19 08:28:37 | INFO | train_inner | epoch 002: 1398 / 3002 loss=2.677, ppl=6.4, wps=5797, ups=0.09, wpb=64832, bsz=128, num_updates=4370, lr=9.9973e-05, gnorm=2.119, loss_scale=4, train_wall=11, gb_free=2.8, wall=49792
2021-06-19 08:28:48 | INFO | train_inner | epoch 002: 1399 / 3002 loss=2.623, ppl=6.16, wps=5837.2, ups=0.09, wpb=64858, bsz=128, num_updates=4371, lr=9.9973e-05, gnorm=2.154, loss_scale=4, train_wall=11, gb_free=2.8, wall=49803
2021-06-19 08:29:00 | INFO | train_inner | epoch 002: 1400 / 3002 loss=2.861, ppl=7.27, wps=5771.9, ups=0.09, wpb=64798, bsz=128, num_updates=4372, lr=9.9973e-05, gnorm=2.352, loss_scale=4, train_wall=11, gb_free=2.8, wall=49814
2021-06-19 08:29:11 | INFO | train_inner | epoch 002: 1401 / 3002 loss=2.688, ppl=6.44, wps=5881.9, ups=0.09, wpb=64811, bsz=128, num_updates=4373, lr=9.9973e-05, gnorm=2.359, loss_scale=4, train_wall=11, gb_free=2.8, wall=49825
2021-06-19 08:29:22 | INFO | train_inner | epoch 002: 1402 / 3002 loss=2.677, ppl=6.4, wps=5840.5, ups=0.09, wpb=64862, bsz=128, num_updates=4374, lr=9.9973e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=49836
2021-06-19 08:29:33 | INFO | train_inner | epoch 002: 1403 / 3002 loss=2.742, ppl=6.69, wps=5899.8, ups=0.09, wpb=64869, bsz=128, num_updates=4375, lr=9.9973e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=49847
2021-06-19 08:29:44 | INFO | train_inner | epoch 002: 1404 / 3002 loss=2.681, ppl=6.41, wps=5886.6, ups=0.09, wpb=64886, bsz=128, num_updates=4376, lr=9.9973e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=49858
2021-06-19 08:29:55 | INFO | train_inner | epoch 002: 1405 / 3002 loss=2.76, ppl=6.77, wps=5843, ups=0.09, wpb=64838, bsz=128, num_updates=4377, lr=9.9973e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=49869
2021-06-19 08:30:06 | INFO | train_inner | epoch 002: 1406 / 3002 loss=2.695, ppl=6.47, wps=5884, ups=0.09, wpb=64850, bsz=128, num_updates=4378, lr=9.9973e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=49880
2021-06-19 08:30:17 | INFO | train_inner | epoch 002: 1407 / 3002 loss=2.807, ppl=7, wps=5810.4, ups=0.09, wpb=64859, bsz=128, num_updates=4379, lr=9.9973e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=49891
2021-06-19 08:30:28 | INFO | train_inner | epoch 002: 1408 / 3002 loss=2.712, ppl=6.55, wps=5880.8, ups=0.09, wpb=64796, bsz=128, num_updates=4380, lr=9.9973e-05, gnorm=2.479, loss_scale=4, train_wall=11, gb_free=2.8, wall=49902
2021-06-19 08:30:39 | INFO | train_inner | epoch 002: 1409 / 3002 loss=2.755, ppl=6.75, wps=5883.4, ups=0.09, wpb=64798, bsz=128, num_updates=4381, lr=9.99729e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=49913
2021-06-19 08:30:50 | INFO | train_inner | epoch 002: 1410 / 3002 loss=2.525, ppl=5.75, wps=5972.5, ups=0.09, wpb=64890, bsz=128, num_updates=4382, lr=9.99729e-05, gnorm=2.243, loss_scale=4, train_wall=10, gb_free=2.8, wall=49924
2021-06-19 08:31:01 | INFO | train_inner | epoch 002: 1411 / 3002 loss=2.81, ppl=7.01, wps=5991.3, ups=0.09, wpb=64905, bsz=128, num_updates=4383, lr=9.99729e-05, gnorm=2.244, loss_scale=4, train_wall=10, gb_free=2.8, wall=49935
2021-06-19 08:31:12 | INFO | train_inner | epoch 002: 1412 / 3002 loss=2.716, ppl=6.57, wps=5781.3, ups=0.09, wpb=64800, bsz=128, num_updates=4384, lr=9.99729e-05, gnorm=2.343, loss_scale=4, train_wall=11, gb_free=2.8, wall=49946
2021-06-19 08:31:23 | INFO | train_inner | epoch 002: 1413 / 3002 loss=2.702, ppl=6.51, wps=5849, ups=0.09, wpb=64914, bsz=128, num_updates=4385, lr=9.99729e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=49957
2021-06-19 08:31:34 | INFO | train_inner | epoch 002: 1414 / 3002 loss=2.822, ppl=7.07, wps=5814.6, ups=0.09, wpb=64842, bsz=128, num_updates=4386, lr=9.99729e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=49969
2021-06-19 08:31:45 | INFO | train_inner | epoch 002: 1415 / 3002 loss=2.652, ppl=6.28, wps=5838.5, ups=0.09, wpb=64745, bsz=128, num_updates=4387, lr=9.99729e-05, gnorm=2.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=49980
2021-06-19 08:31:56 | INFO | train_inner | epoch 002: 1416 / 3002 loss=2.739, ppl=6.68, wps=5890.5, ups=0.09, wpb=64826, bsz=128, num_updates=4388, lr=9.99729e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=49991
2021-06-19 08:32:08 | INFO | train_inner | epoch 002: 1417 / 3002 loss=2.767, ppl=6.81, wps=5775.4, ups=0.09, wpb=64836, bsz=128, num_updates=4389, lr=9.99729e-05, gnorm=2.219, loss_scale=4, train_wall=11, gb_free=2.8, wall=50002
2021-06-19 08:32:18 | INFO | train_inner | epoch 002: 1418 / 3002 loss=2.73, ppl=6.64, wps=6041.3, ups=0.09, wpb=64787, bsz=128, num_updates=4390, lr=9.99729e-05, gnorm=2.202, loss_scale=4, train_wall=10, gb_free=2.8, wall=50013
2021-06-19 08:32:29 | INFO | train_inner | epoch 002: 1419 / 3002 loss=2.818, ppl=7.05, wps=5843.3, ups=0.09, wpb=64747, bsz=128, num_updates=4391, lr=9.99729e-05, gnorm=2.233, loss_scale=4, train_wall=11, gb_free=2.8, wall=50024
2021-06-19 08:32:41 | INFO | train_inner | epoch 002: 1420 / 3002 loss=2.73, ppl=6.64, wps=5719.8, ups=0.09, wpb=64754, bsz=128, num_updates=4392, lr=9.99729e-05, gnorm=2.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=50035
2021-06-19 08:32:52 | INFO | train_inner | epoch 002: 1421 / 3002 loss=2.795, ppl=6.94, wps=5897.2, ups=0.09, wpb=64866, bsz=128, num_updates=4393, lr=9.99729e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=50046
2021-06-19 08:33:03 | INFO | train_inner | epoch 002: 1422 / 3002 loss=2.572, ppl=5.94, wps=5945.5, ups=0.09, wpb=64851, bsz=128, num_updates=4394, lr=9.99728e-05, gnorm=2.188, loss_scale=4, train_wall=10, gb_free=2.8, wall=50057
2021-06-19 08:33:14 | INFO | train_inner | epoch 002: 1423 / 3002 loss=2.77, ppl=6.82, wps=5862.9, ups=0.09, wpb=64874, bsz=128, num_updates=4395, lr=9.99728e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=50068
2021-06-19 08:33:25 | INFO | train_inner | epoch 002: 1424 / 3002 loss=2.729, ppl=6.63, wps=5793.3, ups=0.09, wpb=64726, bsz=128, num_updates=4396, lr=9.99728e-05, gnorm=2.361, loss_scale=4, train_wall=11, gb_free=2.8, wall=50079
2021-06-19 08:33:36 | INFO | train_inner | epoch 002: 1425 / 3002 loss=2.883, ppl=7.38, wps=5897.9, ups=0.09, wpb=64891, bsz=128, num_updates=4397, lr=9.99728e-05, gnorm=2.341, loss_scale=4, train_wall=11, gb_free=2.8, wall=50090
2021-06-19 08:33:47 | INFO | train_inner | epoch 002: 1426 / 3002 loss=2.681, ppl=6.41, wps=5838, ups=0.09, wpb=64861, bsz=128, num_updates=4398, lr=9.99728e-05, gnorm=2.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=50101
2021-06-19 08:33:58 | INFO | train_inner | epoch 002: 1427 / 3002 loss=2.601, ppl=6.07, wps=5823.9, ups=0.09, wpb=64799, bsz=128, num_updates=4399, lr=9.99728e-05, gnorm=2.161, loss_scale=4, train_wall=11, gb_free=2.8, wall=50112
2021-06-19 08:34:09 | INFO | train_inner | epoch 002: 1428 / 3002 loss=2.685, ppl=6.43, wps=5758.5, ups=0.09, wpb=64761, bsz=128, num_updates=4400, lr=9.99728e-05, gnorm=2.177, loss_scale=4, train_wall=11, gb_free=2.8, wall=50124
2021-06-19 08:34:20 | INFO | train_inner | epoch 002: 1429 / 3002 loss=2.783, ppl=6.88, wps=5885.7, ups=0.09, wpb=64861, bsz=128, num_updates=4401, lr=9.99728e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=50135
2021-06-19 08:34:32 | INFO | train_inner | epoch 002: 1430 / 3002 loss=2.654, ppl=6.3, wps=5698.8, ups=0.09, wpb=64817, bsz=128, num_updates=4402, lr=9.99728e-05, gnorm=2.283, loss_scale=4, train_wall=11, gb_free=2.8, wall=50146
2021-06-19 08:34:43 | INFO | train_inner | epoch 002: 1431 / 3002 loss=2.764, ppl=6.79, wps=5860.7, ups=0.09, wpb=64778, bsz=128, num_updates=4403, lr=9.99728e-05, gnorm=2.123, loss_scale=4, train_wall=11, gb_free=2.8, wall=50157
2021-06-19 08:34:54 | INFO | train_inner | epoch 002: 1432 / 3002 loss=2.64, ppl=6.23, wps=5851.1, ups=0.09, wpb=64842, bsz=128, num_updates=4404, lr=9.99728e-05, gnorm=2.278, loss_scale=4, train_wall=11, gb_free=2.8, wall=50168
2021-06-19 08:35:05 | INFO | train_inner | epoch 002: 1433 / 3002 loss=2.593, ppl=6.03, wps=5957.6, ups=0.09, wpb=64763, bsz=128, num_updates=4405, lr=9.99728e-05, gnorm=2.156, loss_scale=4, train_wall=10, gb_free=2.8, wall=50179
2021-06-19 08:35:16 | INFO | train_inner | epoch 002: 1434 / 3002 loss=2.574, ppl=5.96, wps=5911.4, ups=0.09, wpb=64865, bsz=128, num_updates=4406, lr=9.99727e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=50190
2021-06-19 08:35:27 | INFO | train_inner | epoch 002: 1435 / 3002 loss=3.003, ppl=8.02, wps=5884.6, ups=0.09, wpb=64845, bsz=128, num_updates=4407, lr=9.99727e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=50201
2021-06-19 08:35:38 | INFO | train_inner | epoch 002: 1436 / 3002 loss=2.858, ppl=7.25, wps=5786.1, ups=0.09, wpb=64832, bsz=128, num_updates=4408, lr=9.99727e-05, gnorm=2.376, loss_scale=4, train_wall=11, gb_free=2.8, wall=50212
2021-06-19 08:35:49 | INFO | train_inner | epoch 002: 1437 / 3002 loss=2.679, ppl=6.4, wps=5898.2, ups=0.09, wpb=64905, bsz=128, num_updates=4409, lr=9.99727e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=50223
2021-06-19 08:36:00 | INFO | train_inner | epoch 002: 1438 / 3002 loss=2.632, ppl=6.2, wps=5863.7, ups=0.09, wpb=64805, bsz=128, num_updates=4410, lr=9.99727e-05, gnorm=2.241, loss_scale=4, train_wall=11, gb_free=2.8, wall=50234
2021-06-19 08:36:11 | INFO | train_inner | epoch 002: 1439 / 3002 loss=2.749, ppl=6.72, wps=5897.5, ups=0.09, wpb=64810, bsz=128, num_updates=4411, lr=9.99727e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=50245
2021-06-19 08:36:22 | INFO | train_inner | epoch 002: 1440 / 3002 loss=2.793, ppl=6.93, wps=5724.6, ups=0.09, wpb=64788, bsz=128, num_updates=4412, lr=9.99727e-05, gnorm=2.198, loss_scale=4, train_wall=11, gb_free=2.8, wall=50257
2021-06-19 08:36:33 | INFO | train_inner | epoch 002: 1441 / 3002 loss=2.754, ppl=6.75, wps=5851.5, ups=0.09, wpb=64812, bsz=128, num_updates=4413, lr=9.99727e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=50268
2021-06-19 08:36:44 | INFO | train_inner | epoch 002: 1442 / 3002 loss=2.714, ppl=6.56, wps=5879.7, ups=0.09, wpb=64834, bsz=128, num_updates=4414, lr=9.99727e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=50279
2021-06-19 08:36:56 | INFO | train_inner | epoch 002: 1443 / 3002 loss=2.769, ppl=6.82, wps=5799.1, ups=0.09, wpb=64819, bsz=128, num_updates=4415, lr=9.99727e-05, gnorm=2.382, loss_scale=4, train_wall=11, gb_free=2.8, wall=50290
2021-06-19 08:37:07 | INFO | train_inner | epoch 002: 1444 / 3002 loss=2.695, ppl=6.47, wps=5760, ups=0.09, wpb=64826, bsz=128, num_updates=4416, lr=9.99727e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=50301
2021-06-19 08:37:18 | INFO | train_inner | epoch 002: 1445 / 3002 loss=2.825, ppl=7.08, wps=5913.1, ups=0.09, wpb=64817, bsz=128, num_updates=4417, lr=9.99727e-05, gnorm=2.392, loss_scale=4, train_wall=11, gb_free=2.8, wall=50312
2021-06-19 08:37:29 | INFO | train_inner | epoch 002: 1446 / 3002 loss=2.788, ppl=6.91, wps=5917.5, ups=0.09, wpb=64808, bsz=128, num_updates=4418, lr=9.99727e-05, gnorm=2.371, loss_scale=4, train_wall=11, gb_free=2.8, wall=50323
2021-06-19 08:37:40 | INFO | train_inner | epoch 002: 1447 / 3002 loss=2.522, ppl=5.74, wps=5903.1, ups=0.09, wpb=64901, bsz=128, num_updates=4419, lr=9.99726e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=50334
2021-06-19 08:37:51 | INFO | train_inner | epoch 002: 1448 / 3002 loss=2.848, ppl=7.2, wps=5913.3, ups=0.09, wpb=64852, bsz=128, num_updates=4420, lr=9.99726e-05, gnorm=2.169, loss_scale=4, train_wall=10, gb_free=2.8, wall=50345
2021-06-19 08:38:02 | INFO | train_inner | epoch 002: 1449 / 3002 loss=2.727, ppl=6.62, wps=5803.3, ups=0.09, wpb=64788, bsz=128, num_updates=4421, lr=9.99726e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=50356
2021-06-19 08:38:13 | INFO | train_inner | epoch 002: 1450 / 3002 loss=2.718, ppl=6.58, wps=5859.1, ups=0.09, wpb=64862, bsz=128, num_updates=4422, lr=9.99726e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=50367
2021-06-19 08:38:24 | INFO | train_inner | epoch 002: 1451 / 3002 loss=2.599, ppl=6.06, wps=6001.8, ups=0.09, wpb=64808, bsz=128, num_updates=4423, lr=9.99726e-05, gnorm=2.291, loss_scale=4, train_wall=10, gb_free=2.8, wall=50378
2021-06-19 08:38:35 | INFO | train_inner | epoch 002: 1452 / 3002 loss=2.777, ppl=6.86, wps=5771.8, ups=0.09, wpb=64893, bsz=128, num_updates=4424, lr=9.99726e-05, gnorm=2.294, loss_scale=4, train_wall=11, gb_free=2.8, wall=50389
2021-06-19 08:38:46 | INFO | train_inner | epoch 002: 1453 / 3002 loss=2.968, ppl=7.82, wps=5753.2, ups=0.09, wpb=64823, bsz=128, num_updates=4425, lr=9.99726e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=50401
2021-06-19 08:38:57 | INFO | train_inner | epoch 002: 1454 / 3002 loss=2.739, ppl=6.68, wps=5882, ups=0.09, wpb=64829, bsz=128, num_updates=4426, lr=9.99726e-05, gnorm=2.292, loss_scale=4, train_wall=11, gb_free=2.8, wall=50412
2021-06-19 08:39:08 | INFO | train_inner | epoch 002: 1455 / 3002 loss=2.914, ppl=7.54, wps=5862.9, ups=0.09, wpb=64842, bsz=128, num_updates=4427, lr=9.99726e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=50423
2021-06-19 08:39:19 | INFO | train_inner | epoch 002: 1456 / 3002 loss=2.777, ppl=6.86, wps=5785.8, ups=0.09, wpb=64722, bsz=128, num_updates=4428, lr=9.99726e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=50434
2021-06-19 08:39:31 | INFO | train_inner | epoch 002: 1457 / 3002 loss=2.796, ppl=6.95, wps=5820, ups=0.09, wpb=64882, bsz=128, num_updates=4429, lr=9.99726e-05, gnorm=2.274, loss_scale=4, train_wall=11, gb_free=2.8, wall=50445
2021-06-19 08:39:42 | INFO | train_inner | epoch 002: 1458 / 3002 loss=2.588, ppl=6.01, wps=5788.5, ups=0.09, wpb=64827, bsz=128, num_updates=4430, lr=9.99726e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=50456
2021-06-19 08:39:53 | INFO | train_inner | epoch 002: 1459 / 3002 loss=2.519, ppl=5.73, wps=5912.4, ups=0.09, wpb=64873, bsz=128, num_updates=4431, lr=9.99725e-05, gnorm=2.133, loss_scale=4, train_wall=11, gb_free=2.8, wall=50467
2021-06-19 08:40:04 | INFO | train_inner | epoch 002: 1460 / 3002 loss=2.746, ppl=6.71, wps=5833.4, ups=0.09, wpb=64825, bsz=128, num_updates=4432, lr=9.99725e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=50478
2021-06-19 08:40:15 | INFO | train_inner | epoch 002: 1461 / 3002 loss=2.797, ppl=6.95, wps=5862.3, ups=0.09, wpb=64796, bsz=128, num_updates=4433, lr=9.99725e-05, gnorm=2.255, loss_scale=4, train_wall=11, gb_free=2.8, wall=50489
2021-06-19 08:40:26 | INFO | train_inner | epoch 002: 1462 / 3002 loss=2.794, ppl=6.94, wps=5882.3, ups=0.09, wpb=64859, bsz=128, num_updates=4434, lr=9.99725e-05, gnorm=2.191, loss_scale=4, train_wall=11, gb_free=2.8, wall=50500
2021-06-19 08:40:37 | INFO | train_inner | epoch 002: 1463 / 3002 loss=2.874, ppl=7.33, wps=5863.3, ups=0.09, wpb=64826, bsz=128, num_updates=4435, lr=9.99725e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=50511
2021-06-19 08:40:48 | INFO | train_inner | epoch 002: 1464 / 3002 loss=2.724, ppl=6.61, wps=5825.3, ups=0.09, wpb=64873, bsz=128, num_updates=4436, lr=9.99725e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=50523
2021-06-19 08:40:59 | INFO | train_inner | epoch 002: 1465 / 3002 loss=2.681, ppl=6.41, wps=5858.5, ups=0.09, wpb=64784, bsz=128, num_updates=4437, lr=9.99725e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=50534
2021-06-19 08:41:11 | INFO | train_inner | epoch 002: 1466 / 3002 loss=3.061, ppl=8.35, wps=5696.9, ups=0.09, wpb=64713, bsz=128, num_updates=4438, lr=9.99725e-05, gnorm=2.32, loss_scale=4, train_wall=11, gb_free=2.8, wall=50545
2021-06-19 08:41:21 | INFO | train_inner | epoch 002: 1467 / 3002 loss=2.803, ppl=6.98, wps=6073.7, ups=0.09, wpb=64944, bsz=128, num_updates=4439, lr=9.99725e-05, gnorm=2.258, loss_scale=4, train_wall=10, gb_free=2.8, wall=50556
2021-06-19 08:41:32 | INFO | train_inner | epoch 002: 1468 / 3002 loss=2.621, ppl=6.15, wps=5987.2, ups=0.09, wpb=64873, bsz=128, num_updates=4440, lr=9.99725e-05, gnorm=2.242, loss_scale=4, train_wall=10, gb_free=2.8, wall=50566
2021-06-19 08:41:43 | INFO | train_inner | epoch 002: 1469 / 3002 loss=2.607, ppl=6.09, wps=5882.6, ups=0.09, wpb=64827, bsz=128, num_updates=4441, lr=9.99725e-05, gnorm=2.132, loss_scale=4, train_wall=11, gb_free=2.8, wall=50577
2021-06-19 08:41:54 | INFO | train_inner | epoch 002: 1470 / 3002 loss=2.648, ppl=6.27, wps=5885.7, ups=0.09, wpb=64868, bsz=128, num_updates=4442, lr=9.99725e-05, gnorm=2.213, loss_scale=4, train_wall=11, gb_free=2.8, wall=50588
2021-06-19 08:42:05 | INFO | train_inner | epoch 002: 1471 / 3002 loss=2.879, ppl=7.35, wps=5770.1, ups=0.09, wpb=64809, bsz=128, num_updates=4443, lr=9.99725e-05, gnorm=2.352, loss_scale=4, train_wall=11, gb_free=2.8, wall=50600
2021-06-19 08:42:17 | INFO | train_inner | epoch 002: 1472 / 3002 loss=2.665, ppl=6.34, wps=5739.6, ups=0.09, wpb=64790, bsz=128, num_updates=4444, lr=9.99724e-05, gnorm=2.174, loss_scale=4, train_wall=11, gb_free=2.8, wall=50611
2021-06-19 08:42:28 | INFO | train_inner | epoch 002: 1473 / 3002 loss=2.83, ppl=7.11, wps=5877.4, ups=0.09, wpb=64732, bsz=128, num_updates=4445, lr=9.99724e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=50622
2021-06-19 08:42:39 | INFO | train_inner | epoch 002: 1474 / 3002 loss=2.563, ppl=5.91, wps=5910.4, ups=0.09, wpb=64893, bsz=128, num_updates=4446, lr=9.99724e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=50633
2021-06-19 08:42:50 | INFO | train_inner | epoch 002: 1475 / 3002 loss=2.72, ppl=6.59, wps=5889.8, ups=0.09, wpb=64853, bsz=128, num_updates=4447, lr=9.99724e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=50644
2021-06-19 08:43:01 | INFO | train_inner | epoch 002: 1476 / 3002 loss=2.955, ppl=7.75, wps=5950.7, ups=0.09, wpb=64830, bsz=128, num_updates=4448, lr=9.99724e-05, gnorm=2.097, loss_scale=4, train_wall=10, gb_free=2.8, wall=50655
2021-06-19 08:43:12 | INFO | train_inner | epoch 002: 1477 / 3002 loss=2.8, ppl=6.96, wps=5820.7, ups=0.09, wpb=64784, bsz=128, num_updates=4449, lr=9.99724e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=50666
2021-06-19 08:43:23 | INFO | train_inner | epoch 002: 1478 / 3002 loss=2.672, ppl=6.37, wps=5964, ups=0.09, wpb=64760, bsz=128, num_updates=4450, lr=9.99724e-05, gnorm=2.121, loss_scale=4, train_wall=10, gb_free=2.8, wall=50677
2021-06-19 08:43:34 | INFO | train_inner | epoch 002: 1479 / 3002 loss=2.794, ppl=6.94, wps=5890.6, ups=0.09, wpb=64846, bsz=128, num_updates=4451, lr=9.99724e-05, gnorm=2.241, loss_scale=4, train_wall=11, gb_free=2.8, wall=50688
2021-06-19 08:43:45 | INFO | train_inner | epoch 002: 1480 / 3002 loss=2.77, ppl=6.82, wps=5870.5, ups=0.09, wpb=64835, bsz=128, num_updates=4452, lr=9.99724e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=50699
2021-06-19 08:43:56 | INFO | train_inner | epoch 002: 1481 / 3002 loss=2.722, ppl=6.6, wps=5802, ups=0.09, wpb=64854, bsz=128, num_updates=4453, lr=9.99724e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=50710
2021-06-19 08:44:07 | INFO | train_inner | epoch 002: 1482 / 3002 loss=2.626, ppl=6.17, wps=5836.8, ups=0.09, wpb=64801, bsz=128, num_updates=4454, lr=9.99724e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=50721
2021-06-19 08:44:18 | INFO | train_inner | epoch 002: 1483 / 3002 loss=2.679, ppl=6.4, wps=5910.2, ups=0.09, wpb=64895, bsz=128, num_updates=4455, lr=9.99724e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=50732
2021-06-19 08:44:29 | INFO | train_inner | epoch 002: 1484 / 3002 loss=2.759, ppl=6.77, wps=5859.5, ups=0.09, wpb=64837, bsz=128, num_updates=4456, lr=9.99723e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=50743
2021-06-19 08:44:40 | INFO | train_inner | epoch 002: 1485 / 3002 loss=2.74, ppl=6.68, wps=5806.2, ups=0.09, wpb=64779, bsz=128, num_updates=4457, lr=9.99723e-05, gnorm=2.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=50754
2021-06-19 08:44:51 | INFO | train_inner | epoch 002: 1486 / 3002 loss=2.815, ppl=7.04, wps=5803.6, ups=0.09, wpb=64824, bsz=128, num_updates=4458, lr=9.99723e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=50766
2021-06-19 08:45:02 | INFO | train_inner | epoch 002: 1487 / 3002 loss=2.713, ppl=6.56, wps=5796.8, ups=0.09, wpb=64739, bsz=128, num_updates=4459, lr=9.99723e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=50777
2021-06-19 08:45:13 | INFO | train_inner | epoch 002: 1488 / 3002 loss=2.67, ppl=6.36, wps=5910.4, ups=0.09, wpb=64877, bsz=128, num_updates=4460, lr=9.99723e-05, gnorm=2.29, loss_scale=4, train_wall=11, gb_free=2.8, wall=50788
2021-06-19 08:45:24 | INFO | train_inner | epoch 002: 1489 / 3002 loss=2.865, ppl=7.29, wps=5858.4, ups=0.09, wpb=64858, bsz=128, num_updates=4461, lr=9.99723e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=50799
2021-06-19 08:45:35 | INFO | train_inner | epoch 002: 1490 / 3002 loss=2.766, ppl=6.8, wps=5898.2, ups=0.09, wpb=64850, bsz=128, num_updates=4462, lr=9.99723e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=50810
2021-06-19 08:45:46 | INFO | train_inner | epoch 002: 1491 / 3002 loss=2.795, ppl=6.94, wps=5903.2, ups=0.09, wpb=64838, bsz=128, num_updates=4463, lr=9.99723e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=50821
2021-06-19 08:45:57 | INFO | train_inner | epoch 002: 1492 / 3002 loss=2.546, ppl=5.84, wps=5904.2, ups=0.09, wpb=64862, bsz=128, num_updates=4464, lr=9.99723e-05, gnorm=2.088, loss_scale=4, train_wall=11, gb_free=2.8, wall=50832
2021-06-19 08:46:09 | INFO | train_inner | epoch 002: 1493 / 3002 loss=2.718, ppl=6.58, wps=5809.4, ups=0.09, wpb=64842, bsz=128, num_updates=4465, lr=9.99723e-05, gnorm=2.157, loss_scale=4, train_wall=11, gb_free=2.8, wall=50843
2021-06-19 08:46:20 | INFO | train_inner | epoch 002: 1494 / 3002 loss=2.69, ppl=6.45, wps=5960.8, ups=0.09, wpb=64841, bsz=128, num_updates=4466, lr=9.99723e-05, gnorm=2.263, loss_scale=4, train_wall=10, gb_free=2.8, wall=50854
2021-06-19 08:46:30 | INFO | train_inner | epoch 002: 1495 / 3002 loss=2.933, ppl=7.64, wps=5889.9, ups=0.09, wpb=64765, bsz=128, num_updates=4467, lr=9.99723e-05, gnorm=2.2, loss_scale=4, train_wall=11, gb_free=2.8, wall=50865
2021-06-19 08:46:42 | INFO | train_inner | epoch 002: 1496 / 3002 loss=2.879, ppl=7.36, wps=5889.6, ups=0.09, wpb=64953, bsz=128, num_updates=4468, lr=9.99723e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=50876
2021-06-19 08:46:52 | INFO | train_inner | epoch 002: 1497 / 3002 loss=2.837, ppl=7.14, wps=5945.1, ups=0.09, wpb=64836, bsz=128, num_updates=4469, lr=9.99722e-05, gnorm=2.152, loss_scale=4, train_wall=10, gb_free=2.8, wall=50887
2021-06-19 08:47:04 | INFO | train_inner | epoch 002: 1498 / 3002 loss=2.651, ppl=6.28, wps=5860.9, ups=0.09, wpb=64855, bsz=128, num_updates=4470, lr=9.99722e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=50898
2021-06-19 08:47:15 | INFO | train_inner | epoch 002: 1499 / 3002 loss=2.629, ppl=6.19, wps=5851.8, ups=0.09, wpb=64806, bsz=128, num_updates=4471, lr=9.99722e-05, gnorm=2.237, loss_scale=4, train_wall=11, gb_free=2.8, wall=50909
2021-06-19 08:47:26 | INFO | train_inner | epoch 002: 1500 / 3002 loss=2.6, ppl=6.06, wps=5933.4, ups=0.09, wpb=64908, bsz=128, num_updates=4472, lr=9.99722e-05, gnorm=2.209, loss_scale=4, train_wall=10, gb_free=2.8, wall=50920
2021-06-19 08:47:37 | INFO | train_inner | epoch 002: 1501 / 3002 loss=2.884, ppl=7.38, wps=5888, ups=0.09, wpb=64845, bsz=128, num_updates=4473, lr=9.99722e-05, gnorm=2.287, loss_scale=4, train_wall=11, gb_free=2.8, wall=50931
2021-06-19 08:47:48 | INFO | train_inner | epoch 002: 1502 / 3002 loss=2.821, ppl=7.07, wps=5701, ups=0.09, wpb=64802, bsz=128, num_updates=4474, lr=9.99722e-05, gnorm=2.333, loss_scale=4, train_wall=11, gb_free=2.8, wall=50942
2021-06-19 08:47:59 | INFO | train_inner | epoch 002: 1503 / 3002 loss=2.718, ppl=6.58, wps=5747.5, ups=0.09, wpb=64854, bsz=128, num_updates=4475, lr=9.99722e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=50954
2021-06-19 08:48:10 | INFO | train_inner | epoch 002: 1504 / 3002 loss=2.68, ppl=6.41, wps=5819.7, ups=0.09, wpb=64864, bsz=128, num_updates=4476, lr=9.99722e-05, gnorm=2.215, loss_scale=4, train_wall=11, gb_free=2.8, wall=50965
2021-06-19 08:48:21 | INFO | train_inner | epoch 002: 1505 / 3002 loss=2.784, ppl=6.89, wps=5827.7, ups=0.09, wpb=64851, bsz=128, num_updates=4477, lr=9.99722e-05, gnorm=2.328, loss_scale=4, train_wall=11, gb_free=2.8, wall=50976
2021-06-19 08:48:32 | INFO | train_inner | epoch 002: 1506 / 3002 loss=2.705, ppl=6.52, wps=5882.6, ups=0.09, wpb=64806, bsz=128, num_updates=4478, lr=9.99722e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=50987
2021-06-19 08:48:43 | INFO | train_inner | epoch 002: 1507 / 3002 loss=2.801, ppl=6.97, wps=5899.9, ups=0.09, wpb=64863, bsz=128, num_updates=4479, lr=9.99722e-05, gnorm=2.317, loss_scale=4, train_wall=11, gb_free=2.8, wall=50998
2021-06-19 08:48:55 | INFO | train_inner | epoch 002: 1508 / 3002 loss=2.623, ppl=6.16, wps=5828.7, ups=0.09, wpb=64834, bsz=128, num_updates=4480, lr=9.99722e-05, gnorm=2.19, loss_scale=4, train_wall=11, gb_free=2.8, wall=51009
2021-06-19 08:49:05 | INFO | train_inner | epoch 002: 1509 / 3002 loss=2.808, ppl=7, wps=6008.9, ups=0.09, wpb=64785, bsz=128, num_updates=4481, lr=9.99721e-05, gnorm=2.139, loss_scale=8, train_wall=10, gb_free=2.8, wall=51020
2021-06-19 08:49:16 | INFO | train_inner | epoch 002: 1510 / 3002 loss=2.562, ppl=5.9, wps=5855.3, ups=0.09, wpb=64824, bsz=128, num_updates=4482, lr=9.99721e-05, gnorm=2.188, loss_scale=8, train_wall=11, gb_free=2.8, wall=51031
2021-06-19 08:49:28 | INFO | train_inner | epoch 002: 1511 / 3002 loss=2.833, ppl=7.13, wps=5852.8, ups=0.09, wpb=64837, bsz=128, num_updates=4483, lr=9.99721e-05, gnorm=2.251, loss_scale=8, train_wall=11, gb_free=2.8, wall=51042
2021-06-19 08:49:39 | INFO | train_inner | epoch 002: 1512 / 3002 loss=2.819, ppl=7.06, wps=5876, ups=0.09, wpb=64781, bsz=128, num_updates=4484, lr=9.99721e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=51053
2021-06-19 08:49:50 | INFO | train_inner | epoch 002: 1513 / 3002 loss=2.656, ppl=6.3, wps=5812.6, ups=0.09, wpb=64863, bsz=128, num_updates=4485, lr=9.99721e-05, gnorm=2.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=51064
2021-06-19 08:50:01 | INFO | train_inner | epoch 002: 1514 / 3002 loss=2.853, ppl=7.22, wps=5852.4, ups=0.09, wpb=64781, bsz=128, num_updates=4486, lr=9.99721e-05, gnorm=2.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=51075
2021-06-19 08:50:12 | INFO | train_inner | epoch 002: 1515 / 3002 loss=2.702, ppl=6.51, wps=5802, ups=0.09, wpb=64819, bsz=128, num_updates=4487, lr=9.99721e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=51086
2021-06-19 08:50:23 | INFO | train_inner | epoch 002: 1516 / 3002 loss=2.69, ppl=6.45, wps=5897.4, ups=0.09, wpb=64776, bsz=128, num_updates=4488, lr=9.99721e-05, gnorm=2.249, loss_scale=8, train_wall=11, gb_free=2.8, wall=51097
2021-06-19 08:50:34 | INFO | train_inner | epoch 002: 1517 / 3002 loss=2.659, ppl=6.32, wps=5724.5, ups=0.09, wpb=64877, bsz=128, num_updates=4489, lr=9.99721e-05, gnorm=2.199, loss_scale=8, train_wall=11, gb_free=2.8, wall=51109
2021-06-19 08:50:45 | INFO | train_inner | epoch 002: 1518 / 3002 loss=2.636, ppl=6.22, wps=5891.9, ups=0.09, wpb=64900, bsz=128, num_updates=4490, lr=9.99721e-05, gnorm=2.197, loss_scale=8, train_wall=11, gb_free=2.8, wall=51120
2021-06-19 08:50:56 | INFO | train_inner | epoch 002: 1519 / 3002 loss=2.688, ppl=6.44, wps=5876.4, ups=0.09, wpb=64883, bsz=128, num_updates=4491, lr=9.99721e-05, gnorm=2.293, loss_scale=8, train_wall=11, gb_free=2.8, wall=51131
2021-06-19 08:51:07 | INFO | train_inner | epoch 002: 1520 / 3002 loss=2.675, ppl=6.38, wps=5948.8, ups=0.09, wpb=64805, bsz=128, num_updates=4492, lr=9.99721e-05, gnorm=2.243, loss_scale=8, train_wall=10, gb_free=2.8, wall=51142
2021-06-19 08:51:18 | INFO | train_inner | epoch 002: 1521 / 3002 loss=2.764, ppl=6.79, wps=5911.6, ups=0.09, wpb=64778, bsz=128, num_updates=4493, lr=9.99721e-05, gnorm=2.221, loss_scale=8, train_wall=10, gb_free=2.8, wall=51153
2021-06-19 08:51:29 | INFO | train_inner | epoch 002: 1522 / 3002 loss=2.541, ppl=5.82, wps=5781.5, ups=0.09, wpb=64851, bsz=128, num_updates=4494, lr=9.9972e-05, gnorm=13.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=51164
2021-06-19 08:51:41 | INFO | train_inner | epoch 002: 1523 / 3002 loss=2.61, ppl=6.11, wps=5780.1, ups=0.09, wpb=64845, bsz=128, num_updates=4495, lr=9.9972e-05, gnorm=2.341, loss_scale=8, train_wall=11, gb_free=2.8, wall=51175
2021-06-19 08:51:52 | INFO | train_inner | epoch 002: 1524 / 3002 loss=2.654, ppl=6.29, wps=5880.7, ups=0.09, wpb=64821, bsz=128, num_updates=4496, lr=9.9972e-05, gnorm=2.315, loss_scale=8, train_wall=11, gb_free=2.8, wall=51186
2021-06-19 08:52:03 | INFO | train_inner | epoch 002: 1525 / 3002 loss=2.785, ppl=6.89, wps=5851.9, ups=0.09, wpb=64876, bsz=128, num_updates=4497, lr=9.9972e-05, gnorm=2.306, loss_scale=8, train_wall=11, gb_free=2.8, wall=51197
2021-06-19 08:52:14 | INFO | train_inner | epoch 002: 1526 / 3002 loss=2.712, ppl=6.55, wps=5847.7, ups=0.09, wpb=64816, bsz=128, num_updates=4498, lr=9.9972e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=51208
2021-06-19 08:52:25 | INFO | train_inner | epoch 002: 1527 / 3002 loss=2.691, ppl=6.46, wps=5947.2, ups=0.09, wpb=64885, bsz=128, num_updates=4499, lr=9.9972e-05, gnorm=2.26, loss_scale=8, train_wall=10, gb_free=2.8, wall=51219
2021-06-19 08:52:36 | INFO | train_inner | epoch 002: 1528 / 3002 loss=2.864, ppl=7.28, wps=5878.6, ups=0.09, wpb=64889, bsz=128, num_updates=4500, lr=9.9972e-05, gnorm=2.509, loss_scale=8, train_wall=11, gb_free=2.8, wall=51230
2021-06-19 08:52:47 | INFO | train_inner | epoch 002: 1529 / 3002 loss=2.66, ppl=6.32, wps=5920.3, ups=0.09, wpb=64860, bsz=128, num_updates=4501, lr=9.9972e-05, gnorm=2.268, loss_scale=8, train_wall=11, gb_free=2.8, wall=51241
2021-06-19 08:52:58 | INFO | train_inner | epoch 002: 1530 / 3002 loss=2.726, ppl=6.62, wps=5829.5, ups=0.09, wpb=64834, bsz=128, num_updates=4502, lr=9.9972e-05, gnorm=2.293, loss_scale=8, train_wall=11, gb_free=2.8, wall=51252
2021-06-19 08:53:09 | INFO | train_inner | epoch 002: 1531 / 3002 loss=2.861, ppl=7.27, wps=5844.8, ups=0.09, wpb=64830, bsz=128, num_updates=4503, lr=9.9972e-05, gnorm=2.32, loss_scale=8, train_wall=11, gb_free=2.8, wall=51263
2021-06-19 08:53:20 | INFO | train_inner | epoch 002: 1532 / 3002 loss=2.587, ppl=6.01, wps=5794, ups=0.09, wpb=64822, bsz=128, num_updates=4504, lr=9.9972e-05, gnorm=2.201, loss_scale=8, train_wall=11, gb_free=2.8, wall=51274
2021-06-19 08:53:31 | INFO | train_inner | epoch 002: 1533 / 3002 loss=2.879, ppl=7.36, wps=5819.8, ups=0.09, wpb=64827, bsz=128, num_updates=4505, lr=9.9972e-05, gnorm=2.315, loss_scale=8, train_wall=11, gb_free=2.8, wall=51286
2021-06-19 08:53:42 | INFO | train_inner | epoch 002: 1534 / 3002 loss=2.555, ppl=5.88, wps=5933.6, ups=0.09, wpb=64857, bsz=128, num_updates=4506, lr=9.99719e-05, gnorm=2.349, loss_scale=8, train_wall=10, gb_free=2.8, wall=51297
2021-06-19 08:53:53 | INFO | train_inner | epoch 002: 1535 / 3002 loss=2.927, ppl=7.6, wps=5862.8, ups=0.09, wpb=64858, bsz=128, num_updates=4507, lr=9.99719e-05, gnorm=2.281, loss_scale=8, train_wall=11, gb_free=2.8, wall=51308
2021-06-19 08:54:04 | INFO | train_inner | epoch 002: 1536 / 3002 loss=2.711, ppl=6.55, wps=5882.6, ups=0.09, wpb=64793, bsz=128, num_updates=4508, lr=9.99719e-05, gnorm=2.243, loss_scale=8, train_wall=11, gb_free=2.8, wall=51319
2021-06-19 08:54:15 | INFO | train_inner | epoch 002: 1537 / 3002 loss=2.761, ppl=6.78, wps=5852.1, ups=0.09, wpb=64841, bsz=128, num_updates=4509, lr=9.99719e-05, gnorm=2.346, loss_scale=8, train_wall=11, gb_free=2.8, wall=51330
2021-06-19 08:54:26 | INFO | train_inner | epoch 002: 1538 / 3002 loss=2.683, ppl=6.42, wps=5903.4, ups=0.09, wpb=64858, bsz=128, num_updates=4510, lr=9.99719e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=51341
2021-06-19 08:54:37 | INFO | train_inner | epoch 002: 1539 / 3002 loss=2.73, ppl=6.63, wps=5916.9, ups=0.09, wpb=64837, bsz=128, num_updates=4511, lr=9.99719e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=51352
2021-06-19 08:54:49 | INFO | train_inner | epoch 002: 1540 / 3002 loss=2.767, ppl=6.81, wps=5760.9, ups=0.09, wpb=64882, bsz=128, num_updates=4512, lr=9.99719e-05, gnorm=2.292, loss_scale=8, train_wall=11, gb_free=2.8, wall=51363
2021-06-19 08:55:00 | INFO | train_inner | epoch 002: 1541 / 3002 loss=2.981, ppl=7.89, wps=5808.4, ups=0.09, wpb=64746, bsz=128, num_updates=4513, lr=9.99719e-05, gnorm=2.325, loss_scale=8, train_wall=11, gb_free=2.8, wall=51374
2021-06-19 08:55:11 | INFO | train_inner | epoch 002: 1542 / 3002 loss=2.776, ppl=6.85, wps=5898.4, ups=0.09, wpb=64875, bsz=128, num_updates=4514, lr=9.99719e-05, gnorm=2.294, loss_scale=8, train_wall=11, gb_free=2.8, wall=51385
2021-06-19 08:55:22 | INFO | train_inner | epoch 002: 1543 / 3002 loss=2.771, ppl=6.82, wps=5762.1, ups=0.09, wpb=64845, bsz=128, num_updates=4515, lr=9.99719e-05, gnorm=2.231, loss_scale=8, train_wall=11, gb_free=2.8, wall=51396
2021-06-19 08:55:33 | INFO | train_inner | epoch 002: 1544 / 3002 loss=2.705, ppl=6.52, wps=5859, ups=0.09, wpb=64799, bsz=128, num_updates=4516, lr=9.99719e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=51407
2021-06-19 08:55:44 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-19 08:55:55 | INFO | train_inner | epoch 002: 1546 / 3002 loss=2.865, ppl=7.29, wps=2939.5, ups=0.05, wpb=64826, bsz=128, num_updates=4517, lr=9.99719e-05, gnorm=2.25, loss_scale=4, train_wall=21, gb_free=2.8, wall=51429
2021-06-19 08:56:06 | INFO | train_inner | epoch 002: 1547 / 3002 loss=2.814, ppl=7.03, wps=5809.5, ups=0.09, wpb=64863, bsz=128, num_updates=4518, lr=9.99719e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=51441
2021-06-19 08:56:17 | INFO | train_inner | epoch 002: 1548 / 3002 loss=2.689, ppl=6.45, wps=5886, ups=0.09, wpb=64888, bsz=128, num_updates=4519, lr=9.99718e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=51452
2021-06-19 08:56:28 | INFO | train_inner | epoch 002: 1549 / 3002 loss=2.681, ppl=6.41, wps=5796, ups=0.09, wpb=64895, bsz=128, num_updates=4520, lr=9.99718e-05, gnorm=2.254, loss_scale=4, train_wall=11, gb_free=2.8, wall=51463
2021-06-19 08:56:40 | INFO | train_inner | epoch 002: 1550 / 3002 loss=2.735, ppl=6.66, wps=5787, ups=0.09, wpb=64840, bsz=128, num_updates=4521, lr=9.99718e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=51474
2021-06-19 08:56:51 | INFO | train_inner | epoch 002: 1551 / 3002 loss=2.754, ppl=6.74, wps=5880, ups=0.09, wpb=64845, bsz=128, num_updates=4522, lr=9.99718e-05, gnorm=2.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=51485
2021-06-19 08:57:02 | INFO | train_inner | epoch 002: 1552 / 3002 loss=2.716, ppl=6.57, wps=5942.9, ups=0.09, wpb=64846, bsz=128, num_updates=4523, lr=9.99718e-05, gnorm=2.159, loss_scale=4, train_wall=10, gb_free=2.8, wall=51496
2021-06-19 08:57:13 | INFO | train_inner | epoch 002: 1553 / 3002 loss=2.74, ppl=6.68, wps=5897.3, ups=0.09, wpb=64840, bsz=128, num_updates=4524, lr=9.99718e-05, gnorm=2.241, loss_scale=4, train_wall=11, gb_free=2.8, wall=51507
2021-06-19 08:57:24 | INFO | train_inner | epoch 002: 1554 / 3002 loss=2.645, ppl=6.25, wps=5900.8, ups=0.09, wpb=64796, bsz=128, num_updates=4525, lr=9.99718e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=51518
2021-06-19 08:57:34 | INFO | train_inner | epoch 002: 1555 / 3002 loss=2.8, ppl=6.96, wps=5939.7, ups=0.09, wpb=64885, bsz=128, num_updates=4526, lr=9.99718e-05, gnorm=2.233, loss_scale=4, train_wall=10, gb_free=2.8, wall=51529
2021-06-19 08:57:46 | INFO | train_inner | epoch 002: 1556 / 3002 loss=2.669, ppl=6.36, wps=5806.5, ups=0.09, wpb=64812, bsz=128, num_updates=4527, lr=9.99718e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=51540
2021-06-19 08:57:57 | INFO | train_inner | epoch 002: 1557 / 3002 loss=2.847, ppl=7.19, wps=5806.5, ups=0.09, wpb=64826, bsz=128, num_updates=4528, lr=9.99718e-05, gnorm=2.313, loss_scale=4, train_wall=11, gb_free=2.8, wall=51551
2021-06-19 08:58:08 | INFO | train_inner | epoch 002: 1558 / 3002 loss=2.818, ppl=7.05, wps=5764.9, ups=0.09, wpb=64820, bsz=128, num_updates=4529, lr=9.99718e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=51562
2021-06-19 08:58:19 | INFO | train_inner | epoch 002: 1559 / 3002 loss=2.648, ppl=6.27, wps=5968.9, ups=0.09, wpb=64817, bsz=128, num_updates=4530, lr=9.99718e-05, gnorm=2.157, loss_scale=4, train_wall=10, gb_free=2.8, wall=51573
2021-06-19 08:58:30 | INFO | train_inner | epoch 002: 1560 / 3002 loss=2.814, ppl=7.03, wps=5760.7, ups=0.09, wpb=64818, bsz=128, num_updates=4531, lr=9.99717e-05, gnorm=2.326, loss_scale=4, train_wall=11, gb_free=2.8, wall=51585
2021-06-19 08:58:41 | INFO | train_inner | epoch 002: 1561 / 3002 loss=2.831, ppl=7.11, wps=5852.3, ups=0.09, wpb=64857, bsz=128, num_updates=4532, lr=9.99717e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=51596
2021-06-19 08:58:52 | INFO | train_inner | epoch 002: 1562 / 3002 loss=2.668, ppl=6.35, wps=5892, ups=0.09, wpb=64815, bsz=128, num_updates=4533, lr=9.99717e-05, gnorm=2.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=51607
2021-06-19 08:59:03 | INFO | train_inner | epoch 002: 1563 / 3002 loss=2.668, ppl=6.36, wps=5958.9, ups=0.09, wpb=64853, bsz=128, num_updates=4534, lr=9.99717e-05, gnorm=2.213, loss_scale=4, train_wall=10, gb_free=2.8, wall=51617
2021-06-19 08:59:14 | INFO | train_inner | epoch 002: 1564 / 3002 loss=2.906, ppl=7.5, wps=5794.5, ups=0.09, wpb=64771, bsz=128, num_updates=4535, lr=9.99717e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=51629
2021-06-19 08:59:26 | INFO | train_inner | epoch 002: 1565 / 3002 loss=2.878, ppl=7.35, wps=5747, ups=0.09, wpb=64857, bsz=128, num_updates=4536, lr=9.99717e-05, gnorm=2.365, loss_scale=4, train_wall=11, gb_free=2.8, wall=51640
2021-06-19 08:59:37 | INFO | train_inner | epoch 002: 1566 / 3002 loss=2.82, ppl=7.06, wps=5844, ups=0.09, wpb=64786, bsz=128, num_updates=4537, lr=9.99717e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=51651
2021-06-19 08:59:48 | INFO | train_inner | epoch 002: 1567 / 3002 loss=2.742, ppl=6.69, wps=5798.7, ups=0.09, wpb=64827, bsz=128, num_updates=4538, lr=9.99717e-05, gnorm=2.422, loss_scale=4, train_wall=11, gb_free=2.8, wall=51662
2021-06-19 08:59:59 | INFO | train_inner | epoch 002: 1568 / 3002 loss=2.619, ppl=6.14, wps=5757.5, ups=0.09, wpb=64825, bsz=128, num_updates=4539, lr=9.99717e-05, gnorm=2.313, loss_scale=4, train_wall=11, gb_free=2.8, wall=51673
2021-06-19 09:00:10 | INFO | train_inner | epoch 002: 1569 / 3002 loss=2.776, ppl=6.85, wps=5716, ups=0.09, wpb=64803, bsz=128, num_updates=4540, lr=9.99717e-05, gnorm=2.191, loss_scale=4, train_wall=11, gb_free=2.8, wall=51685
2021-06-19 09:00:22 | INFO | train_inner | epoch 002: 1570 / 3002 loss=2.797, ppl=6.95, wps=5875.3, ups=0.09, wpb=64848, bsz=128, num_updates=4541, lr=9.99717e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=51696
2021-06-19 09:00:32 | INFO | train_inner | epoch 002: 1571 / 3002 loss=2.704, ppl=6.52, wps=5939.4, ups=0.09, wpb=64818, bsz=128, num_updates=4542, lr=9.99717e-05, gnorm=2.188, loss_scale=4, train_wall=10, gb_free=2.8, wall=51707
2021-06-19 09:00:44 | INFO | train_inner | epoch 002: 1572 / 3002 loss=2.734, ppl=6.65, wps=5733.2, ups=0.09, wpb=64832, bsz=128, num_updates=4543, lr=9.99717e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=51718
2021-06-19 09:00:55 | INFO | train_inner | epoch 002: 1573 / 3002 loss=2.786, ppl=6.9, wps=5874.4, ups=0.09, wpb=64883, bsz=128, num_updates=4544, lr=9.99716e-05, gnorm=2.204, loss_scale=4, train_wall=11, gb_free=2.8, wall=51729
2021-06-19 09:01:06 | INFO | train_inner | epoch 002: 1574 / 3002 loss=2.64, ppl=6.24, wps=5931.2, ups=0.09, wpb=64837, bsz=128, num_updates=4545, lr=9.99716e-05, gnorm=2.449, loss_scale=4, train_wall=10, gb_free=2.8, wall=51740
2021-06-19 09:01:17 | INFO | train_inner | epoch 002: 1575 / 3002 loss=2.728, ppl=6.63, wps=5902.6, ups=0.09, wpb=64824, bsz=128, num_updates=4546, lr=9.99716e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=51751
2021-06-19 09:01:28 | INFO | train_inner | epoch 002: 1576 / 3002 loss=2.951, ppl=7.73, wps=5892.4, ups=0.09, wpb=64839, bsz=128, num_updates=4547, lr=9.99716e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=51762
2021-06-19 09:01:39 | INFO | train_inner | epoch 002: 1577 / 3002 loss=2.73, ppl=6.63, wps=5760.4, ups=0.09, wpb=64835, bsz=128, num_updates=4548, lr=9.99716e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=51773
2021-06-19 09:01:50 | INFO | train_inner | epoch 002: 1578 / 3002 loss=2.639, ppl=6.23, wps=5867.2, ups=0.09, wpb=64812, bsz=128, num_updates=4549, lr=9.99716e-05, gnorm=2.355, loss_scale=4, train_wall=11, gb_free=2.8, wall=51784
2021-06-19 09:02:01 | INFO | train_inner | epoch 002: 1579 / 3002 loss=2.609, ppl=6.1, wps=5967, ups=0.09, wpb=64844, bsz=128, num_updates=4550, lr=9.99716e-05, gnorm=2.137, loss_scale=4, train_wall=10, gb_free=2.8, wall=51795
2021-06-19 09:02:12 | INFO | train_inner | epoch 002: 1580 / 3002 loss=2.704, ppl=6.51, wps=5775.3, ups=0.09, wpb=64770, bsz=128, num_updates=4551, lr=9.99716e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=51806
2021-06-19 09:02:23 | INFO | train_inner | epoch 002: 1581 / 3002 loss=2.786, ppl=6.9, wps=5824, ups=0.09, wpb=64905, bsz=128, num_updates=4552, lr=9.99716e-05, gnorm=2.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=51818
2021-06-19 09:02:34 | INFO | train_inner | epoch 002: 1582 / 3002 loss=2.711, ppl=6.55, wps=5875.8, ups=0.09, wpb=64794, bsz=128, num_updates=4553, lr=9.99716e-05, gnorm=2.214, loss_scale=4, train_wall=11, gb_free=2.8, wall=51829
2021-06-19 09:02:45 | INFO | train_inner | epoch 002: 1583 / 3002 loss=2.751, ppl=6.73, wps=5935.3, ups=0.09, wpb=64867, bsz=128, num_updates=4554, lr=9.99716e-05, gnorm=2.106, loss_scale=4, train_wall=10, gb_free=2.8, wall=51840
2021-06-19 09:02:56 | INFO | train_inner | epoch 002: 1584 / 3002 loss=2.563, ppl=5.91, wps=5802.3, ups=0.09, wpb=64850, bsz=128, num_updates=4555, lr=9.99716e-05, gnorm=2.35, loss_scale=4, train_wall=11, gb_free=2.8, wall=51851
2021-06-19 09:03:07 | INFO | train_inner | epoch 002: 1585 / 3002 loss=2.602, ppl=6.07, wps=5835.5, ups=0.09, wpb=64846, bsz=128, num_updates=4556, lr=9.99715e-05, gnorm=2.282, loss_scale=4, train_wall=11, gb_free=2.8, wall=51862
2021-06-19 09:03:19 | INFO | train_inner | epoch 002: 1586 / 3002 loss=2.779, ppl=6.86, wps=5805.8, ups=0.09, wpb=64909, bsz=128, num_updates=4557, lr=9.99715e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=51873
2021-06-19 09:03:30 | INFO | train_inner | epoch 002: 1587 / 3002 loss=2.901, ppl=7.47, wps=5799.9, ups=0.09, wpb=64808, bsz=128, num_updates=4558, lr=9.99715e-05, gnorm=2.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=51884
2021-06-19 09:03:41 | INFO | train_inner | epoch 002: 1588 / 3002 loss=2.537, ppl=5.8, wps=5809.1, ups=0.09, wpb=64853, bsz=128, num_updates=4559, lr=9.99715e-05, gnorm=2.336, loss_scale=4, train_wall=11, gb_free=2.8, wall=51895
2021-06-19 09:03:52 | INFO | train_inner | epoch 002: 1589 / 3002 loss=2.699, ppl=6.49, wps=5836.7, ups=0.09, wpb=64805, bsz=128, num_updates=4560, lr=9.99715e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=51906
2021-06-19 09:04:03 | INFO | train_inner | epoch 002: 1590 / 3002 loss=2.784, ppl=6.89, wps=5961.8, ups=0.09, wpb=64925, bsz=128, num_updates=4561, lr=9.99715e-05, gnorm=2.305, loss_scale=4, train_wall=10, gb_free=2.8, wall=51917
2021-06-19 09:04:14 | INFO | train_inner | epoch 002: 1591 / 3002 loss=2.66, ppl=6.32, wps=5886.9, ups=0.09, wpb=64899, bsz=128, num_updates=4562, lr=9.99715e-05, gnorm=2.333, loss_scale=4, train_wall=11, gb_free=2.8, wall=51928
2021-06-19 09:04:25 | INFO | train_inner | epoch 002: 1592 / 3002 loss=2.814, ppl=7.03, wps=5888.8, ups=0.09, wpb=64837, bsz=128, num_updates=4563, lr=9.99715e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=51939
2021-06-19 09:04:36 | INFO | train_inner | epoch 002: 1593 / 3002 loss=2.873, ppl=7.33, wps=5832.9, ups=0.09, wpb=64846, bsz=128, num_updates=4564, lr=9.99715e-05, gnorm=2.237, loss_scale=4, train_wall=11, gb_free=2.8, wall=51950
2021-06-19 09:04:47 | INFO | train_inner | epoch 002: 1594 / 3002 loss=2.731, ppl=6.64, wps=5886.1, ups=0.09, wpb=64817, bsz=128, num_updates=4565, lr=9.99715e-05, gnorm=2.291, loss_scale=4, train_wall=11, gb_free=2.8, wall=51961
2021-06-19 09:04:58 | INFO | train_inner | epoch 002: 1595 / 3002 loss=2.663, ppl=6.33, wps=5828.7, ups=0.09, wpb=64804, bsz=128, num_updates=4566, lr=9.99715e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=51973
2021-06-19 09:05:10 | INFO | train_inner | epoch 002: 1596 / 3002 loss=2.691, ppl=6.46, wps=5766.3, ups=0.09, wpb=64820, bsz=128, num_updates=4567, lr=9.99715e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=51984
2021-06-19 09:05:21 | INFO | train_inner | epoch 002: 1597 / 3002 loss=2.765, ppl=6.8, wps=5708.3, ups=0.09, wpb=64922, bsz=128, num_updates=4568, lr=9.99715e-05, gnorm=2.328, loss_scale=4, train_wall=11, gb_free=2.8, wall=51995
2021-06-19 09:05:32 | INFO | train_inner | epoch 002: 1598 / 3002 loss=2.78, ppl=6.87, wps=5691, ups=0.09, wpb=64772, bsz=128, num_updates=4569, lr=9.99714e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=52007
2021-06-19 09:05:43 | INFO | train_inner | epoch 002: 1599 / 3002 loss=2.639, ppl=6.23, wps=5779.7, ups=0.09, wpb=64868, bsz=128, num_updates=4570, lr=9.99714e-05, gnorm=2.211, loss_scale=4, train_wall=11, gb_free=2.8, wall=52018
2021-06-19 09:05:55 | INFO | train_inner | epoch 002: 1600 / 3002 loss=2.695, ppl=6.48, wps=5881.2, ups=0.09, wpb=64913, bsz=128, num_updates=4571, lr=9.99714e-05, gnorm=2.21, loss_scale=4, train_wall=11, gb_free=2.8, wall=52029
2021-06-19 09:06:06 | INFO | train_inner | epoch 002: 1601 / 3002 loss=2.798, ppl=6.95, wps=5806.7, ups=0.09, wpb=64743, bsz=128, num_updates=4572, lr=9.99714e-05, gnorm=2.741, loss_scale=4, train_wall=11, gb_free=2.8, wall=52040
2021-06-19 09:06:17 | INFO | train_inner | epoch 002: 1602 / 3002 loss=2.796, ppl=6.95, wps=5840.3, ups=0.09, wpb=64823, bsz=128, num_updates=4573, lr=9.99714e-05, gnorm=2.305, loss_scale=4, train_wall=11, gb_free=2.8, wall=52051
2021-06-19 09:06:28 | INFO | train_inner | epoch 002: 1603 / 3002 loss=2.752, ppl=6.73, wps=5880.8, ups=0.09, wpb=64889, bsz=128, num_updates=4574, lr=9.99714e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=52062
2021-06-19 09:06:39 | INFO | train_inner | epoch 002: 1604 / 3002 loss=2.586, ppl=6, wps=5831.9, ups=0.09, wpb=64764, bsz=128, num_updates=4575, lr=9.99714e-05, gnorm=3.282, loss_scale=4, train_wall=11, gb_free=2.8, wall=52073
2021-06-19 09:06:50 | INFO | train_inner | epoch 002: 1605 / 3002 loss=2.723, ppl=6.6, wps=5761.8, ups=0.09, wpb=64830, bsz=128, num_updates=4576, lr=9.99714e-05, gnorm=2.54, loss_scale=4, train_wall=11, gb_free=2.8, wall=52084
2021-06-19 09:07:01 | INFO | train_inner | epoch 002: 1606 / 3002 loss=2.691, ppl=6.46, wps=5835.9, ups=0.09, wpb=64826, bsz=128, num_updates=4577, lr=9.99714e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=52096
2021-06-19 09:07:12 | INFO | train_inner | epoch 002: 1607 / 3002 loss=2.588, ppl=6.01, wps=5881.3, ups=0.09, wpb=64849, bsz=128, num_updates=4578, lr=9.99714e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=52107
2021-06-19 09:07:23 | INFO | train_inner | epoch 002: 1608 / 3002 loss=2.932, ppl=7.63, wps=5900.8, ups=0.09, wpb=64885, bsz=128, num_updates=4579, lr=9.99714e-05, gnorm=2.164, loss_scale=4, train_wall=11, gb_free=2.8, wall=52118
2021-06-19 09:07:34 | INFO | train_inner | epoch 002: 1609 / 3002 loss=2.913, ppl=7.53, wps=5787.3, ups=0.09, wpb=64785, bsz=128, num_updates=4580, lr=9.99714e-05, gnorm=3.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=52129
2021-06-19 09:07:45 | INFO | train_inner | epoch 002: 1610 / 3002 loss=2.709, ppl=6.54, wps=5953.9, ups=0.09, wpb=64843, bsz=128, num_updates=4581, lr=9.99713e-05, gnorm=2.196, loss_scale=4, train_wall=10, gb_free=2.8, wall=52140
2021-06-19 09:07:57 | INFO | train_inner | epoch 002: 1611 / 3002 loss=2.8, ppl=6.96, wps=5808.6, ups=0.09, wpb=64755, bsz=128, num_updates=4582, lr=9.99713e-05, gnorm=7.908, loss_scale=4, train_wall=11, gb_free=2.8, wall=52151
2021-06-19 09:08:08 | INFO | train_inner | epoch 002: 1612 / 3002 loss=3.001, ppl=8, wps=5862.6, ups=0.09, wpb=64740, bsz=128, num_updates=4583, lr=9.99713e-05, gnorm=3.893, loss_scale=4, train_wall=11, gb_free=2.8, wall=52162
2021-06-19 09:08:19 | INFO | train_inner | epoch 002: 1613 / 3002 loss=2.799, ppl=6.96, wps=5922.7, ups=0.09, wpb=64846, bsz=128, num_updates=4584, lr=9.99713e-05, gnorm=2.221, loss_scale=4, train_wall=10, gb_free=2.8, wall=52173
2021-06-19 09:08:29 | INFO | train_inner | epoch 002: 1614 / 3002 loss=2.645, ppl=6.26, wps=5989.5, ups=0.09, wpb=64934, bsz=128, num_updates=4585, lr=9.99713e-05, gnorm=2.248, loss_scale=4, train_wall=10, gb_free=2.8, wall=52184
2021-06-19 09:08:41 | INFO | train_inner | epoch 002: 1615 / 3002 loss=2.823, ppl=7.08, wps=5785.9, ups=0.09, wpb=64834, bsz=128, num_updates=4586, lr=9.99713e-05, gnorm=3.786, loss_scale=4, train_wall=11, gb_free=2.8, wall=52195
2021-06-19 09:08:52 | INFO | train_inner | epoch 002: 1616 / 3002 loss=2.76, ppl=6.78, wps=5906.9, ups=0.09, wpb=64771, bsz=128, num_updates=4587, lr=9.99713e-05, gnorm=2.232, loss_scale=4, train_wall=10, gb_free=2.8, wall=52206
2021-06-19 09:09:02 | INFO | train_inner | epoch 002: 1617 / 3002 loss=2.756, ppl=6.75, wps=5943.4, ups=0.09, wpb=64851, bsz=128, num_updates=4588, lr=9.99713e-05, gnorm=2.276, loss_scale=4, train_wall=10, gb_free=2.8, wall=52217
2021-06-19 09:09:13 | INFO | train_inner | epoch 002: 1618 / 3002 loss=2.782, ppl=6.88, wps=5898.4, ups=0.09, wpb=64804, bsz=128, num_updates=4589, lr=9.99713e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=52228
2021-06-19 09:09:25 | INFO | train_inner | epoch 002: 1619 / 3002 loss=2.593, ppl=6.03, wps=5810.5, ups=0.09, wpb=64755, bsz=128, num_updates=4590, lr=9.99713e-05, gnorm=2.126, loss_scale=4, train_wall=11, gb_free=2.8, wall=52239
2021-06-19 09:09:36 | INFO | train_inner | epoch 002: 1620 / 3002 loss=2.659, ppl=6.32, wps=5841.4, ups=0.09, wpb=64850, bsz=128, num_updates=4591, lr=9.99713e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=52250
2021-06-19 09:09:47 | INFO | train_inner | epoch 002: 1621 / 3002 loss=2.75, ppl=6.73, wps=5921.2, ups=0.09, wpb=64890, bsz=128, num_updates=4592, lr=9.99713e-05, gnorm=2.343, loss_scale=4, train_wall=11, gb_free=2.8, wall=52261
2021-06-19 09:09:58 | INFO | train_inner | epoch 002: 1622 / 3002 loss=2.76, ppl=6.77, wps=5780.5, ups=0.09, wpb=64785, bsz=128, num_updates=4593, lr=9.99713e-05, gnorm=2.301, loss_scale=4, train_wall=11, gb_free=2.8, wall=52272
2021-06-19 09:10:09 | INFO | train_inner | epoch 002: 1623 / 3002 loss=2.822, ppl=7.07, wps=5872.3, ups=0.09, wpb=64831, bsz=128, num_updates=4594, lr=9.99712e-05, gnorm=2.452, loss_scale=4, train_wall=11, gb_free=2.8, wall=52283
2021-06-19 09:10:20 | INFO | train_inner | epoch 002: 1624 / 3002 loss=2.744, ppl=6.7, wps=5860.3, ups=0.09, wpb=64757, bsz=128, num_updates=4595, lr=9.99712e-05, gnorm=2.297, loss_scale=4, train_wall=11, gb_free=2.8, wall=52294
2021-06-19 09:10:31 | INFO | train_inner | epoch 002: 1625 / 3002 loss=2.618, ppl=6.14, wps=5873.8, ups=0.09, wpb=64892, bsz=128, num_updates=4596, lr=9.99712e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=52305
2021-06-19 09:10:42 | INFO | train_inner | epoch 002: 1626 / 3002 loss=2.692, ppl=6.46, wps=5898.7, ups=0.09, wpb=64794, bsz=128, num_updates=4597, lr=9.99712e-05, gnorm=2.143, loss_scale=4, train_wall=11, gb_free=2.8, wall=52316
2021-06-19 09:10:53 | INFO | train_inner | epoch 002: 1627 / 3002 loss=2.785, ppl=6.89, wps=5847.1, ups=0.09, wpb=64806, bsz=128, num_updates=4598, lr=9.99712e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=52327
2021-06-19 09:11:04 | INFO | train_inner | epoch 002: 1628 / 3002 loss=2.875, ppl=7.34, wps=5867.3, ups=0.09, wpb=64787, bsz=128, num_updates=4599, lr=9.99712e-05, gnorm=2.297, loss_scale=4, train_wall=11, gb_free=2.8, wall=52338
2021-06-19 09:11:15 | INFO | train_inner | epoch 002: 1629 / 3002 loss=2.709, ppl=6.54, wps=5798.3, ups=0.09, wpb=64859, bsz=128, num_updates=4600, lr=9.99712e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=52350
2021-06-19 09:11:26 | INFO | train_inner | epoch 002: 1630 / 3002 loss=2.857, ppl=7.24, wps=5787.7, ups=0.09, wpb=64776, bsz=128, num_updates=4601, lr=9.99712e-05, gnorm=2.764, loss_scale=4, train_wall=11, gb_free=2.8, wall=52361
2021-06-19 09:11:37 | INFO | train_inner | epoch 002: 1631 / 3002 loss=2.655, ppl=6.3, wps=5903.9, ups=0.09, wpb=64791, bsz=128, num_updates=4602, lr=9.99712e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=52372
2021-06-19 09:11:49 | INFO | train_inner | epoch 002: 1632 / 3002 loss=2.694, ppl=6.47, wps=5804.6, ups=0.09, wpb=64914, bsz=128, num_updates=4603, lr=9.99712e-05, gnorm=2.731, loss_scale=4, train_wall=11, gb_free=2.8, wall=52383
2021-06-19 09:12:00 | INFO | train_inner | epoch 002: 1633 / 3002 loss=2.677, ppl=6.4, wps=5838.7, ups=0.09, wpb=64932, bsz=128, num_updates=4604, lr=9.99712e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=52394
2021-06-19 09:12:11 | INFO | train_inner | epoch 002: 1634 / 3002 loss=2.758, ppl=6.77, wps=5846.4, ups=0.09, wpb=64810, bsz=128, num_updates=4605, lr=9.99712e-05, gnorm=3.028, loss_scale=4, train_wall=11, gb_free=2.8, wall=52405
2021-06-19 09:12:22 | INFO | train_inner | epoch 002: 1635 / 3002 loss=2.717, ppl=6.57, wps=5890.7, ups=0.09, wpb=64793, bsz=128, num_updates=4606, lr=9.99711e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=52416
2021-06-19 09:12:33 | INFO | train_inner | epoch 002: 1636 / 3002 loss=2.655, ppl=6.3, wps=5764.4, ups=0.09, wpb=64895, bsz=128, num_updates=4607, lr=9.99711e-05, gnorm=2.327, loss_scale=4, train_wall=11, gb_free=2.8, wall=52427
2021-06-19 09:12:44 | INFO | train_inner | epoch 002: 1637 / 3002 loss=2.792, ppl=6.92, wps=5822.7, ups=0.09, wpb=64703, bsz=128, num_updates=4608, lr=9.99711e-05, gnorm=2.33, loss_scale=4, train_wall=11, gb_free=2.8, wall=52439
2021-06-19 09:12:55 | INFO | train_inner | epoch 002: 1638 / 3002 loss=2.872, ppl=7.32, wps=5873.3, ups=0.09, wpb=64812, bsz=128, num_updates=4609, lr=9.99711e-05, gnorm=2.539, loss_scale=4, train_wall=11, gb_free=2.8, wall=52450
2021-06-19 09:13:06 | INFO | train_inner | epoch 002: 1639 / 3002 loss=2.761, ppl=6.78, wps=5946, ups=0.09, wpb=64791, bsz=128, num_updates=4610, lr=9.99711e-05, gnorm=2.754, loss_scale=4, train_wall=10, gb_free=2.8, wall=52460
2021-06-19 09:13:17 | INFO | train_inner | epoch 002: 1640 / 3002 loss=2.716, ppl=6.57, wps=5807.7, ups=0.09, wpb=64906, bsz=128, num_updates=4611, lr=9.99711e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=52472
2021-06-19 09:13:28 | INFO | train_inner | epoch 002: 1641 / 3002 loss=2.995, ppl=7.97, wps=5854.7, ups=0.09, wpb=64744, bsz=128, num_updates=4612, lr=9.99711e-05, gnorm=2.383, loss_scale=4, train_wall=11, gb_free=2.8, wall=52483
2021-06-19 09:13:40 | INFO | train_inner | epoch 002: 1642 / 3002 loss=2.645, ppl=6.26, wps=5786.7, ups=0.09, wpb=64837, bsz=128, num_updates=4613, lr=9.99711e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=52494
2021-06-19 09:13:51 | INFO | train_inner | epoch 002: 1643 / 3002 loss=2.873, ppl=7.32, wps=5741.8, ups=0.09, wpb=64831, bsz=128, num_updates=4614, lr=9.99711e-05, gnorm=2.348, loss_scale=4, train_wall=11, gb_free=2.8, wall=52505
2021-06-19 09:14:02 | INFO | train_inner | epoch 002: 1644 / 3002 loss=2.706, ppl=6.52, wps=5903.4, ups=0.09, wpb=64816, bsz=128, num_updates=4615, lr=9.99711e-05, gnorm=2.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=52516
2021-06-19 09:14:13 | INFO | train_inner | epoch 002: 1645 / 3002 loss=2.781, ppl=6.88, wps=5951, ups=0.09, wpb=64896, bsz=128, num_updates=4616, lr=9.99711e-05, gnorm=2.445, loss_scale=4, train_wall=10, gb_free=2.8, wall=52527
2021-06-19 09:14:24 | INFO | train_inner | epoch 002: 1646 / 3002 loss=2.883, ppl=7.38, wps=5783.7, ups=0.09, wpb=64803, bsz=128, num_updates=4617, lr=9.99711e-05, gnorm=2.325, loss_scale=4, train_wall=11, gb_free=2.8, wall=52538
2021-06-19 09:14:35 | INFO | train_inner | epoch 002: 1647 / 3002 loss=2.819, ppl=7.06, wps=5799.9, ups=0.09, wpb=64788, bsz=128, num_updates=4618, lr=9.99711e-05, gnorm=12.636, loss_scale=4, train_wall=11, gb_free=2.8, wall=52549
2021-06-19 09:14:46 | INFO | train_inner | epoch 002: 1648 / 3002 loss=2.819, ppl=7.06, wps=5913.2, ups=0.09, wpb=64777, bsz=128, num_updates=4619, lr=9.9971e-05, gnorm=2.205, loss_scale=4, train_wall=10, gb_free=2.8, wall=52560
2021-06-19 09:14:57 | INFO | train_inner | epoch 002: 1649 / 3002 loss=2.816, ppl=7.04, wps=5875.3, ups=0.09, wpb=64840, bsz=128, num_updates=4620, lr=9.9971e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=52571
2021-06-19 09:15:08 | INFO | train_inner | epoch 002: 1650 / 3002 loss=2.627, ppl=6.18, wps=5869.7, ups=0.09, wpb=64865, bsz=128, num_updates=4621, lr=9.9971e-05, gnorm=2.297, loss_scale=4, train_wall=11, gb_free=2.8, wall=52583
2021-06-19 09:15:19 | INFO | train_inner | epoch 002: 1651 / 3002 loss=2.636, ppl=6.22, wps=5853.6, ups=0.09, wpb=64797, bsz=128, num_updates=4622, lr=9.9971e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=52594
2021-06-19 09:15:30 | INFO | train_inner | epoch 002: 1652 / 3002 loss=2.792, ppl=6.92, wps=5807.2, ups=0.09, wpb=64872, bsz=128, num_updates=4623, lr=9.9971e-05, gnorm=2.325, loss_scale=4, train_wall=11, gb_free=2.8, wall=52605
2021-06-19 09:15:41 | INFO | train_inner | epoch 002: 1653 / 3002 loss=2.772, ppl=6.83, wps=5939.9, ups=0.09, wpb=64803, bsz=128, num_updates=4624, lr=9.9971e-05, gnorm=2.641, loss_scale=4, train_wall=10, gb_free=2.8, wall=52616
2021-06-19 09:15:52 | INFO | train_inner | epoch 002: 1654 / 3002 loss=2.823, ppl=7.07, wps=5810.6, ups=0.09, wpb=64765, bsz=128, num_updates=4625, lr=9.9971e-05, gnorm=2.25, loss_scale=4, train_wall=11, gb_free=2.8, wall=52627
2021-06-19 09:16:04 | INFO | train_inner | epoch 002: 1655 / 3002 loss=2.789, ppl=6.91, wps=5682.1, ups=0.09, wpb=64860, bsz=128, num_updates=4626, lr=9.9971e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=52638
2021-06-19 09:16:15 | INFO | train_inner | epoch 002: 1656 / 3002 loss=2.85, ppl=7.21, wps=5850.7, ups=0.09, wpb=64844, bsz=128, num_updates=4627, lr=9.9971e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=52649
2021-06-19 09:16:26 | INFO | train_inner | epoch 002: 1657 / 3002 loss=2.858, ppl=7.25, wps=5911.5, ups=0.09, wpb=64745, bsz=128, num_updates=4628, lr=9.9971e-05, gnorm=2.423, loss_scale=4, train_wall=11, gb_free=2.8, wall=52660
2021-06-19 09:16:37 | INFO | train_inner | epoch 002: 1658 / 3002 loss=2.703, ppl=6.51, wps=5807.3, ups=0.09, wpb=64761, bsz=128, num_updates=4629, lr=9.9971e-05, gnorm=2.294, loss_scale=4, train_wall=11, gb_free=2.8, wall=52671
2021-06-19 09:16:48 | INFO | train_inner | epoch 002: 1659 / 3002 loss=2.77, ppl=6.82, wps=5729.6, ups=0.09, wpb=64736, bsz=128, num_updates=4630, lr=9.9971e-05, gnorm=2.355, loss_scale=4, train_wall=11, gb_free=2.8, wall=52683
2021-06-19 09:17:00 | INFO | train_inner | epoch 002: 1660 / 3002 loss=2.653, ppl=6.29, wps=5782.9, ups=0.09, wpb=64830, bsz=128, num_updates=4631, lr=9.99709e-05, gnorm=2.397, loss_scale=4, train_wall=11, gb_free=2.8, wall=52694
2021-06-19 09:17:11 | INFO | train_inner | epoch 002: 1661 / 3002 loss=2.697, ppl=6.48, wps=5875.2, ups=0.09, wpb=64761, bsz=128, num_updates=4632, lr=9.99709e-05, gnorm=2.359, loss_scale=4, train_wall=11, gb_free=2.8, wall=52705
2021-06-19 09:17:22 | INFO | train_inner | epoch 002: 1662 / 3002 loss=2.837, ppl=7.15, wps=5815.6, ups=0.09, wpb=64839, bsz=128, num_updates=4633, lr=9.99709e-05, gnorm=2.324, loss_scale=4, train_wall=11, gb_free=2.8, wall=52716
2021-06-19 09:17:33 | INFO | train_inner | epoch 002: 1663 / 3002 loss=2.749, ppl=6.72, wps=5869.6, ups=0.09, wpb=64920, bsz=128, num_updates=4634, lr=9.99709e-05, gnorm=3.591, loss_scale=4, train_wall=11, gb_free=2.8, wall=52727
2021-06-19 09:17:44 | INFO | train_inner | epoch 002: 1664 / 3002 loss=2.786, ppl=6.9, wps=5919.7, ups=0.09, wpb=64855, bsz=128, num_updates=4635, lr=9.99709e-05, gnorm=2.214, loss_scale=4, train_wall=10, gb_free=2.8, wall=52738
2021-06-19 09:17:55 | INFO | train_inner | epoch 002: 1665 / 3002 loss=2.787, ppl=6.9, wps=6001.4, ups=0.09, wpb=64908, bsz=128, num_updates=4636, lr=9.99709e-05, gnorm=2.169, loss_scale=4, train_wall=10, gb_free=2.8, wall=52749
2021-06-19 09:18:06 | INFO | train_inner | epoch 002: 1666 / 3002 loss=2.73, ppl=6.63, wps=5841.2, ups=0.09, wpb=64935, bsz=128, num_updates=4637, lr=9.99709e-05, gnorm=2.13, loss_scale=4, train_wall=11, gb_free=2.8, wall=52760
2021-06-19 09:18:17 | INFO | train_inner | epoch 002: 1667 / 3002 loss=2.62, ppl=6.15, wps=5813.6, ups=0.09, wpb=64869, bsz=128, num_updates=4638, lr=9.99709e-05, gnorm=2.627, loss_scale=4, train_wall=11, gb_free=2.8, wall=52771
2021-06-19 09:18:28 | INFO | train_inner | epoch 002: 1668 / 3002 loss=2.665, ppl=6.34, wps=5940.6, ups=0.09, wpb=64802, bsz=128, num_updates=4639, lr=9.99709e-05, gnorm=2.271, loss_scale=4, train_wall=10, gb_free=2.8, wall=52782
2021-06-19 09:18:39 | INFO | train_inner | epoch 002: 1669 / 3002 loss=2.733, ppl=6.65, wps=5842.4, ups=0.09, wpb=64804, bsz=128, num_updates=4640, lr=9.99709e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=52793
2021-06-19 09:18:50 | INFO | train_inner | epoch 002: 1670 / 3002 loss=2.836, ppl=7.14, wps=5847.7, ups=0.09, wpb=64761, bsz=128, num_updates=4641, lr=9.99709e-05, gnorm=2.402, loss_scale=4, train_wall=11, gb_free=2.8, wall=52804
2021-06-19 09:19:01 | INFO | train_inner | epoch 002: 1671 / 3002 loss=2.768, ppl=6.81, wps=5893.7, ups=0.09, wpb=64764, bsz=128, num_updates=4642, lr=9.99709e-05, gnorm=2.181, loss_scale=4, train_wall=11, gb_free=2.8, wall=52815
2021-06-19 09:19:12 | INFO | train_inner | epoch 002: 1672 / 3002 loss=2.781, ppl=6.87, wps=5743.5, ups=0.09, wpb=64908, bsz=128, num_updates=4643, lr=9.99709e-05, gnorm=2.466, loss_scale=4, train_wall=11, gb_free=2.8, wall=52827
2021-06-19 09:19:23 | INFO | train_inner | epoch 002: 1673 / 3002 loss=2.692, ppl=6.46, wps=5862.9, ups=0.09, wpb=64801, bsz=128, num_updates=4644, lr=9.99708e-05, gnorm=2.209, loss_scale=8, train_wall=11, gb_free=2.8, wall=52838
2021-06-19 09:19:34 | INFO | train_inner | epoch 002: 1674 / 3002 loss=2.819, ppl=7.05, wps=5822.2, ups=0.09, wpb=64780, bsz=128, num_updates=4645, lr=9.99708e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=52849
2021-06-19 09:19:46 | INFO | train_inner | epoch 002: 1675 / 3002 loss=2.716, ppl=6.57, wps=5845.1, ups=0.09, wpb=64836, bsz=128, num_updates=4646, lr=9.99708e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=52860
2021-06-19 09:19:57 | INFO | train_inner | epoch 002: 1676 / 3002 loss=2.651, ppl=6.28, wps=5768.5, ups=0.09, wpb=64842, bsz=128, num_updates=4647, lr=9.99708e-05, gnorm=2.277, loss_scale=8, train_wall=11, gb_free=2.8, wall=52871
2021-06-19 09:20:08 | INFO | train_inner | epoch 002: 1677 / 3002 loss=2.779, ppl=6.86, wps=5848.6, ups=0.09, wpb=64822, bsz=128, num_updates=4648, lr=9.99708e-05, gnorm=2.259, loss_scale=8, train_wall=11, gb_free=2.8, wall=52882
2021-06-19 09:20:19 | INFO | train_inner | epoch 002: 1678 / 3002 loss=2.684, ppl=6.43, wps=5820.6, ups=0.09, wpb=64846, bsz=128, num_updates=4649, lr=9.99708e-05, gnorm=2.229, loss_scale=8, train_wall=11, gb_free=2.8, wall=52893
2021-06-19 09:20:30 | INFO | train_inner | epoch 002: 1679 / 3002 loss=2.847, ppl=7.19, wps=5730, ups=0.09, wpb=64791, bsz=128, num_updates=4650, lr=9.99708e-05, gnorm=2.466, loss_scale=8, train_wall=11, gb_free=2.8, wall=52905
2021-06-19 09:20:41 | INFO | train_inner | epoch 002: 1680 / 3002 loss=2.786, ppl=6.9, wps=5831.5, ups=0.09, wpb=64866, bsz=128, num_updates=4651, lr=9.99708e-05, gnorm=2.207, loss_scale=8, train_wall=11, gb_free=2.8, wall=52916
2021-06-19 09:20:52 | INFO | train_inner | epoch 002: 1681 / 3002 loss=2.676, ppl=6.39, wps=5865.2, ups=0.09, wpb=64810, bsz=128, num_updates=4652, lr=9.99708e-05, gnorm=2.337, loss_scale=8, train_wall=11, gb_free=2.8, wall=52927
2021-06-19 09:21:04 | INFO | train_inner | epoch 002: 1682 / 3002 loss=2.757, ppl=6.76, wps=5747.7, ups=0.09, wpb=64846, bsz=128, num_updates=4653, lr=9.99708e-05, gnorm=2.15, loss_scale=8, train_wall=11, gb_free=2.8, wall=52938
2021-06-19 09:21:15 | INFO | train_inner | epoch 002: 1683 / 3002 loss=2.856, ppl=7.24, wps=5906, ups=0.09, wpb=64840, bsz=128, num_updates=4654, lr=9.99708e-05, gnorm=2.297, loss_scale=8, train_wall=11, gb_free=2.8, wall=52949
2021-06-19 09:21:26 | INFO | train_inner | epoch 002: 1684 / 3002 loss=2.773, ppl=6.84, wps=5783.6, ups=0.09, wpb=64846, bsz=128, num_updates=4655, lr=9.99708e-05, gnorm=2.255, loss_scale=8, train_wall=11, gb_free=2.8, wall=52960
2021-06-19 09:21:37 | INFO | train_inner | epoch 002: 1685 / 3002 loss=2.782, ppl=6.88, wps=5891.3, ups=0.09, wpb=64748, bsz=128, num_updates=4656, lr=9.99707e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=52971
2021-06-19 09:21:48 | INFO | train_inner | epoch 002: 1686 / 3002 loss=2.749, ppl=6.72, wps=5813.5, ups=0.09, wpb=64849, bsz=128, num_updates=4657, lr=9.99707e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=52982
2021-06-19 09:21:59 | INFO | train_inner | epoch 002: 1687 / 3002 loss=2.862, ppl=7.27, wps=5854.2, ups=0.09, wpb=64829, bsz=128, num_updates=4658, lr=9.99707e-05, gnorm=2.319, loss_scale=8, train_wall=11, gb_free=2.8, wall=52993
2021-06-19 09:22:10 | INFO | train_inner | epoch 002: 1688 / 3002 loss=2.761, ppl=6.78, wps=5796, ups=0.09, wpb=64748, bsz=128, num_updates=4659, lr=9.99707e-05, gnorm=2.316, loss_scale=8, train_wall=11, gb_free=2.8, wall=53005
2021-06-19 09:22:21 | INFO | train_inner | epoch 002: 1689 / 3002 loss=2.551, ppl=5.86, wps=5843.9, ups=0.09, wpb=64875, bsz=128, num_updates=4660, lr=9.99707e-05, gnorm=2.161, loss_scale=8, train_wall=11, gb_free=2.8, wall=53016
2021-06-19 09:22:32 | INFO | train_inner | epoch 002: 1690 / 3002 loss=2.809, ppl=7.01, wps=5875.3, ups=0.09, wpb=64796, bsz=128, num_updates=4661, lr=9.99707e-05, gnorm=2.332, loss_scale=8, train_wall=11, gb_free=2.8, wall=53027
2021-06-19 09:22:44 | INFO | train_inner | epoch 002: 1691 / 3002 loss=2.725, ppl=6.61, wps=5820.5, ups=0.09, wpb=64812, bsz=128, num_updates=4662, lr=9.99707e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=53038
2021-06-19 09:22:55 | INFO | train_inner | epoch 002: 1692 / 3002 loss=2.662, ppl=6.33, wps=5798.4, ups=0.09, wpb=64793, bsz=128, num_updates=4663, lr=9.99707e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=53049
2021-06-19 09:23:06 | INFO | train_inner | epoch 002: 1693 / 3002 loss=2.62, ppl=6.15, wps=5911, ups=0.09, wpb=64825, bsz=128, num_updates=4664, lr=9.99707e-05, gnorm=2.304, loss_scale=8, train_wall=10, gb_free=2.8, wall=53060
2021-06-19 09:23:17 | INFO | train_inner | epoch 002: 1694 / 3002 loss=2.734, ppl=6.65, wps=5822.5, ups=0.09, wpb=64849, bsz=128, num_updates=4665, lr=9.99707e-05, gnorm=2.127, loss_scale=8, train_wall=11, gb_free=2.8, wall=53071
2021-06-19 09:23:28 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-19 09:23:39 | INFO | train_inner | epoch 002: 1696 / 3002 loss=2.824, ppl=7.08, wps=2891.5, ups=0.04, wpb=64803, bsz=128, num_updates=4666, lr=9.99707e-05, gnorm=2.259, loss_scale=4, train_wall=21, gb_free=2.8, wall=53094
2021-06-19 09:23:50 | INFO | train_inner | epoch 002: 1697 / 3002 loss=2.614, ppl=6.12, wps=5846.6, ups=0.09, wpb=64873, bsz=128, num_updates=4667, lr=9.99707e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=53105
2021-06-19 09:24:02 | INFO | train_inner | epoch 002: 1698 / 3002 loss=2.76, ppl=6.77, wps=5697.1, ups=0.09, wpb=64754, bsz=128, num_updates=4668, lr=9.99707e-05, gnorm=2.191, loss_scale=4, train_wall=11, gb_free=2.8, wall=53116
2021-06-19 09:24:13 | INFO | train_inner | epoch 002: 1699 / 3002 loss=2.751, ppl=6.73, wps=5879.8, ups=0.09, wpb=64903, bsz=128, num_updates=4669, lr=9.99706e-05, gnorm=2.266, loss_scale=4, train_wall=11, gb_free=2.8, wall=53127
2021-06-19 09:24:24 | INFO | train_inner | epoch 002: 1700 / 3002 loss=2.746, ppl=6.71, wps=5861.3, ups=0.09, wpb=64770, bsz=128, num_updates=4670, lr=9.99706e-05, gnorm=2.199, loss_scale=4, train_wall=11, gb_free=2.8, wall=53138
2021-06-19 09:24:35 | INFO | train_inner | epoch 002: 1701 / 3002 loss=2.812, ppl=7.02, wps=5785.1, ups=0.09, wpb=64769, bsz=128, num_updates=4671, lr=9.99706e-05, gnorm=2.389, loss_scale=4, train_wall=11, gb_free=2.8, wall=53149
2021-06-19 09:24:46 | INFO | train_inner | epoch 002: 1702 / 3002 loss=2.619, ppl=6.14, wps=5971.4, ups=0.09, wpb=64895, bsz=128, num_updates=4672, lr=9.99706e-05, gnorm=2.226, loss_scale=4, train_wall=10, gb_free=2.8, wall=53160
2021-06-19 09:24:57 | INFO | train_inner | epoch 002: 1703 / 3002 loss=2.752, ppl=6.73, wps=5966, ups=0.09, wpb=64877, bsz=128, num_updates=4673, lr=9.99706e-05, gnorm=2.337, loss_scale=4, train_wall=10, gb_free=2.8, wall=53171
2021-06-19 09:25:08 | INFO | train_inner | epoch 002: 1704 / 3002 loss=2.607, ppl=6.09, wps=5853.4, ups=0.09, wpb=64843, bsz=128, num_updates=4674, lr=9.99706e-05, gnorm=2.154, loss_scale=4, train_wall=11, gb_free=2.8, wall=53182
2021-06-19 09:25:19 | INFO | train_inner | epoch 002: 1705 / 3002 loss=2.695, ppl=6.48, wps=5853.5, ups=0.09, wpb=64857, bsz=128, num_updates=4675, lr=9.99706e-05, gnorm=2.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=53193
2021-06-19 09:25:30 | INFO | train_inner | epoch 002: 1706 / 3002 loss=2.685, ppl=6.43, wps=5771.5, ups=0.09, wpb=64840, bsz=128, num_updates=4676, lr=9.99706e-05, gnorm=2.21, loss_scale=4, train_wall=11, gb_free=2.8, wall=53204
2021-06-19 09:25:41 | INFO | train_inner | epoch 002: 1707 / 3002 loss=2.781, ppl=6.87, wps=5832, ups=0.09, wpb=64707, bsz=128, num_updates=4677, lr=9.99706e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=53216
2021-06-19 09:25:53 | INFO | train_inner | epoch 002: 1708 / 3002 loss=2.792, ppl=6.93, wps=5760.1, ups=0.09, wpb=64885, bsz=128, num_updates=4678, lr=9.99706e-05, gnorm=2.51, loss_scale=4, train_wall=11, gb_free=2.8, wall=53227
2021-06-19 09:26:03 | INFO | train_inner | epoch 002: 1709 / 3002 loss=2.785, ppl=6.89, wps=5959, ups=0.09, wpb=64905, bsz=128, num_updates=4679, lr=9.99706e-05, gnorm=2.129, loss_scale=4, train_wall=10, gb_free=2.8, wall=53238
2021-06-19 09:26:15 | INFO | train_inner | epoch 002: 1710 / 3002 loss=2.719, ppl=6.58, wps=5735.6, ups=0.09, wpb=64796, bsz=128, num_updates=4680, lr=9.99706e-05, gnorm=2.324, loss_scale=4, train_wall=11, gb_free=2.8, wall=53249
2021-06-19 09:26:26 | INFO | train_inner | epoch 002: 1711 / 3002 loss=2.716, ppl=6.57, wps=5752.3, ups=0.09, wpb=64786, bsz=128, num_updates=4681, lr=9.99705e-05, gnorm=2.199, loss_scale=4, train_wall=11, gb_free=2.8, wall=53260
2021-06-19 09:26:37 | INFO | train_inner | epoch 002: 1712 / 3002 loss=2.708, ppl=6.53, wps=5887.6, ups=0.09, wpb=64827, bsz=128, num_updates=4682, lr=9.99705e-05, gnorm=3.823, loss_scale=4, train_wall=11, gb_free=2.8, wall=53271
2021-06-19 09:26:48 | INFO | train_inner | epoch 002: 1713 / 3002 loss=2.667, ppl=6.35, wps=5798.7, ups=0.09, wpb=64812, bsz=128, num_updates=4683, lr=9.99705e-05, gnorm=2.586, loss_scale=4, train_wall=11, gb_free=2.8, wall=53282
2021-06-19 09:26:59 | INFO | train_inner | epoch 002: 1714 / 3002 loss=2.762, ppl=6.78, wps=5946.1, ups=0.09, wpb=64846, bsz=128, num_updates=4684, lr=9.99705e-05, gnorm=2.259, loss_scale=4, train_wall=10, gb_free=2.8, wall=53293
2021-06-19 09:27:10 | INFO | train_inner | epoch 002: 1715 / 3002 loss=2.861, ppl=7.26, wps=5930.8, ups=0.09, wpb=64901, bsz=128, num_updates=4685, lr=9.99705e-05, gnorm=2.57, loss_scale=4, train_wall=10, gb_free=2.8, wall=53304
2021-06-19 09:27:21 | INFO | train_inner | epoch 002: 1716 / 3002 loss=2.742, ppl=6.69, wps=5820, ups=0.09, wpb=64801, bsz=128, num_updates=4686, lr=9.99705e-05, gnorm=2.304, loss_scale=4, train_wall=11, gb_free=2.8, wall=53315
2021-06-19 09:27:32 | INFO | train_inner | epoch 002: 1717 / 3002 loss=2.871, ppl=7.31, wps=5865.6, ups=0.09, wpb=64767, bsz=128, num_updates=4687, lr=9.99705e-05, gnorm=2.206, loss_scale=4, train_wall=11, gb_free=2.8, wall=53327
2021-06-19 09:27:43 | INFO | train_inner | epoch 002: 1718 / 3002 loss=2.826, ppl=7.09, wps=5963.7, ups=0.09, wpb=64935, bsz=128, num_updates=4688, lr=9.99705e-05, gnorm=2.265, loss_scale=4, train_wall=10, gb_free=2.8, wall=53337
2021-06-19 09:27:54 | INFO | train_inner | epoch 002: 1719 / 3002 loss=2.955, ppl=7.76, wps=5849.3, ups=0.09, wpb=64804, bsz=128, num_updates=4689, lr=9.99705e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=53348
2021-06-19 09:28:05 | INFO | train_inner | epoch 002: 1720 / 3002 loss=2.75, ppl=6.73, wps=5743.1, ups=0.09, wpb=64901, bsz=128, num_updates=4690, lr=9.99705e-05, gnorm=2.251, loss_scale=4, train_wall=11, gb_free=2.8, wall=53360
2021-06-19 09:28:17 | INFO | train_inner | epoch 002: 1721 / 3002 loss=2.634, ppl=6.21, wps=5810.9, ups=0.09, wpb=64852, bsz=128, num_updates=4691, lr=9.99705e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=53371
2021-06-19 09:28:28 | INFO | train_inner | epoch 002: 1722 / 3002 loss=2.773, ppl=6.84, wps=5955.9, ups=0.09, wpb=64881, bsz=128, num_updates=4692, lr=9.99705e-05, gnorm=2.251, loss_scale=4, train_wall=10, gb_free=2.8, wall=53382
2021-06-19 09:28:39 | INFO | train_inner | epoch 002: 1723 / 3002 loss=2.862, ppl=7.27, wps=5900.7, ups=0.09, wpb=64864, bsz=128, num_updates=4693, lr=9.99705e-05, gnorm=2.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=53393
2021-06-19 09:28:50 | INFO | train_inner | epoch 002: 1724 / 3002 loss=2.709, ppl=6.54, wps=5814.5, ups=0.09, wpb=64892, bsz=128, num_updates=4694, lr=9.99704e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=53404
2021-06-19 09:29:01 | INFO | train_inner | epoch 002: 1725 / 3002 loss=2.568, ppl=5.93, wps=5971.9, ups=0.09, wpb=64832, bsz=128, num_updates=4695, lr=9.99704e-05, gnorm=2.199, loss_scale=4, train_wall=10, gb_free=2.8, wall=53415
2021-06-19 09:29:12 | INFO | train_inner | epoch 002: 1726 / 3002 loss=2.871, ppl=7.32, wps=5786.9, ups=0.09, wpb=64792, bsz=128, num_updates=4696, lr=9.99704e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=53426
2021-06-19 09:29:23 | INFO | train_inner | epoch 002: 1727 / 3002 loss=2.716, ppl=6.57, wps=5850.4, ups=0.09, wpb=64803, bsz=128, num_updates=4697, lr=9.99704e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=53437
2021-06-19 09:29:34 | INFO | train_inner | epoch 002: 1728 / 3002 loss=2.833, ppl=7.13, wps=5914.8, ups=0.09, wpb=64813, bsz=128, num_updates=4698, lr=9.99704e-05, gnorm=2.233, loss_scale=4, train_wall=10, gb_free=2.8, wall=53448
2021-06-19 09:29:45 | INFO | train_inner | epoch 002: 1729 / 3002 loss=2.861, ppl=7.27, wps=5868.3, ups=0.09, wpb=64789, bsz=128, num_updates=4699, lr=9.99704e-05, gnorm=2.263, loss_scale=4, train_wall=11, gb_free=2.8, wall=53459
2021-06-19 09:29:56 | INFO | train_inner | epoch 002: 1730 / 3002 loss=2.72, ppl=6.59, wps=5960.5, ups=0.09, wpb=64807, bsz=128, num_updates=4700, lr=9.99704e-05, gnorm=2.464, loss_scale=4, train_wall=10, gb_free=2.8, wall=53470
2021-06-19 09:30:07 | INFO | train_inner | epoch 002: 1731 / 3002 loss=2.698, ppl=6.49, wps=5854.1, ups=0.09, wpb=64877, bsz=128, num_updates=4701, lr=9.99704e-05, gnorm=2.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=53481
2021-06-19 09:30:18 | INFO | train_inner | epoch 002: 1732 / 3002 loss=2.737, ppl=6.67, wps=5901.7, ups=0.09, wpb=64923, bsz=128, num_updates=4702, lr=9.99704e-05, gnorm=2.183, loss_scale=4, train_wall=11, gb_free=2.8, wall=53492
2021-06-19 09:30:29 | INFO | train_inner | epoch 002: 1733 / 3002 loss=2.671, ppl=6.37, wps=5920.7, ups=0.09, wpb=64763, bsz=128, num_updates=4703, lr=9.99704e-05, gnorm=2.193, loss_scale=4, train_wall=10, gb_free=2.8, wall=53503
2021-06-19 09:30:40 | INFO | train_inner | epoch 002: 1734 / 3002 loss=2.754, ppl=6.74, wps=5904.2, ups=0.09, wpb=64793, bsz=128, num_updates=4704, lr=9.99704e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=53514
2021-06-19 09:30:51 | INFO | train_inner | epoch 002: 1735 / 3002 loss=2.696, ppl=6.48, wps=5773.4, ups=0.09, wpb=64909, bsz=128, num_updates=4705, lr=9.99704e-05, gnorm=2.281, loss_scale=4, train_wall=11, gb_free=2.8, wall=53525
2021-06-19 09:31:02 | INFO | train_inner | epoch 002: 1736 / 3002 loss=2.667, ppl=6.35, wps=5822.4, ups=0.09, wpb=64822, bsz=128, num_updates=4706, lr=9.99703e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=53536
2021-06-19 09:31:13 | INFO | train_inner | epoch 002: 1737 / 3002 loss=2.534, ppl=5.79, wps=5768, ups=0.09, wpb=64799, bsz=128, num_updates=4707, lr=9.99703e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=53548
2021-06-19 09:31:24 | INFO | train_inner | epoch 002: 1738 / 3002 loss=2.71, ppl=6.54, wps=5813.4, ups=0.09, wpb=64820, bsz=128, num_updates=4708, lr=9.99703e-05, gnorm=2.105, loss_scale=4, train_wall=11, gb_free=2.8, wall=53559
2021-06-19 09:31:36 | INFO | train_inner | epoch 002: 1739 / 3002 loss=2.565, ppl=5.92, wps=5768.5, ups=0.09, wpb=64875, bsz=128, num_updates=4709, lr=9.99703e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=53570
2021-06-19 09:31:47 | INFO | train_inner | epoch 002: 1740 / 3002 loss=2.66, ppl=6.32, wps=5915.7, ups=0.09, wpb=64902, bsz=128, num_updates=4710, lr=9.99703e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=53581
2021-06-19 09:31:58 | INFO | train_inner | epoch 002: 1741 / 3002 loss=2.604, ppl=6.08, wps=5792.2, ups=0.09, wpb=64914, bsz=128, num_updates=4711, lr=9.99703e-05, gnorm=2.378, loss_scale=4, train_wall=11, gb_free=2.8, wall=53592
2021-06-19 09:32:09 | INFO | train_inner | epoch 002: 1742 / 3002 loss=2.759, ppl=6.77, wps=5936.1, ups=0.09, wpb=64901, bsz=128, num_updates=4712, lr=9.99703e-05, gnorm=2.254, loss_scale=4, train_wall=10, gb_free=2.8, wall=53603
2021-06-19 09:32:20 | INFO | train_inner | epoch 002: 1743 / 3002 loss=2.817, ppl=7.05, wps=5923.1, ups=0.09, wpb=64785, bsz=128, num_updates=4713, lr=9.99703e-05, gnorm=2.235, loss_scale=4, train_wall=10, gb_free=2.8, wall=53614
2021-06-19 09:32:31 | INFO | train_inner | epoch 002: 1744 / 3002 loss=2.707, ppl=6.53, wps=5815.1, ups=0.09, wpb=64879, bsz=128, num_updates=4714, lr=9.99703e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=53625
2021-06-19 09:32:42 | INFO | train_inner | epoch 002: 1745 / 3002 loss=2.819, ppl=7.06, wps=5833.1, ups=0.09, wpb=64856, bsz=128, num_updates=4715, lr=9.99703e-05, gnorm=2.328, loss_scale=4, train_wall=11, gb_free=2.8, wall=53636
2021-06-19 09:32:53 | INFO | train_inner | epoch 002: 1746 / 3002 loss=2.812, ppl=7.02, wps=5965.6, ups=0.09, wpb=64855, bsz=128, num_updates=4716, lr=9.99703e-05, gnorm=2.337, loss_scale=4, train_wall=10, gb_free=2.8, wall=53647
2021-06-19 09:33:04 | INFO | train_inner | epoch 002: 1747 / 3002 loss=2.772, ppl=6.83, wps=5827.5, ups=0.09, wpb=64863, bsz=128, num_updates=4717, lr=9.99703e-05, gnorm=2.335, loss_scale=4, train_wall=11, gb_free=2.8, wall=53658
2021-06-19 09:33:15 | INFO | train_inner | epoch 002: 1748 / 3002 loss=2.71, ppl=6.54, wps=5892.3, ups=0.09, wpb=64814, bsz=128, num_updates=4718, lr=9.99703e-05, gnorm=2.266, loss_scale=4, train_wall=11, gb_free=2.8, wall=53669
2021-06-19 09:33:26 | INFO | train_inner | epoch 002: 1749 / 3002 loss=2.63, ppl=6.19, wps=5908.3, ups=0.09, wpb=64751, bsz=128, num_updates=4719, lr=9.99702e-05, gnorm=2.407, loss_scale=4, train_wall=11, gb_free=2.8, wall=53680
2021-06-19 09:33:37 | INFO | train_inner | epoch 002: 1750 / 3002 loss=2.854, ppl=7.23, wps=5791.1, ups=0.09, wpb=64807, bsz=128, num_updates=4720, lr=9.99702e-05, gnorm=2.279, loss_scale=4, train_wall=11, gb_free=2.8, wall=53691
2021-06-19 09:33:48 | INFO | train_inner | epoch 002: 1751 / 3002 loss=2.892, ppl=7.43, wps=5903.9, ups=0.09, wpb=64835, bsz=128, num_updates=4721, lr=9.99702e-05, gnorm=2.316, loss_scale=4, train_wall=11, gb_free=2.8, wall=53702
2021-06-19 09:33:59 | INFO | train_inner | epoch 002: 1752 / 3002 loss=2.763, ppl=6.79, wps=6049.4, ups=0.09, wpb=64892, bsz=128, num_updates=4722, lr=9.99702e-05, gnorm=2.296, loss_scale=4, train_wall=10, gb_free=2.8, wall=53713
2021-06-19 09:34:10 | INFO | train_inner | epoch 002: 1753 / 3002 loss=2.802, ppl=6.98, wps=5814.5, ups=0.09, wpb=64742, bsz=128, num_updates=4723, lr=9.99702e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=53724
2021-06-19 09:34:21 | INFO | train_inner | epoch 002: 1754 / 3002 loss=2.712, ppl=6.55, wps=5813.4, ups=0.09, wpb=64831, bsz=128, num_updates=4724, lr=9.99702e-05, gnorm=2.414, loss_scale=4, train_wall=11, gb_free=2.8, wall=53735
2021-06-19 09:34:32 | INFO | train_inner | epoch 002: 1755 / 3002 loss=2.761, ppl=6.78, wps=5746.3, ups=0.09, wpb=64706, bsz=128, num_updates=4725, lr=9.99702e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=53747
2021-06-19 09:34:43 | INFO | train_inner | epoch 002: 1756 / 3002 loss=2.615, ppl=6.13, wps=5851.7, ups=0.09, wpb=64832, bsz=128, num_updates=4726, lr=9.99702e-05, gnorm=2.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=53758
2021-06-19 09:34:55 | INFO | train_inner | epoch 002: 1757 / 3002 loss=2.862, ppl=7.27, wps=5853, ups=0.09, wpb=64802, bsz=128, num_updates=4727, lr=9.99702e-05, gnorm=2.339, loss_scale=4, train_wall=11, gb_free=2.8, wall=53769
2021-06-19 09:35:06 | INFO | train_inner | epoch 002: 1758 / 3002 loss=2.728, ppl=6.63, wps=5884.6, ups=0.09, wpb=64809, bsz=128, num_updates=4728, lr=9.99702e-05, gnorm=2.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=53780
2021-06-19 09:35:17 | INFO | train_inner | epoch 002: 1759 / 3002 loss=2.661, ppl=6.33, wps=5745, ups=0.09, wpb=64774, bsz=128, num_updates=4729, lr=9.99702e-05, gnorm=2.146, loss_scale=4, train_wall=11, gb_free=2.8, wall=53791
2021-06-19 09:35:28 | INFO | train_inner | epoch 002: 1760 / 3002 loss=2.83, ppl=7.11, wps=5915.9, ups=0.09, wpb=64765, bsz=128, num_updates=4730, lr=9.99702e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=53802
2021-06-19 09:35:39 | INFO | train_inner | epoch 002: 1761 / 3002 loss=2.684, ppl=6.43, wps=5813.8, ups=0.09, wpb=64820, bsz=128, num_updates=4731, lr=9.99701e-05, gnorm=2.267, loss_scale=4, train_wall=11, gb_free=2.8, wall=53813
2021-06-19 09:35:50 | INFO | train_inner | epoch 002: 1762 / 3002 loss=2.685, ppl=6.43, wps=5928.6, ups=0.09, wpb=64811, bsz=128, num_updates=4732, lr=9.99701e-05, gnorm=2.201, loss_scale=4, train_wall=10, gb_free=2.8, wall=53824
2021-06-19 09:36:01 | INFO | train_inner | epoch 002: 1763 / 3002 loss=2.619, ppl=6.14, wps=5923.2, ups=0.09, wpb=64890, bsz=128, num_updates=4733, lr=9.99701e-05, gnorm=2.335, loss_scale=4, train_wall=10, gb_free=2.8, wall=53835
2021-06-19 09:36:12 | INFO | train_inner | epoch 002: 1764 / 3002 loss=2.786, ppl=6.9, wps=5900.3, ups=0.09, wpb=64781, bsz=128, num_updates=4734, lr=9.99701e-05, gnorm=2.899, loss_scale=4, train_wall=11, gb_free=2.8, wall=53846
2021-06-19 09:36:23 | INFO | train_inner | epoch 002: 1765 / 3002 loss=2.732, ppl=6.64, wps=5852.7, ups=0.09, wpb=64736, bsz=128, num_updates=4735, lr=9.99701e-05, gnorm=2.24, loss_scale=4, train_wall=11, gb_free=2.8, wall=53857
2021-06-19 09:36:34 | INFO | train_inner | epoch 002: 1766 / 3002 loss=2.7, ppl=6.5, wps=5812.2, ups=0.09, wpb=64782, bsz=128, num_updates=4736, lr=9.99701e-05, gnorm=2.416, loss_scale=4, train_wall=11, gb_free=2.8, wall=53868
2021-06-19 09:36:45 | INFO | train_inner | epoch 002: 1767 / 3002 loss=3.043, ppl=8.24, wps=5833.7, ups=0.09, wpb=64676, bsz=128, num_updates=4737, lr=9.99701e-05, gnorm=2.477, loss_scale=4, train_wall=11, gb_free=2.8, wall=53879
2021-06-19 09:36:56 | INFO | train_inner | epoch 002: 1768 / 3002 loss=2.931, ppl=7.62, wps=5928.7, ups=0.09, wpb=64819, bsz=128, num_updates=4738, lr=9.99701e-05, gnorm=2.293, loss_scale=4, train_wall=10, gb_free=2.8, wall=53890
2021-06-19 09:37:07 | INFO | train_inner | epoch 002: 1769 / 3002 loss=2.748, ppl=6.72, wps=5945.7, ups=0.09, wpb=64774, bsz=128, num_updates=4739, lr=9.99701e-05, gnorm=2.199, loss_scale=4, train_wall=10, gb_free=2.8, wall=53901
2021-06-19 09:37:18 | INFO | train_inner | epoch 002: 1770 / 3002 loss=2.668, ppl=6.36, wps=5834.1, ups=0.09, wpb=64947, bsz=128, num_updates=4740, lr=9.99701e-05, gnorm=2.346, loss_scale=4, train_wall=11, gb_free=2.8, wall=53912
2021-06-19 09:37:29 | INFO | train_inner | epoch 002: 1771 / 3002 loss=2.841, ppl=7.16, wps=5829.9, ups=0.09, wpb=64857, bsz=128, num_updates=4741, lr=9.99701e-05, gnorm=2.326, loss_scale=4, train_wall=11, gb_free=2.8, wall=53924
2021-06-19 09:37:40 | INFO | train_inner | epoch 002: 1772 / 3002 loss=2.633, ppl=6.2, wps=5834.4, ups=0.09, wpb=64797, bsz=128, num_updates=4742, lr=9.99701e-05, gnorm=2.304, loss_scale=4, train_wall=11, gb_free=2.8, wall=53935
2021-06-19 09:37:51 | INFO | train_inner | epoch 002: 1773 / 3002 loss=2.946, ppl=7.7, wps=5816.5, ups=0.09, wpb=64846, bsz=128, num_updates=4743, lr=9.99701e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=53946
2021-06-19 09:38:02 | INFO | train_inner | epoch 002: 1774 / 3002 loss=3.005, ppl=8.03, wps=5969, ups=0.09, wpb=64739, bsz=128, num_updates=4744, lr=9.997e-05, gnorm=2.246, loss_scale=4, train_wall=10, gb_free=2.8, wall=53957
2021-06-19 09:38:13 | INFO | train_inner | epoch 002: 1775 / 3002 loss=2.772, ppl=6.83, wps=5840.5, ups=0.09, wpb=64867, bsz=128, num_updates=4745, lr=9.997e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=53968
2021-06-19 09:38:25 | INFO | train_inner | epoch 002: 1776 / 3002 loss=2.758, ppl=6.77, wps=5842.3, ups=0.09, wpb=64855, bsz=128, num_updates=4746, lr=9.997e-05, gnorm=2.219, loss_scale=4, train_wall=11, gb_free=2.8, wall=53979
2021-06-19 09:38:36 | INFO | train_inner | epoch 002: 1777 / 3002 loss=2.73, ppl=6.63, wps=5876.6, ups=0.09, wpb=64918, bsz=128, num_updates=4747, lr=9.997e-05, gnorm=2.282, loss_scale=4, train_wall=11, gb_free=2.8, wall=53990
2021-06-19 09:38:46 | INFO | train_inner | epoch 002: 1778 / 3002 loss=2.851, ppl=7.22, wps=5922.6, ups=0.09, wpb=64845, bsz=128, num_updates=4748, lr=9.997e-05, gnorm=2.348, loss_scale=4, train_wall=11, gb_free=2.8, wall=54001
2021-06-19 09:38:58 | INFO | train_inner | epoch 002: 1779 / 3002 loss=2.746, ppl=6.71, wps=5781.7, ups=0.09, wpb=64804, bsz=128, num_updates=4749, lr=9.997e-05, gnorm=2.695, loss_scale=4, train_wall=11, gb_free=2.8, wall=54012
2021-06-19 09:39:09 | INFO | train_inner | epoch 002: 1780 / 3002 loss=2.771, ppl=6.83, wps=5824.4, ups=0.09, wpb=64791, bsz=128, num_updates=4750, lr=9.997e-05, gnorm=3.57, loss_scale=4, train_wall=11, gb_free=2.8, wall=54023
2021-06-19 09:39:20 | INFO | train_inner | epoch 002: 1781 / 3002 loss=2.714, ppl=6.56, wps=5751.1, ups=0.09, wpb=64833, bsz=128, num_updates=4751, lr=9.997e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=54034
2021-06-19 09:39:31 | INFO | train_inner | epoch 002: 1782 / 3002 loss=2.75, ppl=6.73, wps=5753.6, ups=0.09, wpb=64923, bsz=128, num_updates=4752, lr=9.997e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=54046
2021-06-19 09:39:42 | INFO | train_inner | epoch 002: 1783 / 3002 loss=2.744, ppl=6.7, wps=5878.5, ups=0.09, wpb=64864, bsz=128, num_updates=4753, lr=9.997e-05, gnorm=2.275, loss_scale=4, train_wall=11, gb_free=2.8, wall=54057
2021-06-19 09:39:54 | INFO | train_inner | epoch 002: 1784 / 3002 loss=2.929, ppl=7.62, wps=5702.5, ups=0.09, wpb=64772, bsz=128, num_updates=4754, lr=9.997e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=54068
2021-06-19 09:40:05 | INFO | train_inner | epoch 002: 1785 / 3002 loss=2.733, ppl=6.65, wps=5744.3, ups=0.09, wpb=64752, bsz=128, num_updates=4755, lr=9.997e-05, gnorm=2.305, loss_scale=4, train_wall=11, gb_free=2.8, wall=54079
2021-06-19 09:40:16 | INFO | train_inner | epoch 002: 1786 / 3002 loss=2.754, ppl=6.75, wps=5805.1, ups=0.09, wpb=64805, bsz=128, num_updates=4756, lr=9.99699e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=54091
2021-06-19 09:40:28 | INFO | train_inner | epoch 002: 1787 / 3002 loss=2.888, ppl=7.4, wps=5715.4, ups=0.09, wpb=64825, bsz=128, num_updates=4757, lr=9.99699e-05, gnorm=2.355, loss_scale=4, train_wall=11, gb_free=2.8, wall=54102
2021-06-19 09:40:39 | INFO | train_inner | epoch 002: 1788 / 3002 loss=2.737, ppl=6.67, wps=5838.7, ups=0.09, wpb=64835, bsz=128, num_updates=4758, lr=9.99699e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=54113
2021-06-19 09:40:50 | INFO | train_inner | epoch 002: 1789 / 3002 loss=2.958, ppl=7.77, wps=5816.3, ups=0.09, wpb=64791, bsz=128, num_updates=4759, lr=9.99699e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=54124
2021-06-19 09:41:01 | INFO | train_inner | epoch 002: 1790 / 3002 loss=2.727, ppl=6.62, wps=5784.4, ups=0.09, wpb=64855, bsz=128, num_updates=4760, lr=9.99699e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=54135
2021-06-19 09:41:12 | INFO | train_inner | epoch 002: 1791 / 3002 loss=2.701, ppl=6.5, wps=5772.7, ups=0.09, wpb=64732, bsz=128, num_updates=4761, lr=9.99699e-05, gnorm=2.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=54147
2021-06-19 09:41:24 | INFO | train_inner | epoch 002: 1792 / 3002 loss=2.823, ppl=7.08, wps=5735.1, ups=0.09, wpb=64778, bsz=128, num_updates=4762, lr=9.99699e-05, gnorm=2.755, loss_scale=4, train_wall=11, gb_free=2.8, wall=54158
2021-06-19 09:41:35 | INFO | train_inner | epoch 002: 1793 / 3002 loss=2.667, ppl=6.35, wps=5847.8, ups=0.09, wpb=64802, bsz=128, num_updates=4763, lr=9.99699e-05, gnorm=2.582, loss_scale=4, train_wall=11, gb_free=2.8, wall=54169
2021-06-19 09:41:46 | INFO | train_inner | epoch 002: 1794 / 3002 loss=2.61, ppl=6.1, wps=5812.7, ups=0.09, wpb=64850, bsz=128, num_updates=4764, lr=9.99699e-05, gnorm=2.202, loss_scale=4, train_wall=11, gb_free=2.8, wall=54180
2021-06-19 09:41:57 | INFO | train_inner | epoch 002: 1795 / 3002 loss=2.722, ppl=6.6, wps=5777.1, ups=0.09, wpb=64796, bsz=128, num_updates=4765, lr=9.99699e-05, gnorm=2.422, loss_scale=4, train_wall=11, gb_free=2.8, wall=54191
2021-06-19 09:42:08 | INFO | train_inner | epoch 002: 1796 / 3002 loss=3.019, ppl=8.1, wps=5850.3, ups=0.09, wpb=64886, bsz=128, num_updates=4766, lr=9.99699e-05, gnorm=2.423, loss_scale=4, train_wall=11, gb_free=2.8, wall=54202
2021-06-19 09:42:19 | INFO | train_inner | epoch 002: 1797 / 3002 loss=2.741, ppl=6.69, wps=5874.6, ups=0.09, wpb=64818, bsz=128, num_updates=4767, lr=9.99699e-05, gnorm=4.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=54213
2021-06-19 09:42:30 | INFO | train_inner | epoch 002: 1798 / 3002 loss=2.68, ppl=6.41, wps=5930.7, ups=0.09, wpb=64825, bsz=128, num_updates=4768, lr=9.99699e-05, gnorm=2.614, loss_scale=4, train_wall=10, gb_free=2.8, wall=54224
2021-06-19 09:42:41 | INFO | train_inner | epoch 002: 1799 / 3002 loss=2.735, ppl=6.66, wps=5840, ups=0.09, wpb=64757, bsz=128, num_updates=4769, lr=9.99698e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=54235
2021-06-19 09:42:52 | INFO | train_inner | epoch 002: 1800 / 3002 loss=2.688, ppl=6.44, wps=5833.6, ups=0.09, wpb=64704, bsz=128, num_updates=4770, lr=9.99698e-05, gnorm=2.382, loss_scale=4, train_wall=11, gb_free=2.8, wall=54247
2021-06-19 09:43:03 | INFO | train_inner | epoch 002: 1801 / 3002 loss=2.867, ppl=7.29, wps=5934.1, ups=0.09, wpb=64738, bsz=128, num_updates=4771, lr=9.99698e-05, gnorm=2.825, loss_scale=4, train_wall=10, gb_free=2.8, wall=54257
2021-06-19 09:43:14 | INFO | train_inner | epoch 002: 1802 / 3002 loss=2.79, ppl=6.92, wps=5808.3, ups=0.09, wpb=64787, bsz=128, num_updates=4772, lr=9.99698e-05, gnorm=2.354, loss_scale=4, train_wall=11, gb_free=2.8, wall=54269
2021-06-19 09:43:25 | INFO | train_inner | epoch 002: 1803 / 3002 loss=2.696, ppl=6.48, wps=5821.3, ups=0.09, wpb=64850, bsz=128, num_updates=4773, lr=9.99698e-05, gnorm=2.489, loss_scale=4, train_wall=11, gb_free=2.8, wall=54280
2021-06-19 09:43:37 | INFO | train_inner | epoch 002: 1804 / 3002 loss=2.796, ppl=6.94, wps=5781.9, ups=0.09, wpb=64824, bsz=128, num_updates=4774, lr=9.99698e-05, gnorm=5.679, loss_scale=4, train_wall=11, gb_free=2.8, wall=54291
2021-06-19 09:43:48 | INFO | train_inner | epoch 002: 1805 / 3002 loss=2.719, ppl=6.59, wps=5770.7, ups=0.09, wpb=64800, bsz=128, num_updates=4775, lr=9.99698e-05, gnorm=6.844, loss_scale=4, train_wall=11, gb_free=2.8, wall=54302
2021-06-19 09:43:59 | INFO | train_inner | epoch 002: 1806 / 3002 loss=2.692, ppl=6.46, wps=5878, ups=0.09, wpb=64813, bsz=128, num_updates=4776, lr=9.99698e-05, gnorm=2.462, loss_scale=4, train_wall=11, gb_free=2.8, wall=54313
2021-06-19 09:44:10 | INFO | train_inner | epoch 002: 1807 / 3002 loss=2.865, ppl=7.29, wps=5800.1, ups=0.09, wpb=64771, bsz=128, num_updates=4777, lr=9.99698e-05, gnorm=2.39, loss_scale=4, train_wall=11, gb_free=2.8, wall=54324
2021-06-19 09:44:21 | INFO | train_inner | epoch 002: 1808 / 3002 loss=2.713, ppl=6.56, wps=5850.8, ups=0.09, wpb=64869, bsz=128, num_updates=4778, lr=9.99698e-05, gnorm=2.766, loss_scale=4, train_wall=11, gb_free=2.8, wall=54335
2021-06-19 09:44:32 | INFO | train_inner | epoch 002: 1809 / 3002 loss=2.917, ppl=7.55, wps=5912.2, ups=0.09, wpb=64744, bsz=128, num_updates=4779, lr=9.99698e-05, gnorm=2.31, loss_scale=4, train_wall=11, gb_free=2.8, wall=54346
2021-06-19 09:44:43 | INFO | train_inner | epoch 002: 1810 / 3002 loss=2.927, ppl=7.6, wps=5872.9, ups=0.09, wpb=64833, bsz=128, num_updates=4780, lr=9.99698e-05, gnorm=2.577, loss_scale=4, train_wall=11, gb_free=2.8, wall=54357
2021-06-19 09:44:54 | INFO | train_inner | epoch 002: 1811 / 3002 loss=2.562, ppl=5.9, wps=5847.3, ups=0.09, wpb=64889, bsz=128, num_updates=4781, lr=9.99697e-05, gnorm=2.387, loss_scale=4, train_wall=11, gb_free=2.8, wall=54369
2021-06-19 09:45:05 | INFO | train_inner | epoch 002: 1812 / 3002 loss=2.871, ppl=7.31, wps=5761, ups=0.09, wpb=64773, bsz=128, num_updates=4782, lr=9.99697e-05, gnorm=5.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=54380
2021-06-19 09:45:17 | INFO | train_inner | epoch 002: 1813 / 3002 loss=2.74, ppl=6.68, wps=5832.6, ups=0.09, wpb=64750, bsz=128, num_updates=4783, lr=9.99697e-05, gnorm=2.374, loss_scale=4, train_wall=11, gb_free=2.8, wall=54391
2021-06-19 09:45:28 | INFO | train_inner | epoch 002: 1814 / 3002 loss=2.823, ppl=7.08, wps=5756.1, ups=0.09, wpb=64804, bsz=128, num_updates=4784, lr=9.99697e-05, gnorm=2.37, loss_scale=4, train_wall=11, gb_free=2.8, wall=54402
2021-06-19 09:45:39 | INFO | train_inner | epoch 002: 1815 / 3002 loss=2.719, ppl=6.59, wps=5847.8, ups=0.09, wpb=64856, bsz=128, num_updates=4785, lr=9.99697e-05, gnorm=2.482, loss_scale=4, train_wall=11, gb_free=2.8, wall=54413
2021-06-19 09:45:50 | INFO | train_inner | epoch 002: 1816 / 3002 loss=2.986, ppl=7.92, wps=5880.4, ups=0.09, wpb=64816, bsz=128, num_updates=4786, lr=9.99697e-05, gnorm=2.608, loss_scale=4, train_wall=11, gb_free=2.8, wall=54424
2021-06-19 09:46:01 | INFO | train_inner | epoch 002: 1817 / 3002 loss=2.918, ppl=7.56, wps=6030.4, ups=0.09, wpb=64805, bsz=128, num_updates=4787, lr=9.99697e-05, gnorm=2.593, loss_scale=4, train_wall=10, gb_free=2.8, wall=54435
2021-06-19 09:46:12 | INFO | train_inner | epoch 002: 1818 / 3002 loss=2.714, ppl=6.56, wps=5843.3, ups=0.09, wpb=64830, bsz=128, num_updates=4788, lr=9.99697e-05, gnorm=3.67, loss_scale=4, train_wall=11, gb_free=2.8, wall=54446
2021-06-19 09:46:23 | INFO | train_inner | epoch 002: 1819 / 3002 loss=2.828, ppl=7.1, wps=5871.7, ups=0.09, wpb=64874, bsz=128, num_updates=4789, lr=9.99697e-05, gnorm=2.451, loss_scale=4, train_wall=11, gb_free=2.8, wall=54457
2021-06-19 09:46:34 | INFO | train_inner | epoch 002: 1820 / 3002 loss=2.64, ppl=6.23, wps=5827.4, ups=0.09, wpb=64825, bsz=128, num_updates=4790, lr=9.99697e-05, gnorm=2.587, loss_scale=4, train_wall=11, gb_free=2.8, wall=54468
2021-06-19 09:46:45 | INFO | train_inner | epoch 002: 1821 / 3002 loss=2.72, ppl=6.59, wps=5896, ups=0.09, wpb=64814, bsz=128, num_updates=4791, lr=9.99697e-05, gnorm=3.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=54479
2021-06-19 09:46:56 | INFO | train_inner | epoch 002: 1822 / 3002 loss=2.759, ppl=6.77, wps=5974.4, ups=0.09, wpb=64769, bsz=128, num_updates=4792, lr=9.99697e-05, gnorm=2.745, loss_scale=4, train_wall=10, gb_free=2.8, wall=54490
2021-06-19 09:47:07 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-19 09:47:18 | INFO | train_inner | epoch 002: 1824 / 3002 loss=2.858, ppl=7.25, wps=2937.8, ups=0.05, wpb=64830, bsz=128, num_updates=4793, lr=9.99697e-05, gnorm=2.443, loss_scale=2, train_wall=21, gb_free=2.8, wall=54512
2021-06-19 09:47:29 | INFO | train_inner | epoch 002: 1825 / 3002 loss=2.893, ppl=7.43, wps=5848.9, ups=0.09, wpb=64866, bsz=128, num_updates=4794, lr=9.99696e-05, gnorm=2.429, loss_scale=2, train_wall=11, gb_free=2.8, wall=54523
2021-06-19 09:47:40 | INFO | train_inner | epoch 002: 1826 / 3002 loss=2.709, ppl=6.54, wps=5769.5, ups=0.09, wpb=64812, bsz=128, num_updates=4795, lr=9.99696e-05, gnorm=2.36, loss_scale=2, train_wall=11, gb_free=2.8, wall=54535
2021-06-19 09:47:51 | INFO | train_inner | epoch 002: 1827 / 3002 loss=2.682, ppl=6.42, wps=5942.8, ups=0.09, wpb=64850, bsz=128, num_updates=4796, lr=9.99696e-05, gnorm=2.431, loss_scale=2, train_wall=10, gb_free=2.8, wall=54545
2021-06-19 09:48:02 | INFO | train_inner | epoch 002: 1828 / 3002 loss=2.596, ppl=6.04, wps=5941.3, ups=0.09, wpb=64844, bsz=128, num_updates=4797, lr=9.99696e-05, gnorm=2.362, loss_scale=2, train_wall=10, gb_free=2.8, wall=54556
2021-06-19 09:48:13 | INFO | train_inner | epoch 002: 1829 / 3002 loss=2.752, ppl=6.73, wps=5828.8, ups=0.09, wpb=64894, bsz=128, num_updates=4798, lr=9.99696e-05, gnorm=2.313, loss_scale=2, train_wall=11, gb_free=2.8, wall=54567
2021-06-19 09:48:24 | INFO | train_inner | epoch 002: 1830 / 3002 loss=2.826, ppl=7.09, wps=5861.8, ups=0.09, wpb=64749, bsz=128, num_updates=4799, lr=9.99696e-05, gnorm=2.92, loss_scale=2, train_wall=11, gb_free=2.8, wall=54579
2021-06-19 09:48:35 | INFO | train_inner | epoch 002: 1831 / 3002 loss=2.905, ppl=7.49, wps=5999.5, ups=0.09, wpb=64810, bsz=128, num_updates=4800, lr=9.99696e-05, gnorm=2.432, loss_scale=2, train_wall=10, gb_free=2.8, wall=54589
2021-06-19 09:48:46 | INFO | train_inner | epoch 002: 1832 / 3002 loss=2.648, ppl=6.27, wps=5938.2, ups=0.09, wpb=64957, bsz=128, num_updates=4801, lr=9.99696e-05, gnorm=2.609, loss_scale=2, train_wall=10, gb_free=2.8, wall=54600
2021-06-19 09:48:57 | INFO | train_inner | epoch 002: 1833 / 3002 loss=2.815, ppl=7.04, wps=5788.7, ups=0.09, wpb=64842, bsz=128, num_updates=4802, lr=9.99696e-05, gnorm=3.829, loss_scale=2, train_wall=11, gb_free=2.8, wall=54611
2021-06-19 09:49:08 | INFO | train_inner | epoch 002: 1834 / 3002 loss=2.888, ppl=7.4, wps=5965, ups=0.09, wpb=64779, bsz=128, num_updates=4803, lr=9.99696e-05, gnorm=2.297, loss_scale=2, train_wall=10, gb_free=2.8, wall=54622
2021-06-19 09:49:19 | INFO | train_inner | epoch 002: 1835 / 3002 loss=2.856, ppl=7.24, wps=5719, ups=0.09, wpb=64811, bsz=128, num_updates=4804, lr=9.99696e-05, gnorm=2.971, loss_scale=2, train_wall=11, gb_free=2.8, wall=54634
2021-06-19 09:49:30 | INFO | train_inner | epoch 002: 1836 / 3002 loss=2.769, ppl=6.82, wps=5821.7, ups=0.09, wpb=64743, bsz=128, num_updates=4805, lr=9.99696e-05, gnorm=2.698, loss_scale=2, train_wall=11, gb_free=2.8, wall=54645
2021-06-19 09:49:41 | INFO | train_inner | epoch 002: 1837 / 3002 loss=2.61, ppl=6.1, wps=5914.2, ups=0.09, wpb=64871, bsz=128, num_updates=4806, lr=9.99695e-05, gnorm=3.172, loss_scale=2, train_wall=11, gb_free=2.8, wall=54656
2021-06-19 09:49:53 | INFO | train_inner | epoch 002: 1838 / 3002 loss=2.806, ppl=7, wps=5833.1, ups=0.09, wpb=64889, bsz=128, num_updates=4807, lr=9.99695e-05, gnorm=2.341, loss_scale=2, train_wall=11, gb_free=2.8, wall=54667
2021-06-19 09:50:04 | INFO | train_inner | epoch 002: 1839 / 3002 loss=2.576, ppl=5.96, wps=5829, ups=0.09, wpb=64891, bsz=128, num_updates=4808, lr=9.99695e-05, gnorm=2.179, loss_scale=2, train_wall=11, gb_free=2.8, wall=54678
2021-06-19 09:50:15 | INFO | train_inner | epoch 002: 1840 / 3002 loss=2.784, ppl=6.89, wps=5786.8, ups=0.09, wpb=64773, bsz=128, num_updates=4809, lr=9.99695e-05, gnorm=2.284, loss_scale=2, train_wall=11, gb_free=2.8, wall=54689
2021-06-19 09:50:26 | INFO | train_inner | epoch 002: 1841 / 3002 loss=2.927, ppl=7.6, wps=5773, ups=0.09, wpb=64785, bsz=128, num_updates=4810, lr=9.99695e-05, gnorm=2.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=54700
2021-06-19 09:50:37 | INFO | train_inner | epoch 002: 1842 / 3002 loss=2.705, ppl=6.52, wps=5810.8, ups=0.09, wpb=64842, bsz=128, num_updates=4811, lr=9.99695e-05, gnorm=2.568, loss_scale=2, train_wall=11, gb_free=2.8, wall=54712
2021-06-19 09:50:48 | INFO | train_inner | epoch 002: 1843 / 3002 loss=2.748, ppl=6.72, wps=5918.6, ups=0.09, wpb=64810, bsz=128, num_updates=4812, lr=9.99695e-05, gnorm=2.454, loss_scale=2, train_wall=10, gb_free=2.8, wall=54723
2021-06-19 09:50:59 | INFO | train_inner | epoch 002: 1844 / 3002 loss=2.895, ppl=7.44, wps=5931, ups=0.09, wpb=64820, bsz=128, num_updates=4813, lr=9.99695e-05, gnorm=2.541, loss_scale=2, train_wall=10, gb_free=2.8, wall=54733
2021-06-19 09:51:10 | INFO | train_inner | epoch 002: 1845 / 3002 loss=2.696, ppl=6.48, wps=5918.9, ups=0.09, wpb=64933, bsz=128, num_updates=4814, lr=9.99695e-05, gnorm=2.405, loss_scale=2, train_wall=10, gb_free=2.8, wall=54744
2021-06-19 09:51:21 | INFO | train_inner | epoch 002: 1846 / 3002 loss=2.712, ppl=6.55, wps=5801.7, ups=0.09, wpb=64904, bsz=128, num_updates=4815, lr=9.99695e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=54756
2021-06-19 09:51:32 | INFO | train_inner | epoch 002: 1847 / 3002 loss=2.908, ppl=7.5, wps=5896.3, ups=0.09, wpb=64777, bsz=128, num_updates=4816, lr=9.99695e-05, gnorm=2.241, loss_scale=2, train_wall=11, gb_free=2.8, wall=54767
2021-06-19 09:51:43 | INFO | train_inner | epoch 002: 1848 / 3002 loss=2.914, ppl=7.54, wps=5860.6, ups=0.09, wpb=64781, bsz=128, num_updates=4817, lr=9.99695e-05, gnorm=2.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=54778
2021-06-19 09:51:55 | INFO | train_inner | epoch 002: 1849 / 3002 loss=2.83, ppl=7.11, wps=5765.9, ups=0.09, wpb=64899, bsz=128, num_updates=4818, lr=9.99695e-05, gnorm=2.156, loss_scale=2, train_wall=11, gb_free=2.8, wall=54789
2021-06-19 09:52:06 | INFO | train_inner | epoch 002: 1850 / 3002 loss=2.822, ppl=7.07, wps=5735.9, ups=0.09, wpb=64811, bsz=128, num_updates=4819, lr=9.99694e-05, gnorm=2.264, loss_scale=2, train_wall=11, gb_free=2.8, wall=54800
2021-06-19 09:52:17 | INFO | train_inner | epoch 002: 1851 / 3002 loss=2.809, ppl=7.01, wps=5836.4, ups=0.09, wpb=64886, bsz=128, num_updates=4820, lr=9.99694e-05, gnorm=2.263, loss_scale=2, train_wall=11, gb_free=2.8, wall=54811
2021-06-19 09:52:28 | INFO | train_inner | epoch 002: 1852 / 3002 loss=2.942, ppl=7.68, wps=5793.9, ups=0.09, wpb=64865, bsz=128, num_updates=4821, lr=9.99694e-05, gnorm=2.381, loss_scale=2, train_wall=11, gb_free=2.8, wall=54823
2021-06-19 09:52:39 | INFO | train_inner | epoch 002: 1853 / 3002 loss=2.903, ppl=7.48, wps=5863.6, ups=0.09, wpb=64836, bsz=128, num_updates=4822, lr=9.99694e-05, gnorm=2.816, loss_scale=2, train_wall=11, gb_free=2.8, wall=54834
2021-06-19 09:52:50 | INFO | train_inner | epoch 002: 1854 / 3002 loss=2.694, ppl=6.47, wps=5798.6, ups=0.09, wpb=64887, bsz=128, num_updates=4823, lr=9.99694e-05, gnorm=2.486, loss_scale=2, train_wall=11, gb_free=2.8, wall=54845
2021-06-19 09:53:01 | INFO | train_inner | epoch 002: 1855 / 3002 loss=2.773, ppl=6.83, wps=6018.1, ups=0.09, wpb=64799, bsz=128, num_updates=4824, lr=9.99694e-05, gnorm=2.316, loss_scale=2, train_wall=10, gb_free=2.8, wall=54856
2021-06-19 09:53:12 | INFO | train_inner | epoch 002: 1856 / 3002 loss=2.697, ppl=6.48, wps=5881.6, ups=0.09, wpb=64797, bsz=128, num_updates=4825, lr=9.99694e-05, gnorm=2.301, loss_scale=2, train_wall=11, gb_free=2.8, wall=54867
2021-06-19 09:53:23 | INFO | train_inner | epoch 002: 1857 / 3002 loss=2.819, ppl=7.06, wps=5799.1, ups=0.09, wpb=64731, bsz=128, num_updates=4826, lr=9.99694e-05, gnorm=2.229, loss_scale=2, train_wall=11, gb_free=2.8, wall=54878
2021-06-19 09:53:34 | INFO | train_inner | epoch 002: 1858 / 3002 loss=2.829, ppl=7.11, wps=5952.2, ups=0.09, wpb=64848, bsz=128, num_updates=4827, lr=9.99694e-05, gnorm=2.703, loss_scale=2, train_wall=10, gb_free=2.8, wall=54889
2021-06-19 09:53:46 | INFO | train_inner | epoch 002: 1859 / 3002 loss=2.88, ppl=7.36, wps=5720.5, ups=0.09, wpb=64833, bsz=128, num_updates=4828, lr=9.99694e-05, gnorm=2.744, loss_scale=2, train_wall=11, gb_free=2.8, wall=54900
2021-06-19 09:53:57 | INFO | train_inner | epoch 002: 1860 / 3002 loss=2.783, ppl=6.88, wps=5963.1, ups=0.09, wpb=64867, bsz=128, num_updates=4829, lr=9.99694e-05, gnorm=2.313, loss_scale=2, train_wall=10, gb_free=2.8, wall=54911
2021-06-19 09:54:08 | INFO | train_inner | epoch 002: 1861 / 3002 loss=2.694, ppl=6.47, wps=5898.7, ups=0.09, wpb=64860, bsz=128, num_updates=4830, lr=9.99694e-05, gnorm=2.289, loss_scale=2, train_wall=11, gb_free=2.8, wall=54922
2021-06-19 09:54:19 | INFO | train_inner | epoch 002: 1862 / 3002 loss=2.808, ppl=7.01, wps=5803.9, ups=0.09, wpb=64778, bsz=128, num_updates=4831, lr=9.99693e-05, gnorm=2.41, loss_scale=2, train_wall=11, gb_free=2.8, wall=54933
2021-06-19 09:54:30 | INFO | train_inner | epoch 002: 1863 / 3002 loss=2.717, ppl=6.58, wps=5913.6, ups=0.09, wpb=64887, bsz=128, num_updates=4832, lr=9.99693e-05, gnorm=2.221, loss_scale=2, train_wall=11, gb_free=2.8, wall=54944
2021-06-19 09:54:41 | INFO | train_inner | epoch 002: 1864 / 3002 loss=2.914, ppl=7.54, wps=5973.6, ups=0.09, wpb=64899, bsz=128, num_updates=4833, lr=9.99693e-05, gnorm=2.274, loss_scale=2, train_wall=10, gb_free=2.8, wall=54955
2021-06-19 09:54:52 | INFO | train_inner | epoch 002: 1865 / 3002 loss=2.685, ppl=6.43, wps=5815.2, ups=0.09, wpb=64861, bsz=128, num_updates=4834, lr=9.99693e-05, gnorm=2.176, loss_scale=2, train_wall=11, gb_free=2.8, wall=54966
2021-06-19 09:55:03 | INFO | train_inner | epoch 002: 1866 / 3002 loss=2.772, ppl=6.83, wps=5796.2, ups=0.09, wpb=64793, bsz=128, num_updates=4835, lr=9.99693e-05, gnorm=2.255, loss_scale=2, train_wall=11, gb_free=2.8, wall=54977
2021-06-19 09:55:14 | INFO | train_inner | epoch 002: 1867 / 3002 loss=2.865, ppl=7.28, wps=5822, ups=0.09, wpb=64784, bsz=128, num_updates=4836, lr=9.99693e-05, gnorm=2.372, loss_scale=2, train_wall=11, gb_free=2.8, wall=54988
2021-06-19 09:55:25 | INFO | train_inner | epoch 002: 1868 / 3002 loss=2.717, ppl=6.58, wps=5898.8, ups=0.09, wpb=64850, bsz=128, num_updates=4837, lr=9.99693e-05, gnorm=2.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=54999
2021-06-19 09:55:36 | INFO | train_inner | epoch 002: 1869 / 3002 loss=2.649, ppl=6.27, wps=5781.5, ups=0.09, wpb=64791, bsz=128, num_updates=4838, lr=9.99693e-05, gnorm=2.288, loss_scale=2, train_wall=11, gb_free=2.8, wall=55011
2021-06-19 09:55:47 | INFO | train_inner | epoch 002: 1870 / 3002 loss=2.671, ppl=6.37, wps=5842.6, ups=0.09, wpb=64761, bsz=128, num_updates=4839, lr=9.99693e-05, gnorm=2.183, loss_scale=2, train_wall=11, gb_free=2.8, wall=55022
2021-06-19 09:55:58 | INFO | train_inner | epoch 002: 1871 / 3002 loss=2.713, ppl=6.56, wps=5920.7, ups=0.09, wpb=64802, bsz=128, num_updates=4840, lr=9.99693e-05, gnorm=2.151, loss_scale=2, train_wall=10, gb_free=2.8, wall=55033
2021-06-19 09:56:09 | INFO | train_inner | epoch 002: 1872 / 3002 loss=2.68, ppl=6.41, wps=5931, ups=0.09, wpb=64803, bsz=128, num_updates=4841, lr=9.99693e-05, gnorm=2.381, loss_scale=2, train_wall=10, gb_free=2.8, wall=55043
2021-06-19 09:56:20 | INFO | train_inner | epoch 002: 1873 / 3002 loss=2.796, ppl=6.95, wps=5959.9, ups=0.09, wpb=64801, bsz=128, num_updates=4842, lr=9.99693e-05, gnorm=2.347, loss_scale=2, train_wall=10, gb_free=2.8, wall=55054
2021-06-19 09:56:31 | INFO | train_inner | epoch 002: 1874 / 3002 loss=2.715, ppl=6.56, wps=5909.9, ups=0.09, wpb=64846, bsz=128, num_updates=4843, lr=9.99693e-05, gnorm=2.265, loss_scale=2, train_wall=11, gb_free=2.8, wall=55065
2021-06-19 09:56:42 | INFO | train_inner | epoch 002: 1875 / 3002 loss=2.693, ppl=6.47, wps=5882.2, ups=0.09, wpb=64855, bsz=128, num_updates=4844, lr=9.99692e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=55076
2021-06-19 09:56:53 | INFO | train_inner | epoch 002: 1876 / 3002 loss=2.782, ppl=6.88, wps=5837.3, ups=0.09, wpb=64752, bsz=128, num_updates=4845, lr=9.99692e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=55087
2021-06-19 09:57:04 | INFO | train_inner | epoch 002: 1877 / 3002 loss=2.684, ppl=6.43, wps=5916.1, ups=0.09, wpb=64853, bsz=128, num_updates=4846, lr=9.99692e-05, gnorm=2.221, loss_scale=2, train_wall=10, gb_free=2.8, wall=55098
2021-06-19 09:57:15 | INFO | train_inner | epoch 002: 1878 / 3002 loss=2.842, ppl=7.17, wps=5821.7, ups=0.09, wpb=64796, bsz=128, num_updates=4847, lr=9.99692e-05, gnorm=2.163, loss_scale=2, train_wall=11, gb_free=2.8, wall=55110
2021-06-19 09:57:26 | INFO | train_inner | epoch 002: 1879 / 3002 loss=2.661, ppl=6.33, wps=5950.5, ups=0.09, wpb=64928, bsz=128, num_updates=4848, lr=9.99692e-05, gnorm=2.298, loss_scale=2, train_wall=10, gb_free=2.8, wall=55120
2021-06-19 09:57:37 | INFO | train_inner | epoch 002: 1880 / 3002 loss=2.674, ppl=6.38, wps=5842.5, ups=0.09, wpb=64871, bsz=128, num_updates=4849, lr=9.99692e-05, gnorm=2.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=55132
2021-06-19 09:57:48 | INFO | train_inner | epoch 002: 1881 / 3002 loss=2.833, ppl=7.13, wps=5761, ups=0.09, wpb=64854, bsz=128, num_updates=4850, lr=9.99692e-05, gnorm=2.332, loss_scale=2, train_wall=11, gb_free=2.8, wall=55143
2021-06-19 09:58:00 | INFO | train_inner | epoch 002: 1882 / 3002 loss=2.633, ppl=6.2, wps=5847.8, ups=0.09, wpb=64802, bsz=128, num_updates=4851, lr=9.99692e-05, gnorm=2.274, loss_scale=2, train_wall=11, gb_free=2.8, wall=55154
2021-06-19 09:58:11 | INFO | train_inner | epoch 002: 1883 / 3002 loss=2.656, ppl=6.3, wps=5912.8, ups=0.09, wpb=64837, bsz=128, num_updates=4852, lr=9.99692e-05, gnorm=2.198, loss_scale=2, train_wall=11, gb_free=2.8, wall=55165
2021-06-19 09:58:22 | INFO | train_inner | epoch 002: 1884 / 3002 loss=2.925, ppl=7.59, wps=5794.1, ups=0.09, wpb=64829, bsz=128, num_updates=4853, lr=9.99692e-05, gnorm=2.463, loss_scale=2, train_wall=11, gb_free=2.8, wall=55176
2021-06-19 09:58:33 | INFO | train_inner | epoch 002: 1885 / 3002 loss=2.879, ppl=7.36, wps=5804.5, ups=0.09, wpb=64749, bsz=128, num_updates=4854, lr=9.99692e-05, gnorm=2.369, loss_scale=2, train_wall=11, gb_free=2.8, wall=55187
2021-06-19 09:58:44 | INFO | train_inner | epoch 002: 1886 / 3002 loss=2.625, ppl=6.17, wps=5827.6, ups=0.09, wpb=64882, bsz=128, num_updates=4855, lr=9.99692e-05, gnorm=2.598, loss_scale=2, train_wall=11, gb_free=2.8, wall=55198
2021-06-19 09:58:55 | INFO | train_inner | epoch 002: 1887 / 3002 loss=2.939, ppl=7.67, wps=5884.8, ups=0.09, wpb=64842, bsz=128, num_updates=4856, lr=9.99691e-05, gnorm=2.291, loss_scale=2, train_wall=11, gb_free=2.8, wall=55209
2021-06-19 09:59:06 | INFO | train_inner | epoch 002: 1888 / 3002 loss=2.799, ppl=6.96, wps=5864.2, ups=0.09, wpb=64859, bsz=128, num_updates=4857, lr=9.99691e-05, gnorm=2.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=55220
2021-06-19 09:59:17 | INFO | train_inner | epoch 002: 1889 / 3002 loss=2.852, ppl=7.22, wps=5818.1, ups=0.09, wpb=64782, bsz=128, num_updates=4858, lr=9.99691e-05, gnorm=2.345, loss_scale=2, train_wall=11, gb_free=2.8, wall=55232
2021-06-19 09:59:28 | INFO | train_inner | epoch 002: 1890 / 3002 loss=2.846, ppl=7.19, wps=5885.7, ups=0.09, wpb=64828, bsz=128, num_updates=4859, lr=9.99691e-05, gnorm=2.571, loss_scale=2, train_wall=11, gb_free=2.8, wall=55243
2021-06-19 09:59:39 | INFO | train_inner | epoch 002: 1891 / 3002 loss=2.82, ppl=7.06, wps=5940.1, ups=0.09, wpb=64718, bsz=128, num_updates=4860, lr=9.99691e-05, gnorm=2.214, loss_scale=2, train_wall=10, gb_free=2.8, wall=55253
2021-06-19 09:59:50 | INFO | train_inner | epoch 002: 1892 / 3002 loss=2.668, ppl=6.36, wps=5981.4, ups=0.09, wpb=64913, bsz=128, num_updates=4861, lr=9.99691e-05, gnorm=2.722, loss_scale=2, train_wall=10, gb_free=2.8, wall=55264
2021-06-19 10:00:01 | INFO | train_inner | epoch 002: 1893 / 3002 loss=2.679, ppl=6.41, wps=5810, ups=0.09, wpb=64819, bsz=128, num_updates=4862, lr=9.99691e-05, gnorm=2.314, loss_scale=2, train_wall=11, gb_free=2.8, wall=55275
2021-06-19 10:00:12 | INFO | train_inner | epoch 002: 1894 / 3002 loss=2.802, ppl=6.98, wps=5812.1, ups=0.09, wpb=64759, bsz=128, num_updates=4863, lr=9.99691e-05, gnorm=2.321, loss_scale=2, train_wall=11, gb_free=2.8, wall=55287
2021-06-19 10:00:24 | INFO | train_inner | epoch 002: 1895 / 3002 loss=2.798, ppl=6.96, wps=5714.9, ups=0.09, wpb=64713, bsz=128, num_updates=4864, lr=9.99691e-05, gnorm=2.197, loss_scale=2, train_wall=11, gb_free=2.8, wall=55298
2021-06-19 10:00:35 | INFO | train_inner | epoch 002: 1896 / 3002 loss=2.783, ppl=6.88, wps=5931.6, ups=0.09, wpb=64833, bsz=128, num_updates=4865, lr=9.99691e-05, gnorm=2.233, loss_scale=2, train_wall=10, gb_free=2.8, wall=55309
2021-06-19 10:00:46 | INFO | train_inner | epoch 002: 1897 / 3002 loss=2.641, ppl=6.24, wps=5888.9, ups=0.09, wpb=64939, bsz=128, num_updates=4866, lr=9.99691e-05, gnorm=2.366, loss_scale=2, train_wall=11, gb_free=2.8, wall=55320
2021-06-19 10:00:57 | INFO | train_inner | epoch 002: 1898 / 3002 loss=2.756, ppl=6.75, wps=5813.2, ups=0.09, wpb=64842, bsz=128, num_updates=4867, lr=9.99691e-05, gnorm=2.272, loss_scale=2, train_wall=11, gb_free=2.8, wall=55331
2021-06-19 10:01:08 | INFO | train_inner | epoch 002: 1899 / 3002 loss=2.917, ppl=7.56, wps=5992, ups=0.09, wpb=64759, bsz=128, num_updates=4868, lr=9.99691e-05, gnorm=2.307, loss_scale=2, train_wall=10, gb_free=2.8, wall=55342
2021-06-19 10:01:18 | INFO | train_inner | epoch 002: 1900 / 3002 loss=2.664, ppl=6.34, wps=6007.3, ups=0.09, wpb=64839, bsz=128, num_updates=4869, lr=9.9969e-05, gnorm=2.474, loss_scale=2, train_wall=10, gb_free=2.8, wall=55353
2021-06-19 10:01:29 | INFO | train_inner | epoch 002: 1901 / 3002 loss=2.712, ppl=6.55, wps=5908.1, ups=0.09, wpb=64862, bsz=128, num_updates=4870, lr=9.9969e-05, gnorm=2.284, loss_scale=2, train_wall=11, gb_free=2.8, wall=55364
2021-06-19 10:01:40 | INFO | train_inner | epoch 002: 1902 / 3002 loss=2.745, ppl=6.71, wps=5844.1, ups=0.09, wpb=64890, bsz=128, num_updates=4871, lr=9.9969e-05, gnorm=2.309, loss_scale=2, train_wall=11, gb_free=2.8, wall=55375
2021-06-19 10:01:51 | INFO | train_inner | epoch 002: 1903 / 3002 loss=2.795, ppl=6.94, wps=5891.8, ups=0.09, wpb=64809, bsz=128, num_updates=4872, lr=9.9969e-05, gnorm=2.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=55386
2021-06-19 10:02:02 | INFO | train_inner | epoch 002: 1904 / 3002 loss=2.633, ppl=6.2, wps=5918.3, ups=0.09, wpb=64829, bsz=128, num_updates=4873, lr=9.9969e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=55397
2021-06-19 10:02:13 | INFO | train_inner | epoch 002: 1905 / 3002 loss=2.762, ppl=6.79, wps=5939.3, ups=0.09, wpb=64822, bsz=128, num_updates=4874, lr=9.9969e-05, gnorm=2.282, loss_scale=2, train_wall=10, gb_free=2.8, wall=55408
2021-06-19 10:02:24 | INFO | train_inner | epoch 002: 1906 / 3002 loss=2.903, ppl=7.48, wps=5790.6, ups=0.09, wpb=64699, bsz=128, num_updates=4875, lr=9.9969e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=55419
2021-06-19 10:02:36 | INFO | train_inner | epoch 002: 1907 / 3002 loss=2.83, ppl=7.11, wps=5733.2, ups=0.09, wpb=64810, bsz=128, num_updates=4876, lr=9.9969e-05, gnorm=3.063, loss_scale=2, train_wall=11, gb_free=2.8, wall=55430
2021-06-19 10:02:47 | INFO | train_inner | epoch 002: 1908 / 3002 loss=2.762, ppl=6.78, wps=5871.6, ups=0.09, wpb=64874, bsz=128, num_updates=4877, lr=9.9969e-05, gnorm=2.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=55441
2021-06-19 10:02:58 | INFO | train_inner | epoch 002: 1909 / 3002 loss=2.656, ppl=6.3, wps=5931.2, ups=0.09, wpb=64831, bsz=128, num_updates=4878, lr=9.9969e-05, gnorm=2.276, loss_scale=2, train_wall=10, gb_free=2.8, wall=55452
2021-06-19 10:03:09 | INFO | train_inner | epoch 002: 1910 / 3002 loss=2.755, ppl=6.75, wps=5967.4, ups=0.09, wpb=64743, bsz=128, num_updates=4879, lr=9.9969e-05, gnorm=2.353, loss_scale=2, train_wall=10, gb_free=2.8, wall=55463
2021-06-19 10:03:20 | INFO | train_inner | epoch 002: 1911 / 3002 loss=2.897, ppl=7.45, wps=5855.1, ups=0.09, wpb=64889, bsz=128, num_updates=4880, lr=9.9969e-05, gnorm=2.361, loss_scale=2, train_wall=11, gb_free=2.8, wall=55474
2021-06-19 10:03:31 | INFO | train_inner | epoch 002: 1912 / 3002 loss=2.663, ppl=6.34, wps=5840.2, ups=0.09, wpb=64890, bsz=128, num_updates=4881, lr=9.99689e-05, gnorm=2.247, loss_scale=2, train_wall=11, gb_free=2.8, wall=55485
2021-06-19 10:03:42 | INFO | train_inner | epoch 002: 1913 / 3002 loss=2.687, ppl=6.44, wps=5808, ups=0.09, wpb=64763, bsz=128, num_updates=4882, lr=9.99689e-05, gnorm=2.462, loss_scale=2, train_wall=11, gb_free=2.8, wall=55496
2021-06-19 10:03:53 | INFO | train_inner | epoch 002: 1914 / 3002 loss=2.87, ppl=7.31, wps=5841.2, ups=0.09, wpb=64883, bsz=128, num_updates=4883, lr=9.99689e-05, gnorm=2.318, loss_scale=2, train_wall=11, gb_free=2.8, wall=55507
2021-06-19 10:04:04 | INFO | train_inner | epoch 002: 1915 / 3002 loss=2.728, ppl=6.63, wps=5935, ups=0.09, wpb=64821, bsz=128, num_updates=4884, lr=9.99689e-05, gnorm=2.844, loss_scale=2, train_wall=10, gb_free=2.8, wall=55518
2021-06-19 10:04:15 | INFO | train_inner | epoch 002: 1916 / 3002 loss=2.776, ppl=6.85, wps=5857.3, ups=0.09, wpb=64823, bsz=128, num_updates=4885, lr=9.99689e-05, gnorm=2.316, loss_scale=2, train_wall=11, gb_free=2.8, wall=55529
2021-06-19 10:04:26 | INFO | train_inner | epoch 002: 1917 / 3002 loss=2.74, ppl=6.68, wps=5908.6, ups=0.09, wpb=64777, bsz=128, num_updates=4886, lr=9.99689e-05, gnorm=2.155, loss_scale=2, train_wall=10, gb_free=2.8, wall=55540
2021-06-19 10:04:37 | INFO | train_inner | epoch 002: 1918 / 3002 loss=2.913, ppl=7.53, wps=5855.5, ups=0.09, wpb=64774, bsz=128, num_updates=4887, lr=9.99689e-05, gnorm=4.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=55551
2021-06-19 10:04:48 | INFO | train_inner | epoch 002: 1919 / 3002 loss=2.749, ppl=6.72, wps=5924.5, ups=0.09, wpb=64835, bsz=128, num_updates=4888, lr=9.99689e-05, gnorm=2.394, loss_scale=2, train_wall=11, gb_free=2.8, wall=55562
2021-06-19 10:04:59 | INFO | train_inner | epoch 002: 1920 / 3002 loss=2.871, ppl=7.32, wps=5802.6, ups=0.09, wpb=64794, bsz=128, num_updates=4889, lr=9.99689e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=55573
2021-06-19 10:05:10 | INFO | train_inner | epoch 002: 1921 / 3002 loss=2.823, ppl=7.08, wps=5903.3, ups=0.09, wpb=64876, bsz=128, num_updates=4890, lr=9.99689e-05, gnorm=2.215, loss_scale=2, train_wall=11, gb_free=2.8, wall=55584
2021-06-19 10:05:21 | INFO | train_inner | epoch 002: 1922 / 3002 loss=3.014, ppl=8.08, wps=5724, ups=0.09, wpb=64830, bsz=128, num_updates=4891, lr=9.99689e-05, gnorm=2.297, loss_scale=2, train_wall=11, gb_free=2.8, wall=55596
2021-06-19 10:05:32 | INFO | train_inner | epoch 002: 1923 / 3002 loss=2.788, ppl=6.91, wps=5935.7, ups=0.09, wpb=64849, bsz=128, num_updates=4892, lr=9.99689e-05, gnorm=2.244, loss_scale=2, train_wall=10, gb_free=2.8, wall=55607
2021-06-19 10:05:43 | INFO | train_inner | epoch 002: 1924 / 3002 loss=2.863, ppl=7.28, wps=5967.6, ups=0.09, wpb=64815, bsz=128, num_updates=4893, lr=9.99689e-05, gnorm=2.178, loss_scale=2, train_wall=10, gb_free=2.8, wall=55618
2021-06-19 10:05:54 | INFO | train_inner | epoch 002: 1925 / 3002 loss=2.551, ppl=5.86, wps=5916.3, ups=0.09, wpb=64855, bsz=128, num_updates=4894, lr=9.99688e-05, gnorm=2.217, loss_scale=2, train_wall=10, gb_free=2.8, wall=55629
2021-06-19 10:06:06 | INFO | train_inner | epoch 002: 1926 / 3002 loss=2.796, ppl=6.95, wps=5720.4, ups=0.09, wpb=64780, bsz=128, num_updates=4895, lr=9.99688e-05, gnorm=2.38, loss_scale=2, train_wall=11, gb_free=2.8, wall=55640
2021-06-19 10:06:17 | INFO | train_inner | epoch 002: 1927 / 3002 loss=2.823, ppl=7.08, wps=5829.6, ups=0.09, wpb=64841, bsz=128, num_updates=4896, lr=9.99688e-05, gnorm=2.386, loss_scale=2, train_wall=11, gb_free=2.8, wall=55651
2021-06-19 10:06:28 | INFO | train_inner | epoch 002: 1928 / 3002 loss=2.697, ppl=6.48, wps=5883.1, ups=0.09, wpb=64863, bsz=128, num_updates=4897, lr=9.99688e-05, gnorm=2.22, loss_scale=2, train_wall=11, gb_free=2.8, wall=55662
2021-06-19 10:06:39 | INFO | train_inner | epoch 002: 1929 / 3002 loss=2.756, ppl=6.76, wps=5756.1, ups=0.09, wpb=64825, bsz=128, num_updates=4898, lr=9.99688e-05, gnorm=2.47, loss_scale=2, train_wall=11, gb_free=2.8, wall=55673
2021-06-19 10:06:50 | INFO | train_inner | epoch 002: 1930 / 3002 loss=2.587, ppl=6.01, wps=5749.3, ups=0.09, wpb=64903, bsz=128, num_updates=4899, lr=9.99688e-05, gnorm=2.168, loss_scale=2, train_wall=11, gb_free=2.8, wall=55685
2021-06-19 10:07:01 | INFO | train_inner | epoch 002: 1931 / 3002 loss=2.803, ppl=6.98, wps=5823.2, ups=0.09, wpb=64766, bsz=128, num_updates=4900, lr=9.99688e-05, gnorm=2.171, loss_scale=2, train_wall=11, gb_free=2.8, wall=55696
2021-06-19 10:07:12 | INFO | train_inner | epoch 002: 1932 / 3002 loss=2.519, ppl=5.73, wps=5948.2, ups=0.09, wpb=64826, bsz=128, num_updates=4901, lr=9.99688e-05, gnorm=2.233, loss_scale=2, train_wall=10, gb_free=2.8, wall=55707
2021-06-19 10:07:23 | INFO | train_inner | epoch 002: 1933 / 3002 loss=2.641, ppl=6.24, wps=5909.2, ups=0.09, wpb=64802, bsz=128, num_updates=4902, lr=9.99688e-05, gnorm=2.236, loss_scale=2, train_wall=11, gb_free=2.8, wall=55718
2021-06-19 10:07:34 | INFO | train_inner | epoch 002: 1934 / 3002 loss=2.863, ppl=7.27, wps=5903.5, ups=0.09, wpb=64943, bsz=128, num_updates=4903, lr=9.99688e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=55729
2021-06-19 10:07:45 | INFO | train_inner | epoch 002: 1935 / 3002 loss=2.9, ppl=7.46, wps=5880.6, ups=0.09, wpb=64858, bsz=128, num_updates=4904, lr=9.99688e-05, gnorm=2.23, loss_scale=2, train_wall=11, gb_free=2.8, wall=55740
2021-06-19 10:07:56 | INFO | train_inner | epoch 002: 1936 / 3002 loss=2.764, ppl=6.79, wps=5918, ups=0.09, wpb=64844, bsz=128, num_updates=4905, lr=9.99688e-05, gnorm=2.378, loss_scale=2, train_wall=10, gb_free=2.8, wall=55751
2021-06-19 10:08:07 | INFO | train_inner | epoch 002: 1937 / 3002 loss=2.83, ppl=7.11, wps=5779.2, ups=0.09, wpb=64885, bsz=128, num_updates=4906, lr=9.99687e-05, gnorm=2.19, loss_scale=2, train_wall=11, gb_free=2.8, wall=55762
2021-06-19 10:08:18 | INFO | train_inner | epoch 002: 1938 / 3002 loss=2.86, ppl=7.26, wps=5897.7, ups=0.09, wpb=64765, bsz=128, num_updates=4907, lr=9.99687e-05, gnorm=2.314, loss_scale=2, train_wall=11, gb_free=2.8, wall=55773
2021-06-19 10:08:29 | INFO | train_inner | epoch 002: 1939 / 3002 loss=2.565, ppl=5.92, wps=5917.3, ups=0.09, wpb=64819, bsz=128, num_updates=4908, lr=9.99687e-05, gnorm=2.249, loss_scale=2, train_wall=10, gb_free=2.8, wall=55784
2021-06-19 10:08:40 | INFO | train_inner | epoch 002: 1940 / 3002 loss=2.676, ppl=6.39, wps=5942.2, ups=0.09, wpb=64888, bsz=128, num_updates=4909, lr=9.99687e-05, gnorm=2.149, loss_scale=2, train_wall=10, gb_free=2.8, wall=55795
2021-06-19 10:08:51 | INFO | train_inner | epoch 002: 1941 / 3002 loss=2.721, ppl=6.59, wps=5970, ups=0.09, wpb=64937, bsz=128, num_updates=4910, lr=9.99687e-05, gnorm=2.17, loss_scale=2, train_wall=10, gb_free=2.8, wall=55805
2021-06-19 10:09:02 | INFO | train_inner | epoch 002: 1942 / 3002 loss=2.746, ppl=6.71, wps=5892.2, ups=0.09, wpb=64800, bsz=128, num_updates=4911, lr=9.99687e-05, gnorm=2.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=55816
2021-06-19 10:09:13 | INFO | train_inner | epoch 002: 1943 / 3002 loss=2.808, ppl=7, wps=5900.8, ups=0.09, wpb=64789, bsz=128, num_updates=4912, lr=9.99687e-05, gnorm=4.291, loss_scale=2, train_wall=11, gb_free=2.8, wall=55827
2021-06-19 10:09:24 | INFO | train_inner | epoch 002: 1944 / 3002 loss=2.798, ppl=6.95, wps=5827, ups=0.09, wpb=64741, bsz=128, num_updates=4913, lr=9.99687e-05, gnorm=2.304, loss_scale=2, train_wall=11, gb_free=2.8, wall=55839
2021-06-19 10:09:35 | INFO | train_inner | epoch 002: 1945 / 3002 loss=2.682, ppl=6.42, wps=5876, ups=0.09, wpb=64802, bsz=128, num_updates=4914, lr=9.99687e-05, gnorm=2.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=55850
2021-06-19 10:09:46 | INFO | train_inner | epoch 002: 1946 / 3002 loss=2.774, ppl=6.84, wps=5958.8, ups=0.09, wpb=64799, bsz=128, num_updates=4915, lr=9.99687e-05, gnorm=2.695, loss_scale=2, train_wall=10, gb_free=2.8, wall=55860
2021-06-19 10:09:57 | INFO | train_inner | epoch 002: 1947 / 3002 loss=2.811, ppl=7.02, wps=5831.9, ups=0.09, wpb=64800, bsz=128, num_updates=4916, lr=9.99687e-05, gnorm=38.292, loss_scale=2, train_wall=11, gb_free=2.8, wall=55872
2021-06-19 10:10:08 | INFO | train_inner | epoch 002: 1948 / 3002 loss=2.707, ppl=6.53, wps=5879.1, ups=0.09, wpb=64805, bsz=128, num_updates=4917, lr=9.99687e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=55883
2021-06-19 10:10:19 | INFO | train_inner | epoch 002: 1949 / 3002 loss=2.743, ppl=6.69, wps=5835.4, ups=0.09, wpb=64803, bsz=128, num_updates=4918, lr=9.99687e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=55894
2021-06-19 10:10:31 | INFO | train_inner | epoch 002: 1950 / 3002 loss=2.76, ppl=6.78, wps=5738.2, ups=0.09, wpb=64757, bsz=128, num_updates=4919, lr=9.99686e-05, gnorm=2.291, loss_scale=2, train_wall=11, gb_free=2.8, wall=55905
2021-06-19 10:10:42 | INFO | train_inner | epoch 002: 1951 / 3002 loss=2.776, ppl=6.85, wps=5898.8, ups=0.09, wpb=64845, bsz=128, num_updates=4920, lr=9.99686e-05, gnorm=2.285, loss_scale=4, train_wall=11, gb_free=2.8, wall=55916
2021-06-19 10:10:53 | INFO | train_inner | epoch 002: 1952 / 3002 loss=2.776, ppl=6.85, wps=5908.8, ups=0.09, wpb=64805, bsz=128, num_updates=4921, lr=9.99686e-05, gnorm=2.338, loss_scale=4, train_wall=11, gb_free=2.8, wall=55927
2021-06-19 10:11:04 | INFO | train_inner | epoch 002: 1953 / 3002 loss=2.773, ppl=6.83, wps=5810.2, ups=0.09, wpb=64867, bsz=128, num_updates=4922, lr=9.99686e-05, gnorm=5.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=55938
2021-06-19 10:11:15 | INFO | train_inner | epoch 002: 1954 / 3002 loss=2.793, ppl=6.93, wps=5855.9, ups=0.09, wpb=64798, bsz=128, num_updates=4923, lr=9.99686e-05, gnorm=2.823, loss_scale=4, train_wall=11, gb_free=2.8, wall=55949
2021-06-19 10:11:26 | INFO | train_inner | epoch 002: 1955 / 3002 loss=2.829, ppl=7.1, wps=5800.7, ups=0.09, wpb=64753, bsz=128, num_updates=4924, lr=9.99686e-05, gnorm=2.487, loss_scale=4, train_wall=11, gb_free=2.8, wall=55960
2021-06-19 10:11:37 | INFO | train_inner | epoch 002: 1956 / 3002 loss=2.816, ppl=7.04, wps=5844.6, ups=0.09, wpb=64854, bsz=128, num_updates=4925, lr=9.99686e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=55971
2021-06-19 10:11:48 | INFO | train_inner | epoch 002: 1957 / 3002 loss=2.982, ppl=7.9, wps=5798.9, ups=0.09, wpb=64814, bsz=128, num_updates=4926, lr=9.99686e-05, gnorm=2.263, loss_scale=4, train_wall=11, gb_free=2.8, wall=55983
2021-06-19 10:11:59 | INFO | train_inner | epoch 002: 1958 / 3002 loss=2.705, ppl=6.52, wps=5805.9, ups=0.09, wpb=64753, bsz=128, num_updates=4927, lr=9.99686e-05, gnorm=2.406, loss_scale=4, train_wall=11, gb_free=2.8, wall=55994
2021-06-19 10:12:10 | INFO | train_inner | epoch 002: 1959 / 3002 loss=2.687, ppl=6.44, wps=5882.8, ups=0.09, wpb=64869, bsz=128, num_updates=4928, lr=9.99686e-05, gnorm=2.343, loss_scale=4, train_wall=11, gb_free=2.8, wall=56005
2021-06-19 10:12:22 | INFO | train_inner | epoch 002: 1960 / 3002 loss=2.832, ppl=7.12, wps=5889.6, ups=0.09, wpb=64889, bsz=128, num_updates=4929, lr=9.99686e-05, gnorm=2.518, loss_scale=4, train_wall=11, gb_free=2.8, wall=56016
2021-06-19 10:12:33 | INFO | train_inner | epoch 002: 1961 / 3002 loss=2.77, ppl=6.82, wps=5849.7, ups=0.09, wpb=64873, bsz=128, num_updates=4930, lr=9.99686e-05, gnorm=2.494, loss_scale=4, train_wall=11, gb_free=2.8, wall=56027
2021-06-19 10:12:44 | INFO | train_inner | epoch 002: 1962 / 3002 loss=2.843, ppl=7.17, wps=5838.2, ups=0.09, wpb=64879, bsz=128, num_updates=4931, lr=9.99685e-05, gnorm=8.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=56038
2021-06-19 10:12:55 | INFO | train_inner | epoch 002: 1963 / 3002 loss=2.587, ppl=6.01, wps=5806.4, ups=0.09, wpb=64892, bsz=128, num_updates=4932, lr=9.99685e-05, gnorm=8.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=56049
2021-06-19 10:13:06 | INFO | train_inner | epoch 002: 1964 / 3002 loss=2.81, ppl=7.01, wps=5834.3, ups=0.09, wpb=64869, bsz=128, num_updates=4933, lr=9.99685e-05, gnorm=2.4, loss_scale=4, train_wall=11, gb_free=2.8, wall=56060
2021-06-19 10:13:17 | INFO | train_inner | epoch 002: 1965 / 3002 loss=2.713, ppl=6.56, wps=5698.1, ups=0.09, wpb=64775, bsz=128, num_updates=4934, lr=9.99685e-05, gnorm=2.455, loss_scale=4, train_wall=11, gb_free=2.8, wall=56072
2021-06-19 10:13:28 | INFO | train_inner | epoch 002: 1966 / 3002 loss=2.691, ppl=6.46, wps=5857.5, ups=0.09, wpb=64812, bsz=128, num_updates=4935, lr=9.99685e-05, gnorm=2.887, loss_scale=4, train_wall=11, gb_free=2.8, wall=56083
2021-06-19 10:13:40 | INFO | train_inner | epoch 002: 1967 / 3002 loss=2.879, ppl=7.36, wps=5758.9, ups=0.09, wpb=64825, bsz=128, num_updates=4936, lr=9.99685e-05, gnorm=2.278, loss_scale=4, train_wall=11, gb_free=2.8, wall=56094
2021-06-19 10:13:51 | INFO | train_inner | epoch 002: 1968 / 3002 loss=2.787, ppl=6.9, wps=5870.6, ups=0.09, wpb=64760, bsz=128, num_updates=4937, lr=9.99685e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=56105
2021-06-19 10:14:02 | INFO | train_inner | epoch 002: 1969 / 3002 loss=2.741, ppl=6.68, wps=5717.2, ups=0.09, wpb=64784, bsz=128, num_updates=4938, lr=9.99685e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=56116
2021-06-19 10:14:13 | INFO | train_inner | epoch 002: 1970 / 3002 loss=2.902, ppl=7.47, wps=5892.8, ups=0.09, wpb=64808, bsz=128, num_updates=4939, lr=9.99685e-05, gnorm=2.363, loss_scale=4, train_wall=11, gb_free=2.8, wall=56127
2021-06-19 10:14:24 | INFO | train_inner | epoch 002: 1971 / 3002 loss=2.754, ppl=6.75, wps=5809.4, ups=0.09, wpb=64781, bsz=128, num_updates=4940, lr=9.99685e-05, gnorm=2.832, loss_scale=4, train_wall=11, gb_free=2.8, wall=56139
2021-06-19 10:14:35 | INFO | train_inner | epoch 002: 1972 / 3002 loss=2.617, ppl=6.13, wps=6003.1, ups=0.09, wpb=64854, bsz=128, num_updates=4941, lr=9.99685e-05, gnorm=2.351, loss_scale=4, train_wall=10, gb_free=2.8, wall=56149
2021-06-19 10:14:46 | INFO | train_inner | epoch 002: 1973 / 3002 loss=2.813, ppl=7.03, wps=5818.7, ups=0.09, wpb=64786, bsz=128, num_updates=4942, lr=9.99685e-05, gnorm=2.525, loss_scale=4, train_wall=11, gb_free=2.8, wall=56160
2021-06-19 10:14:57 | INFO | train_inner | epoch 002: 1974 / 3002 loss=2.768, ppl=6.81, wps=5917, ups=0.09, wpb=64841, bsz=128, num_updates=4943, lr=9.99685e-05, gnorm=2.535, loss_scale=4, train_wall=10, gb_free=2.8, wall=56171
2021-06-19 10:15:08 | INFO | train_inner | epoch 002: 1975 / 3002 loss=2.911, ppl=7.52, wps=5922.3, ups=0.09, wpb=64847, bsz=128, num_updates=4944, lr=9.99684e-05, gnorm=2.271, loss_scale=4, train_wall=10, gb_free=2.8, wall=56182
2021-06-19 10:15:19 | INFO | train_inner | epoch 002: 1976 / 3002 loss=2.736, ppl=6.66, wps=5732.7, ups=0.09, wpb=64830, bsz=128, num_updates=4945, lr=9.99684e-05, gnorm=2.32, loss_scale=4, train_wall=11, gb_free=2.8, wall=56194
2021-06-19 10:15:30 | INFO | train_inner | epoch 002: 1977 / 3002 loss=2.809, ppl=7.01, wps=5848.8, ups=0.09, wpb=64806, bsz=128, num_updates=4946, lr=9.99684e-05, gnorm=2.494, loss_scale=4, train_wall=11, gb_free=2.8, wall=56205
2021-06-19 10:15:42 | INFO | train_inner | epoch 002: 1978 / 3002 loss=2.594, ppl=6.04, wps=5835.1, ups=0.09, wpb=64833, bsz=128, num_updates=4947, lr=9.99684e-05, gnorm=3.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=56216
2021-06-19 10:15:53 | INFO | train_inner | epoch 002: 1979 / 3002 loss=2.859, ppl=7.26, wps=5750.4, ups=0.09, wpb=64830, bsz=128, num_updates=4948, lr=9.99684e-05, gnorm=2.199, loss_scale=4, train_wall=11, gb_free=2.8, wall=56227
2021-06-19 10:16:04 | INFO | train_inner | epoch 002: 1980 / 3002 loss=2.798, ppl=6.95, wps=5795.2, ups=0.09, wpb=64777, bsz=128, num_updates=4949, lr=9.99684e-05, gnorm=3.738, loss_scale=4, train_wall=11, gb_free=2.8, wall=56238
2021-06-19 10:16:15 | INFO | train_inner | epoch 002: 1981 / 3002 loss=2.789, ppl=6.91, wps=5754.1, ups=0.09, wpb=64839, bsz=128, num_updates=4950, lr=9.99684e-05, gnorm=12.572, loss_scale=4, train_wall=11, gb_free=2.8, wall=56250
2021-06-19 10:16:26 | INFO | train_inner | epoch 002: 1982 / 3002 loss=2.665, ppl=6.34, wps=5838.3, ups=0.09, wpb=64878, bsz=128, num_updates=4951, lr=9.99684e-05, gnorm=2.775, loss_scale=4, train_wall=11, gb_free=2.8, wall=56261
2021-06-19 10:16:37 | INFO | train_inner | epoch 002: 1983 / 3002 loss=2.638, ppl=6.22, wps=5889.8, ups=0.09, wpb=64836, bsz=128, num_updates=4952, lr=9.99684e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=56272
2021-06-19 10:16:49 | INFO | train_inner | epoch 002: 1984 / 3002 loss=2.754, ppl=6.74, wps=5828, ups=0.09, wpb=64834, bsz=128, num_updates=4953, lr=9.99684e-05, gnorm=2.586, loss_scale=4, train_wall=11, gb_free=2.8, wall=56283
2021-06-19 10:16:59 | INFO | train_inner | epoch 002: 1985 / 3002 loss=2.686, ppl=6.43, wps=5964.7, ups=0.09, wpb=64859, bsz=128, num_updates=4954, lr=9.99684e-05, gnorm=2.249, loss_scale=4, train_wall=10, gb_free=2.8, wall=56294
2021-06-19 10:17:11 | INFO | train_inner | epoch 002: 1986 / 3002 loss=2.721, ppl=6.59, wps=5841.6, ups=0.09, wpb=64883, bsz=128, num_updates=4955, lr=9.99684e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=56305
2021-06-19 10:17:22 | INFO | train_inner | epoch 002: 1987 / 3002 loss=2.739, ppl=6.68, wps=5809.3, ups=0.09, wpb=64855, bsz=128, num_updates=4956, lr=9.99683e-05, gnorm=2.413, loss_scale=4, train_wall=11, gb_free=2.8, wall=56316
2021-06-19 10:17:33 | INFO | train_inner | epoch 002: 1988 / 3002 loss=2.755, ppl=6.75, wps=5817.5, ups=0.09, wpb=64845, bsz=128, num_updates=4957, lr=9.99683e-05, gnorm=2.297, loss_scale=4, train_wall=11, gb_free=2.8, wall=56327
2021-06-19 10:17:44 | INFO | train_inner | epoch 002: 1989 / 3002 loss=2.724, ppl=6.61, wps=5814.7, ups=0.09, wpb=64825, bsz=128, num_updates=4958, lr=9.99683e-05, gnorm=2.367, loss_scale=4, train_wall=11, gb_free=2.8, wall=56338
2021-06-19 10:17:55 | INFO | train_inner | epoch 002: 1990 / 3002 loss=2.552, ppl=5.86, wps=5759, ups=0.09, wpb=64833, bsz=128, num_updates=4959, lr=9.99683e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=56350
2021-06-19 10:18:06 | INFO | train_inner | epoch 002: 1991 / 3002 loss=2.764, ppl=6.79, wps=5829.4, ups=0.09, wpb=64839, bsz=128, num_updates=4960, lr=9.99683e-05, gnorm=2.535, loss_scale=4, train_wall=11, gb_free=2.8, wall=56361
2021-06-19 10:18:17 | INFO | train_inner | epoch 002: 1992 / 3002 loss=2.783, ppl=6.88, wps=5835.9, ups=0.09, wpb=64991, bsz=128, num_updates=4961, lr=9.99683e-05, gnorm=2.414, loss_scale=4, train_wall=11, gb_free=2.8, wall=56372
2021-06-19 10:18:29 | INFO | train_inner | epoch 002: 1993 / 3002 loss=2.773, ppl=6.83, wps=5741.6, ups=0.09, wpb=64863, bsz=128, num_updates=4962, lr=9.99683e-05, gnorm=2.292, loss_scale=4, train_wall=11, gb_free=2.8, wall=56383
2021-06-19 10:18:40 | INFO | train_inner | epoch 002: 1994 / 3002 loss=2.799, ppl=6.96, wps=5902.5, ups=0.09, wpb=64830, bsz=128, num_updates=4963, lr=9.99683e-05, gnorm=2.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=56394
2021-06-19 10:18:51 | INFO | train_inner | epoch 002: 1995 / 3002 loss=2.76, ppl=6.77, wps=5853.4, ups=0.09, wpb=64837, bsz=128, num_updates=4964, lr=9.99683e-05, gnorm=2.471, loss_scale=4, train_wall=11, gb_free=2.8, wall=56405
2021-06-19 10:19:02 | INFO | train_inner | epoch 002: 1996 / 3002 loss=2.833, ppl=7.12, wps=5904.5, ups=0.09, wpb=64878, bsz=128, num_updates=4965, lr=9.99683e-05, gnorm=4.567, loss_scale=4, train_wall=11, gb_free=2.8, wall=56416
2021-06-19 10:19:13 | INFO | train_inner | epoch 002: 1997 / 3002 loss=2.66, ppl=6.32, wps=5850.6, ups=0.09, wpb=64792, bsz=128, num_updates=4966, lr=9.99683e-05, gnorm=2.397, loss_scale=4, train_wall=11, gb_free=2.8, wall=56427
2021-06-19 10:19:24 | INFO | train_inner | epoch 002: 1998 / 3002 loss=2.745, ppl=6.71, wps=5895.4, ups=0.09, wpb=64796, bsz=128, num_updates=4967, lr=9.99683e-05, gnorm=2.377, loss_scale=4, train_wall=11, gb_free=2.8, wall=56438
2021-06-19 10:19:35 | INFO | train_inner | epoch 002: 1999 / 3002 loss=2.816, ppl=7.04, wps=5889.8, ups=0.09, wpb=64743, bsz=128, num_updates=4968, lr=9.99683e-05, gnorm=3.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=56449
2021-06-19 10:19:46 | INFO | train_inner | epoch 002: 2000 / 3002 loss=2.825, ppl=7.09, wps=5788.4, ups=0.09, wpb=64834, bsz=128, num_updates=4969, lr=9.99682e-05, gnorm=4.528, loss_scale=4, train_wall=11, gb_free=2.8, wall=56460
2021-06-19 10:19:57 | INFO | train_inner | epoch 002: 2001 / 3002 loss=2.791, ppl=6.92, wps=5822.9, ups=0.09, wpb=64783, bsz=128, num_updates=4970, lr=9.99682e-05, gnorm=2.49, loss_scale=4, train_wall=11, gb_free=2.8, wall=56472
2021-06-19 10:20:08 | INFO | train_inner | epoch 002: 2002 / 3002 loss=2.793, ppl=6.93, wps=5821.3, ups=0.09, wpb=64845, bsz=128, num_updates=4971, lr=9.99682e-05, gnorm=2.355, loss_scale=4, train_wall=11, gb_free=2.8, wall=56483
2021-06-19 10:20:19 | INFO | train_inner | epoch 002: 2003 / 3002 loss=2.852, ppl=7.22, wps=5862.5, ups=0.09, wpb=64779, bsz=128, num_updates=4972, lr=9.99682e-05, gnorm=2.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=56494
2021-06-19 10:20:30 | INFO | train_inner | epoch 002: 2004 / 3002 loss=2.724, ppl=6.61, wps=5941.5, ups=0.09, wpb=64808, bsz=128, num_updates=4973, lr=9.99682e-05, gnorm=6.989, loss_scale=4, train_wall=10, gb_free=2.8, wall=56505
2021-06-19 10:20:41 | INFO | train_inner | epoch 002: 2005 / 3002 loss=2.684, ppl=6.43, wps=5894.8, ups=0.09, wpb=64829, bsz=128, num_updates=4974, lr=9.99682e-05, gnorm=2.616, loss_scale=4, train_wall=11, gb_free=2.8, wall=56516
2021-06-19 10:20:53 | INFO | train_inner | epoch 002: 2006 / 3002 loss=2.632, ppl=6.2, wps=5749.7, ups=0.09, wpb=64822, bsz=128, num_updates=4975, lr=9.99682e-05, gnorm=2.432, loss_scale=4, train_wall=11, gb_free=2.8, wall=56527
2021-06-19 10:21:04 | INFO | train_inner | epoch 002: 2007 / 3002 loss=2.732, ppl=6.64, wps=5880.5, ups=0.09, wpb=64857, bsz=128, num_updates=4976, lr=9.99682e-05, gnorm=2.497, loss_scale=4, train_wall=11, gb_free=2.8, wall=56538
2021-06-19 10:21:15 | INFO | train_inner | epoch 002: 2008 / 3002 loss=2.681, ppl=6.42, wps=5802, ups=0.09, wpb=64812, bsz=128, num_updates=4977, lr=9.99682e-05, gnorm=2.419, loss_scale=4, train_wall=11, gb_free=2.8, wall=56549
2021-06-19 10:21:26 | INFO | train_inner | epoch 002: 2009 / 3002 loss=2.609, ppl=6.1, wps=5776.4, ups=0.09, wpb=64892, bsz=128, num_updates=4978, lr=9.99682e-05, gnorm=2.515, loss_scale=4, train_wall=11, gb_free=2.8, wall=56560
2021-06-19 10:21:37 | INFO | train_inner | epoch 002: 2010 / 3002 loss=2.852, ppl=7.22, wps=5915.6, ups=0.09, wpb=64860, bsz=128, num_updates=4979, lr=9.99682e-05, gnorm=2.775, loss_scale=4, train_wall=11, gb_free=2.8, wall=56571
2021-06-19 10:21:48 | INFO | train_inner | epoch 002: 2011 / 3002 loss=2.796, ppl=6.95, wps=5924.9, ups=0.09, wpb=64733, bsz=128, num_updates=4980, lr=9.99682e-05, gnorm=2.765, loss_scale=4, train_wall=10, gb_free=2.8, wall=56582
2021-06-19 10:21:59 | INFO | train_inner | epoch 002: 2012 / 3002 loss=2.694, ppl=6.47, wps=5863.9, ups=0.09, wpb=64909, bsz=128, num_updates=4981, lr=9.99681e-05, gnorm=2.502, loss_scale=4, train_wall=11, gb_free=2.8, wall=56593
2021-06-19 10:22:10 | INFO | train_inner | epoch 002: 2013 / 3002 loss=2.733, ppl=6.65, wps=5849.6, ups=0.09, wpb=64866, bsz=128, num_updates=4982, lr=9.99681e-05, gnorm=3.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=56604
2021-06-19 10:22:21 | INFO | train_inner | epoch 002: 2014 / 3002 loss=2.662, ppl=6.33, wps=5835.7, ups=0.09, wpb=64880, bsz=128, num_updates=4983, lr=9.99681e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=56616
2021-06-19 10:22:32 | INFO | train_inner | epoch 002: 2015 / 3002 loss=2.743, ppl=6.69, wps=5893.7, ups=0.09, wpb=64848, bsz=128, num_updates=4984, lr=9.99681e-05, gnorm=2.399, loss_scale=4, train_wall=11, gb_free=2.8, wall=56627
2021-06-19 10:22:43 | INFO | train_inner | epoch 002: 2016 / 3002 loss=2.701, ppl=6.5, wps=5939.3, ups=0.09, wpb=64846, bsz=128, num_updates=4985, lr=9.99681e-05, gnorm=2.472, loss_scale=4, train_wall=10, gb_free=2.8, wall=56637
2021-06-19 10:22:54 | INFO | train_inner | epoch 002: 2017 / 3002 loss=2.669, ppl=6.36, wps=5818.2, ups=0.09, wpb=64864, bsz=128, num_updates=4986, lr=9.99681e-05, gnorm=2.435, loss_scale=4, train_wall=11, gb_free=2.8, wall=56649
2021-06-19 10:23:05 | INFO | train_inner | epoch 002: 2018 / 3002 loss=2.733, ppl=6.65, wps=5783.3, ups=0.09, wpb=64806, bsz=128, num_updates=4987, lr=9.99681e-05, gnorm=2.641, loss_scale=4, train_wall=11, gb_free=2.8, wall=56660
2021-06-19 10:23:17 | INFO | train_inner | epoch 002: 2019 / 3002 loss=2.642, ppl=6.24, wps=5776.9, ups=0.09, wpb=64749, bsz=128, num_updates=4988, lr=9.99681e-05, gnorm=2.843, loss_scale=4, train_wall=11, gb_free=2.8, wall=56671
2021-06-19 10:23:28 | INFO | train_inner | epoch 002: 2020 / 3002 loss=2.677, ppl=6.4, wps=5921.2, ups=0.09, wpb=64874, bsz=128, num_updates=4989, lr=9.99681e-05, gnorm=2.314, loss_scale=4, train_wall=10, gb_free=2.8, wall=56682
2021-06-19 10:23:39 | INFO | train_inner | epoch 002: 2021 / 3002 loss=2.826, ppl=7.09, wps=5862.3, ups=0.09, wpb=64916, bsz=128, num_updates=4990, lr=9.99681e-05, gnorm=8.448, loss_scale=4, train_wall=11, gb_free=2.8, wall=56693
2021-06-19 10:23:50 | INFO | train_inner | epoch 002: 2022 / 3002 loss=2.748, ppl=6.72, wps=5759.7, ups=0.09, wpb=64821, bsz=128, num_updates=4991, lr=9.99681e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=56704
2021-06-19 10:24:01 | INFO | train_inner | epoch 002: 2023 / 3002 loss=2.711, ppl=6.55, wps=5845.1, ups=0.09, wpb=64885, bsz=128, num_updates=4992, lr=9.99681e-05, gnorm=3.882, loss_scale=4, train_wall=11, gb_free=2.8, wall=56715
2021-06-19 10:24:12 | INFO | train_inner | epoch 002: 2024 / 3002 loss=2.859, ppl=7.26, wps=5939.2, ups=0.09, wpb=64901, bsz=128, num_updates=4993, lr=9.99681e-05, gnorm=2.614, loss_scale=4, train_wall=10, gb_free=2.8, wall=56726
2021-06-19 10:24:23 | INFO | train_inner | epoch 002: 2025 / 3002 loss=2.736, ppl=6.66, wps=5904.7, ups=0.09, wpb=64857, bsz=128, num_updates=4994, lr=9.9968e-05, gnorm=2.792, loss_scale=4, train_wall=11, gb_free=2.8, wall=56737
2021-06-19 10:24:34 | INFO | train_inner | epoch 002: 2026 / 3002 loss=2.669, ppl=6.36, wps=5889.3, ups=0.09, wpb=64793, bsz=128, num_updates=4995, lr=9.9968e-05, gnorm=2.275, loss_scale=4, train_wall=11, gb_free=2.8, wall=56748
2021-06-19 10:24:45 | INFO | train_inner | epoch 002: 2027 / 3002 loss=2.614, ppl=6.12, wps=5960.6, ups=0.09, wpb=64822, bsz=128, num_updates=4996, lr=9.9968e-05, gnorm=2.572, loss_scale=4, train_wall=10, gb_free=2.8, wall=56759
2021-06-19 10:24:56 | INFO | train_inner | epoch 002: 2028 / 3002 loss=2.773, ppl=6.84, wps=5738.9, ups=0.09, wpb=64754, bsz=128, num_updates=4997, lr=9.9968e-05, gnorm=4.472, loss_scale=4, train_wall=11, gb_free=2.8, wall=56770
2021-06-19 10:25:07 | INFO | train_inner | epoch 002: 2029 / 3002 loss=2.899, ppl=7.46, wps=5862, ups=0.09, wpb=64807, bsz=128, num_updates=4998, lr=9.9968e-05, gnorm=2.47, loss_scale=4, train_wall=11, gb_free=2.8, wall=56782
2021-06-19 10:25:18 | INFO | train_inner | epoch 002: 2030 / 3002 loss=2.856, ppl=7.24, wps=5946.6, ups=0.09, wpb=64942, bsz=128, num_updates=4999, lr=9.9968e-05, gnorm=8.935, loss_scale=4, train_wall=10, gb_free=2.8, wall=56792
2021-06-19 10:25:29 | INFO | train_inner | epoch 002: 2031 / 3002 loss=2.819, ppl=7.05, wps=5810.9, ups=0.09, wpb=64835, bsz=128, num_updates=5000, lr=9.9968e-05, gnorm=2.618, loss_scale=4, train_wall=11, gb_free=2.8, wall=56804
2021-06-19 10:25:40 | INFO | train_inner | epoch 002: 2032 / 3002 loss=2.722, ppl=6.6, wps=5812.9, ups=0.09, wpb=64738, bsz=128, num_updates=5001, lr=9.9968e-05, gnorm=3.183, loss_scale=4, train_wall=11, gb_free=2.8, wall=56815
2021-06-19 10:25:52 | INFO | train_inner | epoch 002: 2033 / 3002 loss=2.655, ppl=6.3, wps=5832.8, ups=0.09, wpb=64813, bsz=128, num_updates=5002, lr=9.9968e-05, gnorm=2.513, loss_scale=4, train_wall=11, gb_free=2.8, wall=56826
2021-06-19 10:26:02 | INFO | train_inner | epoch 002: 2034 / 3002 loss=2.597, ppl=6.05, wps=5934.4, ups=0.09, wpb=64840, bsz=128, num_updates=5003, lr=9.9968e-05, gnorm=2.379, loss_scale=4, train_wall=10, gb_free=2.8, wall=56837
2021-06-19 10:26:14 | INFO | train_inner | epoch 002: 2035 / 3002 loss=2.725, ppl=6.61, wps=5754.6, ups=0.09, wpb=64852, bsz=128, num_updates=5004, lr=9.9968e-05, gnorm=4.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=56848
2021-06-19 10:26:25 | INFO | train_inner | epoch 002: 2036 / 3002 loss=2.569, ppl=5.93, wps=5905.1, ups=0.09, wpb=64865, bsz=128, num_updates=5005, lr=9.9968e-05, gnorm=2.624, loss_scale=4, train_wall=11, gb_free=2.8, wall=56859
2021-06-19 10:26:36 | INFO | train_inner | epoch 002: 2037 / 3002 loss=2.626, ppl=6.17, wps=5802.4, ups=0.09, wpb=64768, bsz=128, num_updates=5006, lr=9.99679e-05, gnorm=2.568, loss_scale=4, train_wall=11, gb_free=2.8, wall=56870
2021-06-19 10:26:47 | INFO | train_inner | epoch 002: 2038 / 3002 loss=2.766, ppl=6.8, wps=5877.5, ups=0.09, wpb=64789, bsz=128, num_updates=5007, lr=9.99679e-05, gnorm=2.498, loss_scale=4, train_wall=11, gb_free=2.8, wall=56881
2021-06-19 10:26:58 | INFO | train_inner | epoch 002: 2039 / 3002 loss=2.661, ppl=6.32, wps=5823.1, ups=0.09, wpb=64785, bsz=128, num_updates=5008, lr=9.99679e-05, gnorm=2.414, loss_scale=4, train_wall=11, gb_free=2.8, wall=56892
2021-06-19 10:27:09 | INFO | train_inner | epoch 002: 2040 / 3002 loss=2.632, ppl=6.2, wps=5895.9, ups=0.09, wpb=64868, bsz=128, num_updates=5009, lr=9.99679e-05, gnorm=2.335, loss_scale=4, train_wall=11, gb_free=2.8, wall=56903
2021-06-19 10:27:20 | INFO | train_inner | epoch 002: 2041 / 3002 loss=2.823, ppl=7.08, wps=5869.1, ups=0.09, wpb=64782, bsz=128, num_updates=5010, lr=9.99679e-05, gnorm=5.294, loss_scale=4, train_wall=11, gb_free=2.8, wall=56914
2021-06-19 10:27:31 | INFO | train_inner | epoch 002: 2042 / 3002 loss=2.841, ppl=7.16, wps=5860.6, ups=0.09, wpb=64818, bsz=128, num_updates=5011, lr=9.99679e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=56925
2021-06-19 10:27:42 | INFO | train_inner | epoch 002: 2043 / 3002 loss=2.629, ppl=6.19, wps=5884.2, ups=0.09, wpb=64857, bsz=128, num_updates=5012, lr=9.99679e-05, gnorm=2.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=56936
2021-06-19 10:27:53 | INFO | train_inner | epoch 002: 2044 / 3002 loss=2.666, ppl=6.35, wps=5904.5, ups=0.09, wpb=64876, bsz=128, num_updates=5013, lr=9.99679e-05, gnorm=2.378, loss_scale=4, train_wall=11, gb_free=2.8, wall=56947
2021-06-19 10:28:04 | INFO | train_inner | epoch 002: 2045 / 3002 loss=2.767, ppl=6.81, wps=5883.7, ups=0.09, wpb=64823, bsz=128, num_updates=5014, lr=9.99679e-05, gnorm=2.327, loss_scale=4, train_wall=11, gb_free=2.8, wall=56958
2021-06-19 10:28:15 | INFO | train_inner | epoch 002: 2046 / 3002 loss=2.717, ppl=6.57, wps=5924, ups=0.09, wpb=64817, bsz=128, num_updates=5015, lr=9.99679e-05, gnorm=2.452, loss_scale=4, train_wall=11, gb_free=2.8, wall=56969
2021-06-19 10:28:26 | INFO | train_inner | epoch 002: 2047 / 3002 loss=2.675, ppl=6.39, wps=5949.7, ups=0.09, wpb=64824, bsz=128, num_updates=5016, lr=9.99679e-05, gnorm=2.409, loss_scale=4, train_wall=10, gb_free=2.8, wall=56980
2021-06-19 10:28:37 | INFO | train_inner | epoch 002: 2048 / 3002 loss=2.652, ppl=6.28, wps=5807.4, ups=0.09, wpb=64814, bsz=128, num_updates=5017, lr=9.99679e-05, gnorm=2.331, loss_scale=4, train_wall=11, gb_free=2.8, wall=56991
2021-06-19 10:28:49 | INFO | train_inner | epoch 002: 2049 / 3002 loss=2.771, ppl=6.83, wps=5688.4, ups=0.09, wpb=64837, bsz=128, num_updates=5018, lr=9.99679e-05, gnorm=2.284, loss_scale=4, train_wall=11, gb_free=2.8, wall=57003
2021-06-19 10:28:59 | INFO | train_inner | epoch 002: 2050 / 3002 loss=2.591, ppl=6.03, wps=5919.8, ups=0.09, wpb=64884, bsz=128, num_updates=5019, lr=9.99678e-05, gnorm=2.645, loss_scale=4, train_wall=10, gb_free=2.8, wall=57014
2021-06-19 10:29:11 | INFO | train_inner | epoch 002: 2051 / 3002 loss=2.643, ppl=6.25, wps=5862.5, ups=0.09, wpb=64783, bsz=128, num_updates=5020, lr=9.99678e-05, gnorm=2.313, loss_scale=4, train_wall=11, gb_free=2.8, wall=57025
2021-06-19 10:29:22 | INFO | train_inner | epoch 002: 2052 / 3002 loss=2.674, ppl=6.38, wps=5785.9, ups=0.09, wpb=64827, bsz=128, num_updates=5021, lr=9.99678e-05, gnorm=2.251, loss_scale=4, train_wall=11, gb_free=2.8, wall=57036
2021-06-19 10:29:33 | INFO | train_inner | epoch 002: 2053 / 3002 loss=2.725, ppl=6.61, wps=5719.5, ups=0.09, wpb=64811, bsz=128, num_updates=5022, lr=9.99678e-05, gnorm=2.316, loss_scale=4, train_wall=11, gb_free=2.8, wall=57047
2021-06-19 10:29:44 | INFO | train_inner | epoch 002: 2054 / 3002 loss=2.912, ppl=7.53, wps=5879.1, ups=0.09, wpb=64774, bsz=128, num_updates=5023, lr=9.99678e-05, gnorm=2.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=57058
2021-06-19 10:29:55 | INFO | train_inner | epoch 002: 2055 / 3002 loss=2.719, ppl=6.59, wps=5806.9, ups=0.09, wpb=64745, bsz=128, num_updates=5024, lr=9.99678e-05, gnorm=2.389, loss_scale=4, train_wall=11, gb_free=2.8, wall=57070
2021-06-19 10:30:06 | INFO | train_inner | epoch 002: 2056 / 3002 loss=2.766, ppl=6.8, wps=5870.7, ups=0.09, wpb=64737, bsz=128, num_updates=5025, lr=9.99678e-05, gnorm=2.324, loss_scale=4, train_wall=11, gb_free=2.8, wall=57081
2021-06-19 10:30:17 | INFO | train_inner | epoch 002: 2057 / 3002 loss=2.887, ppl=7.4, wps=5775.4, ups=0.09, wpb=64790, bsz=128, num_updates=5026, lr=9.99678e-05, gnorm=2.308, loss_scale=4, train_wall=11, gb_free=2.8, wall=57092
2021-06-19 10:30:29 | INFO | train_inner | epoch 002: 2058 / 3002 loss=2.589, ppl=6.02, wps=5782.6, ups=0.09, wpb=64837, bsz=128, num_updates=5027, lr=9.99678e-05, gnorm=2.634, loss_scale=4, train_wall=11, gb_free=2.8, wall=57103
2021-06-19 10:30:40 | INFO | train_inner | epoch 002: 2059 / 3002 loss=2.678, ppl=6.4, wps=5908, ups=0.09, wpb=64903, bsz=128, num_updates=5028, lr=9.99678e-05, gnorm=2.315, loss_scale=4, train_wall=11, gb_free=2.8, wall=57114
2021-06-19 10:30:51 | INFO | train_inner | epoch 002: 2060 / 3002 loss=2.715, ppl=6.56, wps=5727, ups=0.09, wpb=64885, bsz=128, num_updates=5029, lr=9.99678e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=57125
2021-06-19 10:31:02 | INFO | train_inner | epoch 002: 2061 / 3002 loss=2.684, ppl=6.43, wps=5741.1, ups=0.09, wpb=64826, bsz=128, num_updates=5030, lr=9.99678e-05, gnorm=2.691, loss_scale=4, train_wall=11, gb_free=2.8, wall=57137
2021-06-19 10:31:13 | INFO | train_inner | epoch 002: 2062 / 3002 loss=2.954, ppl=7.75, wps=5841.4, ups=0.09, wpb=64824, bsz=128, num_updates=5031, lr=9.99677e-05, gnorm=2.538, loss_scale=4, train_wall=11, gb_free=2.8, wall=57148
2021-06-19 10:31:24 | INFO | train_inner | epoch 002: 2063 / 3002 loss=2.906, ppl=7.5, wps=5926.4, ups=0.09, wpb=64879, bsz=128, num_updates=5032, lr=9.99677e-05, gnorm=3.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=57159
2021-06-19 10:31:35 | INFO | train_inner | epoch 002: 2064 / 3002 loss=2.708, ppl=6.53, wps=5922, ups=0.09, wpb=64861, bsz=128, num_updates=5033, lr=9.99677e-05, gnorm=2.329, loss_scale=4, train_wall=10, gb_free=2.8, wall=57170
2021-06-19 10:31:46 | INFO | train_inner | epoch 002: 2065 / 3002 loss=2.639, ppl=6.23, wps=5975.7, ups=0.09, wpb=64800, bsz=128, num_updates=5034, lr=9.99677e-05, gnorm=2.239, loss_scale=4, train_wall=10, gb_free=2.8, wall=57180
2021-06-19 10:31:57 | INFO | train_inner | epoch 002: 2066 / 3002 loss=2.816, ppl=7.04, wps=5862.8, ups=0.09, wpb=64851, bsz=128, num_updates=5035, lr=9.99677e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=57192
2021-06-19 10:32:08 | INFO | train_inner | epoch 002: 2067 / 3002 loss=2.696, ppl=6.48, wps=5945.8, ups=0.09, wpb=64939, bsz=128, num_updates=5036, lr=9.99677e-05, gnorm=2.471, loss_scale=4, train_wall=10, gb_free=2.8, wall=57202
2021-06-19 10:32:19 | INFO | train_inner | epoch 002: 2068 / 3002 loss=2.741, ppl=6.69, wps=5822.7, ups=0.09, wpb=64861, bsz=128, num_updates=5037, lr=9.99677e-05, gnorm=2.272, loss_scale=4, train_wall=11, gb_free=2.8, wall=57214
2021-06-19 10:32:30 | INFO | train_inner | epoch 002: 2069 / 3002 loss=2.587, ppl=6.01, wps=6059.6, ups=0.09, wpb=64898, bsz=128, num_updates=5038, lr=9.99677e-05, gnorm=2.312, loss_scale=4, train_wall=10, gb_free=2.8, wall=57224
2021-06-19 10:32:41 | INFO | train_inner | epoch 002: 2070 / 3002 loss=2.796, ppl=6.95, wps=5848, ups=0.09, wpb=64858, bsz=128, num_updates=5039, lr=9.99677e-05, gnorm=2.379, loss_scale=4, train_wall=11, gb_free=2.8, wall=57235
2021-06-19 10:32:52 | INFO | train_inner | epoch 002: 2071 / 3002 loss=2.718, ppl=6.58, wps=5897.7, ups=0.09, wpb=64915, bsz=128, num_updates=5040, lr=9.99677e-05, gnorm=2.182, loss_scale=4, train_wall=11, gb_free=2.8, wall=57246
2021-06-19 10:33:03 | INFO | train_inner | epoch 002: 2072 / 3002 loss=2.651, ppl=6.28, wps=5941.7, ups=0.09, wpb=64879, bsz=128, num_updates=5041, lr=9.99677e-05, gnorm=2.225, loss_scale=4, train_wall=10, gb_free=2.8, wall=57257
2021-06-19 10:33:14 | INFO | train_inner | epoch 002: 2073 / 3002 loss=2.73, ppl=6.64, wps=5839.7, ups=0.09, wpb=64807, bsz=128, num_updates=5042, lr=9.99677e-05, gnorm=2.563, loss_scale=4, train_wall=11, gb_free=2.8, wall=57268
2021-06-19 10:33:25 | INFO | train_inner | epoch 002: 2074 / 3002 loss=2.726, ppl=6.62, wps=5752.1, ups=0.09, wpb=64793, bsz=128, num_updates=5043, lr=9.99677e-05, gnorm=2.182, loss_scale=4, train_wall=11, gb_free=2.8, wall=57280
2021-06-19 10:33:37 | INFO | train_inner | epoch 002: 2075 / 3002 loss=2.615, ppl=6.13, wps=5806.7, ups=0.09, wpb=64810, bsz=128, num_updates=5044, lr=9.99676e-05, gnorm=2.587, loss_scale=4, train_wall=11, gb_free=2.8, wall=57291
2021-06-19 10:33:48 | INFO | train_inner | epoch 002: 2076 / 3002 loss=2.789, ppl=6.91, wps=5706.6, ups=0.09, wpb=64809, bsz=128, num_updates=5045, lr=9.99676e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=57302
2021-06-19 10:33:59 | INFO | train_inner | epoch 002: 2077 / 3002 loss=2.786, ppl=6.9, wps=5796.3, ups=0.09, wpb=64745, bsz=128, num_updates=5046, lr=9.99676e-05, gnorm=2.335, loss_scale=4, train_wall=11, gb_free=2.8, wall=57313
2021-06-19 10:34:10 | INFO | train_inner | epoch 002: 2078 / 3002 loss=2.966, ppl=7.81, wps=5808.9, ups=0.09, wpb=64821, bsz=128, num_updates=5047, lr=9.99676e-05, gnorm=2.321, loss_scale=4, train_wall=11, gb_free=2.8, wall=57325
2021-06-19 10:34:21 | INFO | train_inner | epoch 002: 2079 / 3002 loss=2.62, ppl=6.15, wps=5858.2, ups=0.09, wpb=64840, bsz=128, num_updates=5048, lr=9.99676e-05, gnorm=2.272, loss_scale=8, train_wall=11, gb_free=2.8, wall=57336
2021-06-19 10:34:32 | INFO | train_inner | epoch 002: 2080 / 3002 loss=2.667, ppl=6.35, wps=5866, ups=0.09, wpb=64827, bsz=128, num_updates=5049, lr=9.99676e-05, gnorm=2.224, loss_scale=8, train_wall=11, gb_free=2.8, wall=57347
2021-06-19 10:34:44 | INFO | train_inner | epoch 002: 2081 / 3002 loss=2.802, ppl=6.97, wps=5794.4, ups=0.09, wpb=64814, bsz=128, num_updates=5050, lr=9.99676e-05, gnorm=2.283, loss_scale=8, train_wall=11, gb_free=2.8, wall=57358
2021-06-19 10:34:54 | INFO | train_inner | epoch 002: 2082 / 3002 loss=2.703, ppl=6.51, wps=5971, ups=0.09, wpb=64880, bsz=128, num_updates=5051, lr=9.99676e-05, gnorm=2.386, loss_scale=8, train_wall=10, gb_free=2.8, wall=57369
2021-06-19 10:35:06 | INFO | train_inner | epoch 002: 2083 / 3002 loss=2.767, ppl=6.81, wps=5731.5, ups=0.09, wpb=64868, bsz=128, num_updates=5052, lr=9.99676e-05, gnorm=2.317, loss_scale=8, train_wall=11, gb_free=2.8, wall=57380
2021-06-19 10:35:17 | INFO | train_inner | epoch 002: 2084 / 3002 loss=2.669, ppl=6.36, wps=5780.6, ups=0.09, wpb=64818, bsz=128, num_updates=5053, lr=9.99676e-05, gnorm=2.627, loss_scale=8, train_wall=11, gb_free=2.8, wall=57391
2021-06-19 10:35:28 | INFO | train_inner | epoch 002: 2085 / 3002 loss=2.895, ppl=7.44, wps=5952.8, ups=0.09, wpb=64915, bsz=128, num_updates=5054, lr=9.99676e-05, gnorm=2.215, loss_scale=8, train_wall=10, gb_free=2.8, wall=57402
2021-06-19 10:35:39 | INFO | train_inner | epoch 002: 2086 / 3002 loss=2.751, ppl=6.73, wps=5903.4, ups=0.09, wpb=64880, bsz=128, num_updates=5055, lr=9.99676e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=57413
2021-06-19 10:35:50 | INFO | train_inner | epoch 002: 2087 / 3002 loss=2.605, ppl=6.08, wps=5865.1, ups=0.09, wpb=64852, bsz=128, num_updates=5056, lr=9.99675e-05, gnorm=2.329, loss_scale=8, train_wall=11, gb_free=2.8, wall=57424
2021-06-19 10:36:01 | INFO | train_inner | epoch 002: 2088 / 3002 loss=2.74, ppl=6.68, wps=5759.8, ups=0.09, wpb=64817, bsz=128, num_updates=5057, lr=9.99675e-05, gnorm=2.235, loss_scale=8, train_wall=11, gb_free=2.8, wall=57435
2021-06-19 10:36:12 | INFO | train_inner | epoch 002: 2089 / 3002 loss=2.894, ppl=7.43, wps=5852.2, ups=0.09, wpb=64841, bsz=128, num_updates=5058, lr=9.99675e-05, gnorm=2.265, loss_scale=8, train_wall=11, gb_free=2.8, wall=57447
2021-06-19 10:36:23 | INFO | train_inner | epoch 002: 2090 / 3002 loss=2.766, ppl=6.8, wps=5889.4, ups=0.09, wpb=64812, bsz=128, num_updates=5059, lr=9.99675e-05, gnorm=2.171, loss_scale=8, train_wall=11, gb_free=2.8, wall=57458
2021-06-19 10:36:34 | INFO | train_inner | epoch 002: 2091 / 3002 loss=2.889, ppl=7.41, wps=5755, ups=0.09, wpb=64892, bsz=128, num_updates=5060, lr=9.99675e-05, gnorm=2.268, loss_scale=8, train_wall=11, gb_free=2.8, wall=57469
2021-06-19 10:36:46 | INFO | train_inner | epoch 002: 2092 / 3002 loss=2.814, ppl=7.03, wps=5835.6, ups=0.09, wpb=64779, bsz=128, num_updates=5061, lr=9.99675e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=57480
2021-06-19 10:36:57 | INFO | train_inner | epoch 002: 2093 / 3002 loss=2.704, ppl=6.52, wps=5893.3, ups=0.09, wpb=64769, bsz=128, num_updates=5062, lr=9.99675e-05, gnorm=2.495, loss_scale=8, train_wall=11, gb_free=2.8, wall=57491
2021-06-19 10:37:08 | INFO | train_inner | epoch 002: 2094 / 3002 loss=2.707, ppl=6.53, wps=5817.5, ups=0.09, wpb=64772, bsz=128, num_updates=5063, lr=9.99675e-05, gnorm=2.355, loss_scale=8, train_wall=11, gb_free=2.8, wall=57502
2021-06-19 10:37:19 | INFO | train_inner | epoch 002: 2095 / 3002 loss=2.688, ppl=6.44, wps=5824.9, ups=0.09, wpb=64791, bsz=128, num_updates=5064, lr=9.99675e-05, gnorm=2.469, loss_scale=8, train_wall=11, gb_free=2.8, wall=57513
2021-06-19 10:37:30 | INFO | train_inner | epoch 002: 2096 / 3002 loss=2.648, ppl=6.27, wps=5848.8, ups=0.09, wpb=64892, bsz=128, num_updates=5065, lr=9.99675e-05, gnorm=2.25, loss_scale=8, train_wall=11, gb_free=2.8, wall=57524
2021-06-19 10:37:41 | INFO | train_inner | epoch 002: 2097 / 3002 loss=2.762, ppl=6.78, wps=5835.6, ups=0.09, wpb=64842, bsz=128, num_updates=5066, lr=9.99675e-05, gnorm=2.51, loss_scale=8, train_wall=11, gb_free=2.8, wall=57535
2021-06-19 10:37:52 | INFO | train_inner | epoch 002: 2098 / 3002 loss=2.832, ppl=7.12, wps=5851.7, ups=0.09, wpb=64800, bsz=128, num_updates=5067, lr=9.99675e-05, gnorm=2.353, loss_scale=8, train_wall=11, gb_free=2.8, wall=57546
2021-06-19 10:38:03 | INFO | train_inner | epoch 002: 2099 / 3002 loss=2.744, ppl=6.7, wps=5873.8, ups=0.09, wpb=64846, bsz=128, num_updates=5068, lr=9.99675e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=57557
2021-06-19 10:38:14 | INFO | train_inner | epoch 002: 2100 / 3002 loss=2.604, ppl=6.08, wps=5819.9, ups=0.09, wpb=64843, bsz=128, num_updates=5069, lr=9.99674e-05, gnorm=2.411, loss_scale=8, train_wall=11, gb_free=2.8, wall=57569
2021-06-19 10:38:25 | INFO | train_inner | epoch 002: 2101 / 3002 loss=2.685, ppl=6.43, wps=5846, ups=0.09, wpb=64845, bsz=128, num_updates=5070, lr=9.99674e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=57580
2021-06-19 10:38:37 | INFO | train_inner | epoch 002: 2102 / 3002 loss=2.707, ppl=6.53, wps=5805.3, ups=0.09, wpb=64848, bsz=128, num_updates=5071, lr=9.99674e-05, gnorm=2.619, loss_scale=8, train_wall=11, gb_free=2.8, wall=57591
2021-06-19 10:38:48 | INFO | train_inner | epoch 002: 2103 / 3002 loss=2.758, ppl=6.77, wps=5776.9, ups=0.09, wpb=64774, bsz=128, num_updates=5072, lr=9.99674e-05, gnorm=2.315, loss_scale=8, train_wall=11, gb_free=2.8, wall=57602
2021-06-19 10:38:59 | INFO | train_inner | epoch 002: 2104 / 3002 loss=2.881, ppl=7.37, wps=5864.4, ups=0.09, wpb=64881, bsz=128, num_updates=5073, lr=9.99674e-05, gnorm=2.23, loss_scale=8, train_wall=11, gb_free=2.8, wall=57613
2021-06-19 10:39:10 | INFO | train_inner | epoch 002: 2105 / 3002 loss=2.686, ppl=6.44, wps=5771.4, ups=0.09, wpb=64783, bsz=128, num_updates=5074, lr=9.99674e-05, gnorm=2.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=57624
2021-06-19 10:39:21 | INFO | train_inner | epoch 002: 2106 / 3002 loss=2.822, ppl=7.07, wps=5878.5, ups=0.09, wpb=64772, bsz=128, num_updates=5075, lr=9.99674e-05, gnorm=2.248, loss_scale=8, train_wall=11, gb_free=2.8, wall=57635
2021-06-19 10:39:32 | INFO | train_inner | epoch 002: 2107 / 3002 loss=2.869, ppl=7.31, wps=5922.3, ups=0.09, wpb=64813, bsz=128, num_updates=5076, lr=9.99674e-05, gnorm=2.154, loss_scale=8, train_wall=10, gb_free=2.8, wall=57646
2021-06-19 10:39:43 | INFO | train_inner | epoch 002: 2108 / 3002 loss=2.751, ppl=6.73, wps=5827.1, ups=0.09, wpb=64832, bsz=128, num_updates=5077, lr=9.99674e-05, gnorm=5.683, loss_scale=8, train_wall=11, gb_free=2.8, wall=57657
2021-06-19 10:39:54 | INFO | train_inner | epoch 002: 2109 / 3002 loss=2.777, ppl=6.85, wps=5793.1, ups=0.09, wpb=64751, bsz=128, num_updates=5078, lr=9.99674e-05, gnorm=2.202, loss_scale=8, train_wall=11, gb_free=2.8, wall=57669
2021-06-19 10:40:05 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-19 10:40:17 | INFO | train_inner | epoch 002: 2111 / 3002 loss=2.678, ppl=6.4, wps=2924.4, ups=0.05, wpb=64853, bsz=128, num_updates=5079, lr=9.99674e-05, gnorm=2.155, loss_scale=4, train_wall=21, gb_free=2.8, wall=57691
2021-06-19 10:40:28 | INFO | train_inner | epoch 002: 2112 / 3002 loss=2.741, ppl=6.69, wps=5774.1, ups=0.09, wpb=64821, bsz=128, num_updates=5080, lr=9.99674e-05, gnorm=2.309, loss_scale=4, train_wall=11, gb_free=2.8, wall=57702
2021-06-19 10:40:39 | INFO | train_inner | epoch 002: 2113 / 3002 loss=2.645, ppl=6.26, wps=5888, ups=0.09, wpb=64773, bsz=128, num_updates=5081, lr=9.99673e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=57713
2021-06-19 10:40:50 | INFO | train_inner | epoch 002: 2114 / 3002 loss=2.485, ppl=5.6, wps=5860.5, ups=0.09, wpb=64900, bsz=128, num_updates=5082, lr=9.99673e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=57724
2021-06-19 10:41:01 | INFO | train_inner | epoch 002: 2115 / 3002 loss=2.919, ppl=7.56, wps=5933.6, ups=0.09, wpb=64812, bsz=128, num_updates=5083, lr=9.99673e-05, gnorm=2.261, loss_scale=4, train_wall=10, gb_free=2.8, wall=57735
2021-06-19 10:41:12 | INFO | train_inner | epoch 002: 2116 / 3002 loss=2.731, ppl=6.64, wps=5858.7, ups=0.09, wpb=64815, bsz=128, num_updates=5084, lr=9.99673e-05, gnorm=2.24, loss_scale=4, train_wall=11, gb_free=2.8, wall=57746
2021-06-19 10:41:23 | INFO | train_inner | epoch 002: 2117 / 3002 loss=2.693, ppl=6.47, wps=5844, ups=0.09, wpb=64798, bsz=128, num_updates=5085, lr=9.99673e-05, gnorm=2.159, loss_scale=4, train_wall=11, gb_free=2.8, wall=57757
2021-06-19 10:41:34 | INFO | train_inner | epoch 002: 2118 / 3002 loss=2.711, ppl=6.55, wps=6017.1, ups=0.09, wpb=64905, bsz=128, num_updates=5086, lr=9.99673e-05, gnorm=2.176, loss_scale=4, train_wall=10, gb_free=2.8, wall=57768
2021-06-19 10:41:45 | INFO | train_inner | epoch 002: 2119 / 3002 loss=2.647, ppl=6.26, wps=5842.5, ups=0.09, wpb=64869, bsz=128, num_updates=5087, lr=9.99673e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=57779
2021-06-19 10:41:56 | INFO | train_inner | epoch 002: 2120 / 3002 loss=2.79, ppl=6.92, wps=5899.4, ups=0.09, wpb=64853, bsz=128, num_updates=5088, lr=9.99673e-05, gnorm=2.219, loss_scale=4, train_wall=11, gb_free=2.8, wall=57790
2021-06-19 10:42:07 | INFO | train_inner | epoch 002: 2121 / 3002 loss=2.853, ppl=7.22, wps=5944, ups=0.09, wpb=64805, bsz=128, num_updates=5089, lr=9.99673e-05, gnorm=2.269, loss_scale=4, train_wall=10, gb_free=2.8, wall=57801
2021-06-19 10:42:18 | INFO | train_inner | epoch 002: 2122 / 3002 loss=2.865, ppl=7.29, wps=5887.2, ups=0.09, wpb=64860, bsz=128, num_updates=5090, lr=9.99673e-05, gnorm=2.133, loss_scale=4, train_wall=11, gb_free=2.8, wall=57812
2021-06-19 10:42:29 | INFO | train_inner | epoch 002: 2123 / 3002 loss=2.791, ppl=6.92, wps=5941, ups=0.09, wpb=64792, bsz=128, num_updates=5091, lr=9.99673e-05, gnorm=2.226, loss_scale=4, train_wall=10, gb_free=2.8, wall=57823
2021-06-19 10:42:40 | INFO | train_inner | epoch 002: 2124 / 3002 loss=2.701, ppl=6.5, wps=5793, ups=0.09, wpb=64806, bsz=128, num_updates=5092, lr=9.99673e-05, gnorm=2.647, loss_scale=4, train_wall=11, gb_free=2.8, wall=57834
2021-06-19 10:42:51 | INFO | train_inner | epoch 002: 2125 / 3002 loss=2.739, ppl=6.68, wps=5771.3, ups=0.09, wpb=64817, bsz=128, num_updates=5093, lr=9.99673e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=57845
2021-06-19 10:43:02 | INFO | train_inner | epoch 002: 2126 / 3002 loss=2.716, ppl=6.57, wps=5836.6, ups=0.09, wpb=64820, bsz=128, num_updates=5094, lr=9.99672e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=57856
2021-06-19 10:43:13 | INFO | train_inner | epoch 002: 2127 / 3002 loss=2.673, ppl=6.38, wps=5761.2, ups=0.09, wpb=64718, bsz=128, num_updates=5095, lr=9.99672e-05, gnorm=2.109, loss_scale=4, train_wall=11, gb_free=2.8, wall=57868
2021-06-19 10:43:24 | INFO | train_inner | epoch 002: 2128 / 3002 loss=2.784, ppl=6.89, wps=5838.3, ups=0.09, wpb=64788, bsz=128, num_updates=5096, lr=9.99672e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=57879
2021-06-19 10:43:35 | INFO | train_inner | epoch 002: 2129 / 3002 loss=2.69, ppl=6.45, wps=5937.5, ups=0.09, wpb=64892, bsz=128, num_updates=5097, lr=9.99672e-05, gnorm=2.183, loss_scale=4, train_wall=10, gb_free=2.8, wall=57890
2021-06-19 10:43:46 | INFO | train_inner | epoch 002: 2130 / 3002 loss=2.711, ppl=6.55, wps=5835.5, ups=0.09, wpb=64832, bsz=128, num_updates=5098, lr=9.99672e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=57901
2021-06-19 10:43:58 | INFO | train_inner | epoch 002: 2131 / 3002 loss=2.606, ppl=6.09, wps=5866.4, ups=0.09, wpb=64864, bsz=128, num_updates=5099, lr=9.99672e-05, gnorm=2.533, loss_scale=4, train_wall=11, gb_free=2.8, wall=57912
2021-06-19 10:44:09 | INFO | train_inner | epoch 002: 2132 / 3002 loss=2.694, ppl=6.47, wps=5736, ups=0.09, wpb=64832, bsz=128, num_updates=5100, lr=9.99672e-05, gnorm=2.269, loss_scale=4, train_wall=11, gb_free=2.8, wall=57923
2021-06-19 10:44:20 | INFO | train_inner | epoch 002: 2133 / 3002 loss=2.635, ppl=6.21, wps=5859.6, ups=0.09, wpb=64831, bsz=128, num_updates=5101, lr=9.99672e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=57934
2021-06-19 10:44:31 | INFO | train_inner | epoch 002: 2134 / 3002 loss=2.835, ppl=7.13, wps=5744.2, ups=0.09, wpb=64852, bsz=128, num_updates=5102, lr=9.99672e-05, gnorm=2.494, loss_scale=4, train_wall=11, gb_free=2.8, wall=57946
2021-06-19 10:44:42 | INFO | train_inner | epoch 002: 2135 / 3002 loss=2.67, ppl=6.37, wps=5915.6, ups=0.09, wpb=64807, bsz=128, num_updates=5103, lr=9.99672e-05, gnorm=2.282, loss_scale=4, train_wall=11, gb_free=2.8, wall=57956
2021-06-19 10:44:53 | INFO | train_inner | epoch 002: 2136 / 3002 loss=2.774, ppl=6.84, wps=5861.1, ups=0.09, wpb=64724, bsz=128, num_updates=5104, lr=9.99672e-05, gnorm=2.408, loss_scale=4, train_wall=11, gb_free=2.8, wall=57968
2021-06-19 10:45:04 | INFO | train_inner | epoch 002: 2137 / 3002 loss=2.74, ppl=6.68, wps=5848.9, ups=0.09, wpb=64819, bsz=128, num_updates=5105, lr=9.99672e-05, gnorm=2.133, loss_scale=4, train_wall=11, gb_free=2.8, wall=57979
2021-06-19 10:45:15 | INFO | train_inner | epoch 002: 2138 / 3002 loss=2.585, ppl=6, wps=5824.7, ups=0.09, wpb=64777, bsz=128, num_updates=5106, lr=9.99671e-05, gnorm=2.231, loss_scale=4, train_wall=11, gb_free=2.8, wall=57990
2021-06-19 10:45:27 | INFO | train_inner | epoch 002: 2139 / 3002 loss=2.633, ppl=6.2, wps=5830.3, ups=0.09, wpb=64806, bsz=128, num_updates=5107, lr=9.99671e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=58001
2021-06-19 10:45:38 | INFO | train_inner | epoch 002: 2140 / 3002 loss=2.632, ppl=6.2, wps=5837.4, ups=0.09, wpb=64881, bsz=128, num_updates=5108, lr=9.99671e-05, gnorm=2.115, loss_scale=4, train_wall=11, gb_free=2.8, wall=58012
2021-06-19 10:45:49 | INFO | train_inner | epoch 002: 2141 / 3002 loss=2.633, ppl=6.2, wps=5759.5, ups=0.09, wpb=64877, bsz=128, num_updates=5109, lr=9.99671e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=58023
2021-06-19 10:46:00 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-19 10:46:11 | INFO | train_inner | epoch 002: 2143 / 3002 loss=2.727, ppl=6.62, wps=2942.4, ups=0.05, wpb=64778, bsz=128, num_updates=5110, lr=9.99671e-05, gnorm=2.269, loss_scale=2, train_wall=21, gb_free=2.8, wall=58045
2021-06-19 10:46:22 | INFO | train_inner | epoch 002: 2144 / 3002 loss=2.702, ppl=6.51, wps=5821.1, ups=0.09, wpb=64858, bsz=128, num_updates=5111, lr=9.99671e-05, gnorm=2.221, loss_scale=2, train_wall=11, gb_free=2.8, wall=58056
2021-06-19 10:46:33 | INFO | train_inner | epoch 002: 2145 / 3002 loss=2.839, ppl=7.16, wps=5893.5, ups=0.09, wpb=64813, bsz=128, num_updates=5112, lr=9.99671e-05, gnorm=2.239, loss_scale=2, train_wall=11, gb_free=2.8, wall=58067
2021-06-19 10:46:44 | INFO | train_inner | epoch 002: 2146 / 3002 loss=2.674, ppl=6.38, wps=5936.9, ups=0.09, wpb=64831, bsz=128, num_updates=5113, lr=9.99671e-05, gnorm=2.25, loss_scale=2, train_wall=10, gb_free=2.8, wall=58078
2021-06-19 10:46:55 | INFO | train_inner | epoch 002: 2147 / 3002 loss=2.726, ppl=6.62, wps=5994.8, ups=0.09, wpb=64859, bsz=128, num_updates=5114, lr=9.99671e-05, gnorm=2.19, loss_scale=2, train_wall=10, gb_free=2.8, wall=58089
2021-06-19 10:47:06 | INFO | train_inner | epoch 002: 2148 / 3002 loss=2.731, ppl=6.64, wps=5856.7, ups=0.09, wpb=64781, bsz=128, num_updates=5115, lr=9.99671e-05, gnorm=2.284, loss_scale=2, train_wall=11, gb_free=2.8, wall=58100
2021-06-19 10:47:17 | INFO | train_inner | epoch 002: 2149 / 3002 loss=2.744, ppl=6.7, wps=5947.5, ups=0.09, wpb=64932, bsz=128, num_updates=5116, lr=9.99671e-05, gnorm=2.302, loss_scale=2, train_wall=10, gb_free=2.8, wall=58111
2021-06-19 10:47:28 | INFO | train_inner | epoch 002: 2150 / 3002 loss=2.572, ppl=5.95, wps=5850.6, ups=0.09, wpb=64789, bsz=128, num_updates=5117, lr=9.99671e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=58122
2021-06-19 10:47:39 | INFO | train_inner | epoch 002: 2151 / 3002 loss=2.596, ppl=6.05, wps=5940.7, ups=0.09, wpb=64859, bsz=128, num_updates=5118, lr=9.99671e-05, gnorm=2.331, loss_scale=2, train_wall=10, gb_free=2.8, wall=58133
2021-06-19 10:47:50 | INFO | train_inner | epoch 002: 2152 / 3002 loss=2.81, ppl=7.01, wps=5905.9, ups=0.09, wpb=64822, bsz=128, num_updates=5119, lr=9.9967e-05, gnorm=2.34, loss_scale=2, train_wall=11, gb_free=2.8, wall=58144
2021-06-19 10:48:01 | INFO | train_inner | epoch 002: 2153 / 3002 loss=2.764, ppl=6.79, wps=5740.4, ups=0.09, wpb=64819, bsz=128, num_updates=5120, lr=9.9967e-05, gnorm=2.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=58155
2021-06-19 10:48:12 | INFO | train_inner | epoch 002: 2154 / 3002 loss=2.708, ppl=6.53, wps=5853.4, ups=0.09, wpb=64896, bsz=128, num_updates=5121, lr=9.9967e-05, gnorm=2.402, loss_scale=2, train_wall=11, gb_free=2.8, wall=58166
2021-06-19 10:48:23 | INFO | train_inner | epoch 002: 2155 / 3002 loss=2.689, ppl=6.45, wps=5738.8, ups=0.09, wpb=64808, bsz=128, num_updates=5122, lr=9.9967e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=58178
2021-06-19 10:48:35 | INFO | train_inner | epoch 002: 2156 / 3002 loss=2.836, ppl=7.14, wps=5806.2, ups=0.09, wpb=64828, bsz=128, num_updates=5123, lr=9.9967e-05, gnorm=2.234, loss_scale=2, train_wall=11, gb_free=2.8, wall=58189
2021-06-19 10:48:46 | INFO | train_inner | epoch 002: 2157 / 3002 loss=2.819, ppl=7.05, wps=5937.1, ups=0.09, wpb=64859, bsz=128, num_updates=5124, lr=9.9967e-05, gnorm=2.207, loss_scale=2, train_wall=10, gb_free=2.8, wall=58200
2021-06-19 10:48:57 | INFO | train_inner | epoch 002: 2158 / 3002 loss=2.654, ppl=6.29, wps=5833.9, ups=0.09, wpb=65003, bsz=128, num_updates=5125, lr=9.9967e-05, gnorm=2.228, loss_scale=2, train_wall=11, gb_free=2.8, wall=58211
2021-06-19 10:49:08 | INFO | train_inner | epoch 002: 2159 / 3002 loss=2.702, ppl=6.51, wps=5836.3, ups=0.09, wpb=64922, bsz=128, num_updates=5126, lr=9.9967e-05, gnorm=2.241, loss_scale=2, train_wall=11, gb_free=2.8, wall=58222
2021-06-19 10:49:19 | INFO | train_inner | epoch 002: 2160 / 3002 loss=2.705, ppl=6.52, wps=5876.2, ups=0.09, wpb=64884, bsz=128, num_updates=5127, lr=9.9967e-05, gnorm=2.287, loss_scale=2, train_wall=11, gb_free=2.8, wall=58233
2021-06-19 10:49:30 | INFO | train_inner | epoch 002: 2161 / 3002 loss=2.799, ppl=6.96, wps=5817.8, ups=0.09, wpb=64742, bsz=128, num_updates=5128, lr=9.9967e-05, gnorm=2.277, loss_scale=2, train_wall=11, gb_free=2.8, wall=58244
2021-06-19 10:49:41 | INFO | train_inner | epoch 002: 2162 / 3002 loss=2.707, ppl=6.53, wps=5843.9, ups=0.09, wpb=64793, bsz=128, num_updates=5129, lr=9.9967e-05, gnorm=2.201, loss_scale=2, train_wall=11, gb_free=2.8, wall=58255
2021-06-19 10:49:52 | INFO | train_inner | epoch 002: 2163 / 3002 loss=2.903, ppl=7.48, wps=5920.8, ups=0.09, wpb=64814, bsz=128, num_updates=5130, lr=9.9967e-05, gnorm=2.254, loss_scale=2, train_wall=10, gb_free=2.8, wall=58266
2021-06-19 10:50:03 | INFO | train_inner | epoch 002: 2164 / 3002 loss=2.752, ppl=6.74, wps=5995, ups=0.09, wpb=64805, bsz=128, num_updates=5131, lr=9.99669e-05, gnorm=2.605, loss_scale=2, train_wall=10, gb_free=2.8, wall=58277
2021-06-19 10:50:14 | INFO | train_inner | epoch 002: 2165 / 3002 loss=2.738, ppl=6.67, wps=5851.8, ups=0.09, wpb=64890, bsz=128, num_updates=5132, lr=9.99669e-05, gnorm=2.352, loss_scale=2, train_wall=11, gb_free=2.8, wall=58288
2021-06-19 10:50:25 | INFO | train_inner | epoch 002: 2166 / 3002 loss=2.752, ppl=6.73, wps=5857, ups=0.09, wpb=64814, bsz=128, num_updates=5133, lr=9.99669e-05, gnorm=2.561, loss_scale=2, train_wall=11, gb_free=2.8, wall=58299
2021-06-19 10:50:36 | INFO | train_inner | epoch 002: 2167 / 3002 loss=2.781, ppl=6.88, wps=5862.2, ups=0.09, wpb=64858, bsz=128, num_updates=5134, lr=9.99669e-05, gnorm=2.25, loss_scale=2, train_wall=11, gb_free=2.8, wall=58310
2021-06-19 10:50:47 | INFO | train_inner | epoch 002: 2168 / 3002 loss=2.556, ppl=5.88, wps=5800, ups=0.09, wpb=64872, bsz=128, num_updates=5135, lr=9.99669e-05, gnorm=2.202, loss_scale=2, train_wall=11, gb_free=2.8, wall=58322
2021-06-19 10:50:58 | INFO | train_inner | epoch 002: 2169 / 3002 loss=2.738, ppl=6.67, wps=5867, ups=0.09, wpb=64857, bsz=128, num_updates=5136, lr=9.99669e-05, gnorm=2.551, loss_scale=2, train_wall=11, gb_free=2.8, wall=58333
2021-06-19 10:51:09 | INFO | train_inner | epoch 002: 2170 / 3002 loss=2.812, ppl=7.02, wps=5867, ups=0.09, wpb=64719, bsz=128, num_updates=5137, lr=9.99669e-05, gnorm=2.341, loss_scale=2, train_wall=11, gb_free=2.8, wall=58344
2021-06-19 10:51:20 | INFO | train_inner | epoch 002: 2171 / 3002 loss=2.705, ppl=6.52, wps=5796.7, ups=0.09, wpb=64859, bsz=128, num_updates=5138, lr=9.99669e-05, gnorm=2.889, loss_scale=2, train_wall=11, gb_free=2.8, wall=58355
2021-06-19 10:51:32 | INFO | train_inner | epoch 002: 2172 / 3002 loss=2.602, ppl=6.07, wps=5837.8, ups=0.09, wpb=64915, bsz=128, num_updates=5139, lr=9.99669e-05, gnorm=2.2, loss_scale=2, train_wall=11, gb_free=2.8, wall=58366
2021-06-19 10:51:43 | INFO | train_inner | epoch 002: 2173 / 3002 loss=2.741, ppl=6.69, wps=5802.3, ups=0.09, wpb=64854, bsz=128, num_updates=5140, lr=9.99669e-05, gnorm=5.739, loss_scale=2, train_wall=11, gb_free=2.8, wall=58377
2021-06-19 10:51:54 | INFO | train_inner | epoch 002: 2174 / 3002 loss=2.679, ppl=6.4, wps=5844, ups=0.09, wpb=64869, bsz=128, num_updates=5141, lr=9.99669e-05, gnorm=2.26, loss_scale=2, train_wall=11, gb_free=2.8, wall=58388
2021-06-19 10:52:05 | INFO | train_inner | epoch 002: 2175 / 3002 loss=2.796, ppl=6.95, wps=5924.6, ups=0.09, wpb=64900, bsz=128, num_updates=5142, lr=9.99669e-05, gnorm=2.228, loss_scale=2, train_wall=10, gb_free=2.8, wall=58399
2021-06-19 10:52:16 | INFO | train_inner | epoch 002: 2176 / 3002 loss=2.835, ppl=7.13, wps=5907.8, ups=0.09, wpb=64902, bsz=128, num_updates=5143, lr=9.99669e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=58410
2021-06-19 10:52:27 | INFO | train_inner | epoch 002: 2177 / 3002 loss=2.69, ppl=6.45, wps=5968.7, ups=0.09, wpb=64865, bsz=128, num_updates=5144, lr=9.99668e-05, gnorm=3.029, loss_scale=2, train_wall=10, gb_free=2.8, wall=58421
2021-06-19 10:52:38 | INFO | train_inner | epoch 002: 2178 / 3002 loss=2.605, ppl=6.08, wps=5761.2, ups=0.09, wpb=64761, bsz=128, num_updates=5145, lr=9.99668e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=58432
2021-06-19 10:52:49 | INFO | train_inner | epoch 002: 2179 / 3002 loss=2.794, ppl=6.94, wps=5785.1, ups=0.09, wpb=64824, bsz=128, num_updates=5146, lr=9.99668e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=58443
2021-06-19 10:53:00 | INFO | train_inner | epoch 002: 2180 / 3002 loss=2.782, ppl=6.88, wps=5880.7, ups=0.09, wpb=64897, bsz=128, num_updates=5147, lr=9.99668e-05, gnorm=2.174, loss_scale=2, train_wall=11, gb_free=2.8, wall=58454
2021-06-19 10:53:11 | INFO | train_inner | epoch 002: 2181 / 3002 loss=2.627, ppl=6.18, wps=5852.8, ups=0.09, wpb=64822, bsz=128, num_updates=5148, lr=9.99668e-05, gnorm=2.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=58466
2021-06-19 10:53:22 | INFO | train_inner | epoch 002: 2182 / 3002 loss=2.722, ppl=6.6, wps=5887.9, ups=0.09, wpb=64776, bsz=128, num_updates=5149, lr=9.99668e-05, gnorm=2.201, loss_scale=2, train_wall=11, gb_free=2.8, wall=58477
2021-06-19 10:53:33 | INFO | train_inner | epoch 002: 2183 / 3002 loss=2.805, ppl=6.99, wps=5801.7, ups=0.09, wpb=64830, bsz=128, num_updates=5150, lr=9.99668e-05, gnorm=2.367, loss_scale=2, train_wall=11, gb_free=2.8, wall=58488
2021-06-19 10:53:45 | INFO | train_inner | epoch 002: 2184 / 3002 loss=2.61, ppl=6.11, wps=5783, ups=0.09, wpb=64808, bsz=128, num_updates=5151, lr=9.99668e-05, gnorm=2.521, loss_scale=2, train_wall=11, gb_free=2.8, wall=58499
2021-06-19 10:53:56 | INFO | train_inner | epoch 002: 2185 / 3002 loss=2.716, ppl=6.57, wps=5807, ups=0.09, wpb=64852, bsz=128, num_updates=5152, lr=9.99668e-05, gnorm=2.265, loss_scale=2, train_wall=11, gb_free=2.8, wall=58510
2021-06-19 10:54:07 | INFO | train_inner | epoch 002: 2186 / 3002 loss=2.785, ppl=6.89, wps=5837.2, ups=0.09, wpb=64802, bsz=128, num_updates=5153, lr=9.99668e-05, gnorm=3.566, loss_scale=2, train_wall=11, gb_free=2.8, wall=58521
2021-06-19 10:54:18 | INFO | train_inner | epoch 002: 2187 / 3002 loss=2.748, ppl=6.72, wps=5747.1, ups=0.09, wpb=64816, bsz=128, num_updates=5154, lr=9.99668e-05, gnorm=2.197, loss_scale=2, train_wall=11, gb_free=2.8, wall=58533
2021-06-19 10:54:29 | INFO | train_inner | epoch 002: 2188 / 3002 loss=2.745, ppl=6.7, wps=5855.3, ups=0.09, wpb=64815, bsz=128, num_updates=5155, lr=9.99668e-05, gnorm=2.253, loss_scale=2, train_wall=11, gb_free=2.8, wall=58544
2021-06-19 10:54:40 | INFO | train_inner | epoch 002: 2189 / 3002 loss=2.795, ppl=6.94, wps=5894.7, ups=0.09, wpb=64832, bsz=128, num_updates=5156, lr=9.99667e-05, gnorm=2.328, loss_scale=2, train_wall=11, gb_free=2.8, wall=58555
2021-06-19 10:54:51 | INFO | train_inner | epoch 002: 2190 / 3002 loss=2.876, ppl=7.34, wps=5786.1, ups=0.09, wpb=64912, bsz=128, num_updates=5157, lr=9.99667e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=58566
2021-06-19 10:55:02 | INFO | train_inner | epoch 002: 2191 / 3002 loss=2.657, ppl=6.31, wps=5954.7, ups=0.09, wpb=64846, bsz=128, num_updates=5158, lr=9.99667e-05, gnorm=2.165, loss_scale=2, train_wall=10, gb_free=2.8, wall=58577
2021-06-19 10:55:13 | INFO | train_inner | epoch 002: 2192 / 3002 loss=2.788, ppl=6.91, wps=5889.6, ups=0.09, wpb=64795, bsz=128, num_updates=5159, lr=9.99667e-05, gnorm=2.285, loss_scale=2, train_wall=11, gb_free=2.8, wall=58588
2021-06-19 10:55:24 | INFO | train_inner | epoch 002: 2193 / 3002 loss=2.565, ppl=5.92, wps=5858.4, ups=0.09, wpb=64787, bsz=128, num_updates=5160, lr=9.99667e-05, gnorm=2.349, loss_scale=2, train_wall=11, gb_free=2.8, wall=58599
2021-06-19 10:55:35 | INFO | train_inner | epoch 002: 2194 / 3002 loss=2.753, ppl=6.74, wps=5862.6, ups=0.09, wpb=64838, bsz=128, num_updates=5161, lr=9.99667e-05, gnorm=7.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=58610
2021-06-19 10:55:47 | INFO | train_inner | epoch 002: 2195 / 3002 loss=2.749, ppl=6.72, wps=5871.9, ups=0.09, wpb=64796, bsz=128, num_updates=5162, lr=9.99667e-05, gnorm=2.287, loss_scale=2, train_wall=11, gb_free=2.8, wall=58621
2021-06-19 10:55:58 | INFO | train_inner | epoch 002: 2196 / 3002 loss=2.737, ppl=6.67, wps=5834.5, ups=0.09, wpb=64861, bsz=128, num_updates=5163, lr=9.99667e-05, gnorm=2.573, loss_scale=2, train_wall=11, gb_free=2.8, wall=58632
2021-06-19 10:56:09 | INFO | train_inner | epoch 002: 2197 / 3002 loss=2.68, ppl=6.41, wps=5787.2, ups=0.09, wpb=64920, bsz=128, num_updates=5164, lr=9.99667e-05, gnorm=2.318, loss_scale=2, train_wall=11, gb_free=2.8, wall=58643
2021-06-19 10:56:20 | INFO | train_inner | epoch 002: 2198 / 3002 loss=2.804, ppl=6.98, wps=5906.8, ups=0.09, wpb=64791, bsz=128, num_updates=5165, lr=9.99667e-05, gnorm=2.404, loss_scale=2, train_wall=11, gb_free=2.8, wall=58654
2021-06-19 10:56:31 | INFO | train_inner | epoch 002: 2199 / 3002 loss=2.769, ppl=6.82, wps=5899.7, ups=0.09, wpb=64732, bsz=128, num_updates=5166, lr=9.99667e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=58665
2021-06-19 10:56:42 | INFO | train_inner | epoch 002: 2200 / 3002 loss=2.812, ppl=7.02, wps=5772.8, ups=0.09, wpb=64793, bsz=128, num_updates=5167, lr=9.99667e-05, gnorm=3.729, loss_scale=2, train_wall=11, gb_free=2.8, wall=58676
2021-06-19 10:56:53 | INFO | train_inner | epoch 002: 2201 / 3002 loss=2.692, ppl=6.46, wps=5921.8, ups=0.09, wpb=64839, bsz=128, num_updates=5168, lr=9.99667e-05, gnorm=2.291, loss_scale=2, train_wall=10, gb_free=2.8, wall=58687
2021-06-19 10:57:04 | INFO | train_inner | epoch 002: 2202 / 3002 loss=2.8, ppl=6.96, wps=5907.3, ups=0.09, wpb=64789, bsz=128, num_updates=5169, lr=9.99666e-05, gnorm=2.519, loss_scale=2, train_wall=10, gb_free=2.8, wall=58698
2021-06-19 10:57:15 | INFO | train_inner | epoch 002: 2203 / 3002 loss=2.798, ppl=6.95, wps=5794.7, ups=0.09, wpb=64809, bsz=128, num_updates=5170, lr=9.99666e-05, gnorm=2.417, loss_scale=2, train_wall=11, gb_free=2.8, wall=58709
2021-06-19 10:57:26 | INFO | train_inner | epoch 002: 2204 / 3002 loss=2.785, ppl=6.89, wps=5857.9, ups=0.09, wpb=64787, bsz=128, num_updates=5171, lr=9.99666e-05, gnorm=2.479, loss_scale=2, train_wall=11, gb_free=2.8, wall=58721
2021-06-19 10:57:37 | INFO | train_inner | epoch 002: 2205 / 3002 loss=2.661, ppl=6.33, wps=5878.5, ups=0.09, wpb=64778, bsz=128, num_updates=5172, lr=9.99666e-05, gnorm=2.479, loss_scale=2, train_wall=11, gb_free=2.8, wall=58732
2021-06-19 10:57:48 | INFO | train_inner | epoch 002: 2206 / 3002 loss=2.774, ppl=6.84, wps=5904.5, ups=0.09, wpb=64888, bsz=128, num_updates=5173, lr=9.99666e-05, gnorm=2.277, loss_scale=2, train_wall=11, gb_free=2.8, wall=58743
2021-06-19 10:57:59 | INFO | train_inner | epoch 002: 2207 / 3002 loss=2.779, ppl=6.86, wps=5815.8, ups=0.09, wpb=64777, bsz=128, num_updates=5174, lr=9.99666e-05, gnorm=2.383, loss_scale=2, train_wall=11, gb_free=2.8, wall=58754
2021-06-19 10:58:11 | INFO | train_inner | epoch 002: 2208 / 3002 loss=2.768, ppl=6.81, wps=5756.9, ups=0.09, wpb=64816, bsz=128, num_updates=5175, lr=9.99666e-05, gnorm=2.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=58765
2021-06-19 10:58:22 | INFO | train_inner | epoch 002: 2209 / 3002 loss=2.661, ppl=6.32, wps=5878.2, ups=0.09, wpb=64790, bsz=128, num_updates=5176, lr=9.99666e-05, gnorm=2.371, loss_scale=2, train_wall=11, gb_free=2.8, wall=58776
2021-06-19 10:58:33 | INFO | train_inner | epoch 002: 2210 / 3002 loss=2.882, ppl=7.37, wps=5776.5, ups=0.09, wpb=64711, bsz=128, num_updates=5177, lr=9.99666e-05, gnorm=2.374, loss_scale=2, train_wall=11, gb_free=2.8, wall=58787
2021-06-19 10:58:44 | INFO | train_inner | epoch 002: 2211 / 3002 loss=2.8, ppl=6.97, wps=5898, ups=0.09, wpb=64857, bsz=128, num_updates=5178, lr=9.99666e-05, gnorm=13.997, loss_scale=2, train_wall=11, gb_free=2.8, wall=58798
2021-06-19 10:58:55 | INFO | train_inner | epoch 002: 2212 / 3002 loss=2.793, ppl=6.93, wps=5811, ups=0.09, wpb=64770, bsz=128, num_updates=5179, lr=9.99666e-05, gnorm=4.25, loss_scale=2, train_wall=11, gb_free=2.8, wall=58809
2021-06-19 10:59:06 | INFO | train_inner | epoch 002: 2213 / 3002 loss=2.817, ppl=7.05, wps=5859.9, ups=0.09, wpb=64874, bsz=128, num_updates=5180, lr=9.99666e-05, gnorm=2.301, loss_scale=2, train_wall=11, gb_free=2.8, wall=58820
2021-06-19 10:59:17 | INFO | train_inner | epoch 002: 2214 / 3002 loss=2.789, ppl=6.91, wps=5884.7, ups=0.09, wpb=64864, bsz=128, num_updates=5181, lr=9.99665e-05, gnorm=2.322, loss_scale=2, train_wall=11, gb_free=2.8, wall=58831
2021-06-19 10:59:28 | INFO | train_inner | epoch 002: 2215 / 3002 loss=2.811, ppl=7.02, wps=5893.5, ups=0.09, wpb=64831, bsz=128, num_updates=5182, lr=9.99665e-05, gnorm=29.735, loss_scale=2, train_wall=11, gb_free=2.8, wall=58842
2021-06-19 10:59:39 | INFO | train_inner | epoch 002: 2216 / 3002 loss=2.724, ppl=6.61, wps=5746.8, ups=0.09, wpb=64750, bsz=128, num_updates=5183, lr=9.99665e-05, gnorm=2.397, loss_scale=2, train_wall=11, gb_free=2.8, wall=58854
2021-06-19 10:59:50 | INFO | train_inner | epoch 002: 2217 / 3002 loss=2.879, ppl=7.36, wps=5821.2, ups=0.09, wpb=64823, bsz=128, num_updates=5184, lr=9.99665e-05, gnorm=2.509, loss_scale=2, train_wall=11, gb_free=2.8, wall=58865
2021-06-19 11:00:02 | INFO | train_inner | epoch 002: 2218 / 3002 loss=2.728, ppl=6.63, wps=5775.7, ups=0.09, wpb=64763, bsz=128, num_updates=5185, lr=9.99665e-05, gnorm=2.389, loss_scale=2, train_wall=11, gb_free=2.8, wall=58876
2021-06-19 11:00:13 | INFO | train_inner | epoch 002: 2219 / 3002 loss=2.687, ppl=6.44, wps=5823.1, ups=0.09, wpb=64737, bsz=128, num_updates=5186, lr=9.99665e-05, gnorm=2.394, loss_scale=2, train_wall=11, gb_free=2.8, wall=58887
2021-06-19 11:00:24 | INFO | train_inner | epoch 002: 2220 / 3002 loss=2.552, ppl=5.86, wps=5853.4, ups=0.09, wpb=64803, bsz=128, num_updates=5187, lr=9.99665e-05, gnorm=2.317, loss_scale=2, train_wall=11, gb_free=2.8, wall=58898
2021-06-19 11:00:35 | INFO | train_inner | epoch 002: 2221 / 3002 loss=2.582, ppl=5.99, wps=5930.9, ups=0.09, wpb=64824, bsz=128, num_updates=5188, lr=9.99665e-05, gnorm=2.411, loss_scale=2, train_wall=10, gb_free=2.8, wall=58909
2021-06-19 11:00:46 | INFO | train_inner | epoch 002: 2222 / 3002 loss=2.897, ppl=7.45, wps=5750.2, ups=0.09, wpb=64788, bsz=128, num_updates=5189, lr=9.99665e-05, gnorm=5.022, loss_scale=2, train_wall=11, gb_free=2.8, wall=58920
2021-06-19 11:00:57 | INFO | train_inner | epoch 002: 2223 / 3002 loss=2.839, ppl=7.15, wps=5871.7, ups=0.09, wpb=64781, bsz=128, num_updates=5190, lr=9.99665e-05, gnorm=2.4, loss_scale=2, train_wall=11, gb_free=2.8, wall=58931
2021-06-19 11:01:08 | INFO | train_inner | epoch 002: 2224 / 3002 loss=2.883, ppl=7.38, wps=5890.7, ups=0.09, wpb=64855, bsz=128, num_updates=5191, lr=9.99665e-05, gnorm=2.439, loss_scale=2, train_wall=11, gb_free=2.8, wall=58942
2021-06-19 11:01:19 | INFO | train_inner | epoch 002: 2225 / 3002 loss=2.807, ppl=7, wps=5875.9, ups=0.09, wpb=64789, bsz=128, num_updates=5192, lr=9.99665e-05, gnorm=2.482, loss_scale=2, train_wall=11, gb_free=2.8, wall=58953
2021-06-19 11:01:30 | INFO | train_inner | epoch 002: 2226 / 3002 loss=2.665, ppl=6.34, wps=5848, ups=0.09, wpb=64732, bsz=128, num_updates=5193, lr=9.99665e-05, gnorm=2.305, loss_scale=2, train_wall=11, gb_free=2.8, wall=58965
2021-06-19 11:01:41 | INFO | train_inner | epoch 002: 2227 / 3002 loss=2.876, ppl=7.34, wps=5846.3, ups=0.09, wpb=64902, bsz=128, num_updates=5194, lr=9.99664e-05, gnorm=2.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=58976
2021-06-19 11:01:52 | INFO | train_inner | epoch 002: 2228 / 3002 loss=2.719, ppl=6.59, wps=5831.1, ups=0.09, wpb=64780, bsz=128, num_updates=5195, lr=9.99664e-05, gnorm=2.982, loss_scale=2, train_wall=11, gb_free=2.8, wall=58987
2021-06-19 11:02:03 | INFO | train_inner | epoch 002: 2229 / 3002 loss=2.882, ppl=7.37, wps=5894.2, ups=0.09, wpb=64879, bsz=128, num_updates=5196, lr=9.99664e-05, gnorm=2.321, loss_scale=2, train_wall=11, gb_free=2.8, wall=58998
2021-06-19 11:02:14 | INFO | train_inner | epoch 002: 2230 / 3002 loss=2.906, ppl=7.49, wps=5930.5, ups=0.09, wpb=64693, bsz=128, num_updates=5197, lr=9.99664e-05, gnorm=2.305, loss_scale=2, train_wall=10, gb_free=2.8, wall=59009
2021-06-19 11:02:25 | INFO | train_inner | epoch 002: 2231 / 3002 loss=2.693, ppl=6.47, wps=5843.4, ups=0.09, wpb=64800, bsz=128, num_updates=5198, lr=9.99664e-05, gnorm=2.509, loss_scale=2, train_wall=11, gb_free=2.8, wall=59020
2021-06-19 11:02:36 | INFO | train_inner | epoch 002: 2232 / 3002 loss=2.786, ppl=6.9, wps=5939.1, ups=0.09, wpb=64860, bsz=128, num_updates=5199, lr=9.99664e-05, gnorm=2.387, loss_scale=2, train_wall=10, gb_free=2.8, wall=59031
2021-06-19 11:02:47 | INFO | train_inner | epoch 002: 2233 / 3002 loss=2.704, ppl=6.52, wps=5795.1, ups=0.09, wpb=64773, bsz=128, num_updates=5200, lr=9.99664e-05, gnorm=6.271, loss_scale=2, train_wall=11, gb_free=2.8, wall=59042
2021-06-19 11:02:59 | INFO | train_inner | epoch 002: 2234 / 3002 loss=2.784, ppl=6.89, wps=5869.4, ups=0.09, wpb=64782, bsz=128, num_updates=5201, lr=9.99664e-05, gnorm=2.265, loss_scale=2, train_wall=11, gb_free=2.8, wall=59053
2021-06-19 11:03:09 | INFO | train_inner | epoch 002: 2235 / 3002 loss=2.602, ppl=6.07, wps=5935.8, ups=0.09, wpb=64813, bsz=128, num_updates=5202, lr=9.99664e-05, gnorm=2.327, loss_scale=2, train_wall=10, gb_free=2.8, wall=59064
2021-06-19 11:03:21 | INFO | train_inner | epoch 002: 2236 / 3002 loss=2.761, ppl=6.78, wps=5814.6, ups=0.09, wpb=64827, bsz=128, num_updates=5203, lr=9.99664e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=59075
2021-06-19 11:03:32 | INFO | train_inner | epoch 002: 2237 / 3002 loss=2.824, ppl=7.08, wps=5890.2, ups=0.09, wpb=64851, bsz=128, num_updates=5204, lr=9.99664e-05, gnorm=2.314, loss_scale=2, train_wall=11, gb_free=2.8, wall=59086
2021-06-19 11:03:43 | INFO | train_inner | epoch 002: 2238 / 3002 loss=2.725, ppl=6.61, wps=5858.4, ups=0.09, wpb=64939, bsz=128, num_updates=5205, lr=9.99664e-05, gnorm=2.369, loss_scale=2, train_wall=11, gb_free=2.8, wall=59097
2021-06-19 11:03:54 | INFO | train_inner | epoch 002: 2239 / 3002 loss=2.823, ppl=7.08, wps=5837.8, ups=0.09, wpb=64869, bsz=128, num_updates=5206, lr=9.99663e-05, gnorm=2.328, loss_scale=2, train_wall=11, gb_free=2.8, wall=59108
2021-06-19 11:04:05 | INFO | train_inner | epoch 002: 2240 / 3002 loss=2.606, ppl=6.09, wps=5759.4, ups=0.09, wpb=64785, bsz=128, num_updates=5207, lr=9.99663e-05, gnorm=2.263, loss_scale=2, train_wall=11, gb_free=2.8, wall=59119
2021-06-19 11:04:16 | INFO | train_inner | epoch 002: 2241 / 3002 loss=2.64, ppl=6.23, wps=5742.7, ups=0.09, wpb=64837, bsz=128, num_updates=5208, lr=9.99663e-05, gnorm=2.197, loss_scale=2, train_wall=11, gb_free=2.8, wall=59131
2021-06-19 11:04:28 | INFO | train_inner | epoch 002: 2242 / 3002 loss=2.833, ppl=7.13, wps=5797, ups=0.09, wpb=64754, bsz=128, num_updates=5209, lr=9.99663e-05, gnorm=2.433, loss_scale=2, train_wall=11, gb_free=2.8, wall=59142
2021-06-19 11:04:39 | INFO | train_inner | epoch 002: 2243 / 3002 loss=2.805, ppl=6.99, wps=5877.6, ups=0.09, wpb=64752, bsz=128, num_updates=5210, lr=9.99663e-05, gnorm=2.298, loss_scale=2, train_wall=11, gb_free=2.8, wall=59153
2021-06-19 11:04:49 | INFO | train_inner | epoch 002: 2244 / 3002 loss=2.883, ppl=7.37, wps=5931.5, ups=0.09, wpb=64852, bsz=128, num_updates=5211, lr=9.99663e-05, gnorm=2.304, loss_scale=2, train_wall=10, gb_free=2.8, wall=59164
2021-06-19 11:05:01 | INFO | train_inner | epoch 002: 2245 / 3002 loss=2.711, ppl=6.55, wps=5797, ups=0.09, wpb=64790, bsz=128, num_updates=5212, lr=9.99663e-05, gnorm=2.317, loss_scale=2, train_wall=11, gb_free=2.8, wall=59175
2021-06-19 11:05:12 | INFO | train_inner | epoch 002: 2246 / 3002 loss=2.852, ppl=7.22, wps=5803.7, ups=0.09, wpb=64770, bsz=128, num_updates=5213, lr=9.99663e-05, gnorm=2.203, loss_scale=2, train_wall=11, gb_free=2.8, wall=59186
2021-06-19 11:05:23 | INFO | train_inner | epoch 002: 2247 / 3002 loss=2.633, ppl=6.2, wps=5717.4, ups=0.09, wpb=64784, bsz=128, num_updates=5214, lr=9.99663e-05, gnorm=14.736, loss_scale=2, train_wall=11, gb_free=2.8, wall=59197
2021-06-19 11:05:34 | INFO | train_inner | epoch 002: 2248 / 3002 loss=2.581, ppl=5.98, wps=5782.6, ups=0.09, wpb=64877, bsz=128, num_updates=5215, lr=9.99663e-05, gnorm=2.326, loss_scale=2, train_wall=11, gb_free=2.8, wall=59209
2021-06-19 11:05:46 | INFO | train_inner | epoch 002: 2249 / 3002 loss=2.758, ppl=6.76, wps=5774.9, ups=0.09, wpb=64809, bsz=128, num_updates=5216, lr=9.99663e-05, gnorm=2.262, loss_scale=2, train_wall=11, gb_free=2.8, wall=59220
2021-06-19 11:05:57 | INFO | train_inner | epoch 002: 2250 / 3002 loss=2.686, ppl=6.43, wps=5832.1, ups=0.09, wpb=64864, bsz=128, num_updates=5217, lr=9.99663e-05, gnorm=3.166, loss_scale=2, train_wall=11, gb_free=2.8, wall=59231
2021-06-19 11:06:08 | INFO | train_inner | epoch 002: 2251 / 3002 loss=2.812, ppl=7.02, wps=5769.2, ups=0.09, wpb=64798, bsz=128, num_updates=5218, lr=9.99663e-05, gnorm=2.312, loss_scale=2, train_wall=11, gb_free=2.8, wall=59242
2021-06-19 11:06:19 | INFO | train_inner | epoch 002: 2252 / 3002 loss=2.714, ppl=6.56, wps=5903.3, ups=0.09, wpb=64906, bsz=128, num_updates=5219, lr=9.99662e-05, gnorm=2.454, loss_scale=2, train_wall=11, gb_free=2.8, wall=59253
2021-06-19 11:06:30 | INFO | train_inner | epoch 002: 2253 / 3002 loss=2.811, ppl=7.02, wps=5720.7, ups=0.09, wpb=64864, bsz=128, num_updates=5220, lr=9.99662e-05, gnorm=2.349, loss_scale=2, train_wall=11, gb_free=2.8, wall=59265
2021-06-19 11:06:41 | INFO | train_inner | epoch 002: 2254 / 3002 loss=2.682, ppl=6.42, wps=5777, ups=0.09, wpb=64816, bsz=128, num_updates=5221, lr=9.99662e-05, gnorm=2.289, loss_scale=2, train_wall=11, gb_free=2.8, wall=59276
2021-06-19 11:06:53 | INFO | train_inner | epoch 002: 2255 / 3002 loss=2.781, ppl=6.87, wps=5866.2, ups=0.09, wpb=64803, bsz=128, num_updates=5222, lr=9.99662e-05, gnorm=2.285, loss_scale=2, train_wall=11, gb_free=2.8, wall=59287
2021-06-19 11:07:03 | INFO | train_inner | epoch 002: 2256 / 3002 loss=2.839, ppl=7.15, wps=5923.9, ups=0.09, wpb=64837, bsz=128, num_updates=5223, lr=9.99662e-05, gnorm=2.257, loss_scale=2, train_wall=10, gb_free=2.8, wall=59298
2021-06-19 11:07:15 | INFO | train_inner | epoch 002: 2257 / 3002 loss=2.931, ppl=7.63, wps=5870.5, ups=0.09, wpb=64781, bsz=128, num_updates=5224, lr=9.99662e-05, gnorm=2.426, loss_scale=2, train_wall=11, gb_free=2.8, wall=59309
2021-06-19 11:07:25 | INFO | train_inner | epoch 002: 2258 / 3002 loss=2.913, ppl=7.53, wps=5913.1, ups=0.09, wpb=64812, bsz=128, num_updates=5225, lr=9.99662e-05, gnorm=2.399, loss_scale=2, train_wall=10, gb_free=2.8, wall=59320
2021-06-19 11:07:36 | INFO | train_inner | epoch 002: 2259 / 3002 loss=2.782, ppl=6.88, wps=5988.4, ups=0.09, wpb=64769, bsz=128, num_updates=5226, lr=9.99662e-05, gnorm=2.195, loss_scale=2, train_wall=10, gb_free=2.8, wall=59331
2021-06-19 11:07:47 | INFO | train_inner | epoch 002: 2260 / 3002 loss=2.986, ppl=7.92, wps=5906.5, ups=0.09, wpb=64832, bsz=128, num_updates=5227, lr=9.99662e-05, gnorm=2.207, loss_scale=2, train_wall=11, gb_free=2.8, wall=59342
2021-06-19 11:07:58 | INFO | train_inner | epoch 002: 2261 / 3002 loss=2.763, ppl=6.79, wps=5815.2, ups=0.09, wpb=64878, bsz=128, num_updates=5228, lr=9.99662e-05, gnorm=2.219, loss_scale=2, train_wall=11, gb_free=2.8, wall=59353
2021-06-19 11:08:09 | INFO | train_inner | epoch 002: 2262 / 3002 loss=2.706, ppl=6.53, wps=5898.3, ups=0.09, wpb=64827, bsz=128, num_updates=5229, lr=9.99662e-05, gnorm=2.338, loss_scale=2, train_wall=11, gb_free=2.8, wall=59364
2021-06-19 11:08:20 | INFO | train_inner | epoch 002: 2263 / 3002 loss=2.802, ppl=6.97, wps=5990.7, ups=0.09, wpb=64820, bsz=128, num_updates=5230, lr=9.99662e-05, gnorm=2.258, loss_scale=2, train_wall=10, gb_free=2.8, wall=59375
2021-06-19 11:08:31 | INFO | train_inner | epoch 002: 2264 / 3002 loss=2.731, ppl=6.64, wps=5903.7, ups=0.09, wpb=64912, bsz=128, num_updates=5231, lr=9.99661e-05, gnorm=2.325, loss_scale=2, train_wall=11, gb_free=2.8, wall=59386
2021-06-19 11:08:42 | INFO | train_inner | epoch 002: 2265 / 3002 loss=2.654, ppl=6.29, wps=5882.2, ups=0.09, wpb=64820, bsz=128, num_updates=5232, lr=9.99661e-05, gnorm=2.238, loss_scale=2, train_wall=11, gb_free=2.8, wall=59397
2021-06-19 11:08:53 | INFO | train_inner | epoch 002: 2266 / 3002 loss=2.803, ppl=6.98, wps=6079.6, ups=0.09, wpb=64887, bsz=128, num_updates=5233, lr=9.99661e-05, gnorm=2.242, loss_scale=2, train_wall=10, gb_free=2.8, wall=59407
2021-06-19 11:09:04 | INFO | train_inner | epoch 002: 2267 / 3002 loss=2.766, ppl=6.8, wps=5844.4, ups=0.09, wpb=64913, bsz=128, num_updates=5234, lr=9.99661e-05, gnorm=2.294, loss_scale=2, train_wall=11, gb_free=2.8, wall=59418
2021-06-19 11:09:15 | INFO | train_inner | epoch 002: 2268 / 3002 loss=2.804, ppl=6.98, wps=5868.2, ups=0.09, wpb=64788, bsz=128, num_updates=5235, lr=9.99661e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=59429
2021-06-19 11:09:26 | INFO | train_inner | epoch 002: 2269 / 3002 loss=2.781, ppl=6.88, wps=5790.7, ups=0.09, wpb=64787, bsz=128, num_updates=5236, lr=9.99661e-05, gnorm=2.183, loss_scale=2, train_wall=11, gb_free=2.8, wall=59441
2021-06-19 11:09:38 | INFO | train_inner | epoch 002: 2270 / 3002 loss=2.673, ppl=6.38, wps=5718.3, ups=0.09, wpb=64798, bsz=128, num_updates=5237, lr=9.99661e-05, gnorm=2.255, loss_scale=4, train_wall=11, gb_free=2.8, wall=59452
2021-06-19 11:09:49 | INFO | train_inner | epoch 002: 2271 / 3002 loss=2.76, ppl=6.77, wps=5777.3, ups=0.09, wpb=64789, bsz=128, num_updates=5238, lr=9.99661e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=59463
2021-06-19 11:10:00 | INFO | train_inner | epoch 002: 2272 / 3002 loss=2.631, ppl=6.19, wps=5807.8, ups=0.09, wpb=64865, bsz=128, num_updates=5239, lr=9.99661e-05, gnorm=2.082, loss_scale=4, train_wall=11, gb_free=2.8, wall=59474
2021-06-19 11:10:11 | INFO | train_inner | epoch 002: 2273 / 3002 loss=2.571, ppl=5.94, wps=5840.7, ups=0.09, wpb=64824, bsz=128, num_updates=5240, lr=9.99661e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=59485
2021-06-19 11:10:22 | INFO | train_inner | epoch 002: 2274 / 3002 loss=2.778, ppl=6.86, wps=5948.8, ups=0.09, wpb=64857, bsz=128, num_updates=5241, lr=9.99661e-05, gnorm=2.236, loss_scale=4, train_wall=10, gb_free=2.8, wall=59496
2021-06-19 11:10:33 | INFO | train_inner | epoch 002: 2275 / 3002 loss=2.8, ppl=6.96, wps=5783.6, ups=0.09, wpb=64848, bsz=128, num_updates=5242, lr=9.99661e-05, gnorm=2.21, loss_scale=4, train_wall=11, gb_free=2.8, wall=59508
2021-06-19 11:10:44 | INFO | train_inner | epoch 002: 2276 / 3002 loss=2.622, ppl=6.15, wps=5864.7, ups=0.09, wpb=64811, bsz=128, num_updates=5243, lr=9.99661e-05, gnorm=2.275, loss_scale=4, train_wall=11, gb_free=2.8, wall=59519
2021-06-19 11:10:55 | INFO | train_inner | epoch 002: 2277 / 3002 loss=2.775, ppl=6.85, wps=5871, ups=0.09, wpb=64884, bsz=128, num_updates=5244, lr=9.9966e-05, gnorm=2.269, loss_scale=4, train_wall=11, gb_free=2.8, wall=59530
2021-06-19 11:11:06 | INFO | train_inner | epoch 002: 2278 / 3002 loss=2.819, ppl=7.05, wps=5947.1, ups=0.09, wpb=64776, bsz=128, num_updates=5245, lr=9.9966e-05, gnorm=2.256, loss_scale=4, train_wall=10, gb_free=2.8, wall=59541
2021-06-19 11:11:17 | INFO | train_inner | epoch 002: 2279 / 3002 loss=2.649, ppl=6.27, wps=5935.8, ups=0.09, wpb=64815, bsz=128, num_updates=5246, lr=9.9966e-05, gnorm=2.213, loss_scale=4, train_wall=10, gb_free=2.8, wall=59551
2021-06-19 11:11:28 | INFO | train_inner | epoch 002: 2280 / 3002 loss=2.722, ppl=6.6, wps=5887.4, ups=0.09, wpb=64810, bsz=128, num_updates=5247, lr=9.9966e-05, gnorm=2.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=59562
2021-06-19 11:11:39 | INFO | train_inner | epoch 002: 2281 / 3002 loss=2.682, ppl=6.42, wps=5915, ups=0.09, wpb=64793, bsz=128, num_updates=5248, lr=9.9966e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=59573
2021-06-19 11:11:50 | INFO | train_inner | epoch 002: 2282 / 3002 loss=2.786, ppl=6.9, wps=5878.5, ups=0.09, wpb=64883, bsz=128, num_updates=5249, lr=9.9966e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=59584
2021-06-19 11:12:01 | INFO | train_inner | epoch 002: 2283 / 3002 loss=2.758, ppl=6.77, wps=5925.5, ups=0.09, wpb=64792, bsz=128, num_updates=5250, lr=9.9966e-05, gnorm=2.166, loss_scale=4, train_wall=10, gb_free=2.8, wall=59595
2021-06-19 11:12:12 | INFO | train_inner | epoch 002: 2284 / 3002 loss=2.774, ppl=6.84, wps=5863.8, ups=0.09, wpb=64848, bsz=128, num_updates=5251, lr=9.9966e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=59606
2021-06-19 11:12:23 | INFO | train_inner | epoch 002: 2285 / 3002 loss=2.673, ppl=6.38, wps=5779.8, ups=0.09, wpb=64815, bsz=128, num_updates=5252, lr=9.9966e-05, gnorm=10.724, loss_scale=4, train_wall=11, gb_free=2.8, wall=59618
2021-06-19 11:12:34 | INFO | train_inner | epoch 002: 2286 / 3002 loss=2.709, ppl=6.54, wps=5835.6, ups=0.09, wpb=64787, bsz=128, num_updates=5253, lr=9.9966e-05, gnorm=2.318, loss_scale=4, train_wall=11, gb_free=2.8, wall=59629
2021-06-19 11:12:45 | INFO | train_inner | epoch 002: 2287 / 3002 loss=2.661, ppl=6.32, wps=5889.4, ups=0.09, wpb=64865, bsz=128, num_updates=5254, lr=9.9966e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=59640
2021-06-19 11:12:56 | INFO | train_inner | epoch 002: 2288 / 3002 loss=2.74, ppl=6.68, wps=5890.7, ups=0.09, wpb=64881, bsz=128, num_updates=5255, lr=9.9966e-05, gnorm=2.212, loss_scale=4, train_wall=11, gb_free=2.8, wall=59651
2021-06-19 11:13:07 | INFO | train_inner | epoch 002: 2289 / 3002 loss=2.885, ppl=7.39, wps=5935.5, ups=0.09, wpb=64879, bsz=128, num_updates=5256, lr=9.99659e-05, gnorm=2.371, loss_scale=4, train_wall=10, gb_free=2.8, wall=59662
2021-06-19 11:13:18 | INFO | train_inner | epoch 002: 2290 / 3002 loss=2.614, ppl=6.12, wps=5857.3, ups=0.09, wpb=64692, bsz=128, num_updates=5257, lr=9.99659e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=59673
2021-06-19 11:13:30 | INFO | train_inner | epoch 002: 2291 / 3002 loss=2.759, ppl=6.77, wps=5839.1, ups=0.09, wpb=64784, bsz=128, num_updates=5258, lr=9.99659e-05, gnorm=2.757, loss_scale=4, train_wall=11, gb_free=2.8, wall=59684
2021-06-19 11:13:41 | INFO | train_inner | epoch 002: 2292 / 3002 loss=2.853, ppl=7.23, wps=5875.1, ups=0.09, wpb=64745, bsz=128, num_updates=5259, lr=9.99659e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=59695
2021-06-19 11:13:52 | INFO | train_inner | epoch 002: 2293 / 3002 loss=2.752, ppl=6.74, wps=5703.4, ups=0.09, wpb=64856, bsz=128, num_updates=5260, lr=9.99659e-05, gnorm=2.717, loss_scale=4, train_wall=11, gb_free=2.8, wall=59706
2021-06-19 11:14:03 | INFO | train_inner | epoch 002: 2294 / 3002 loss=2.767, ppl=6.81, wps=5730.7, ups=0.09, wpb=64842, bsz=128, num_updates=5261, lr=9.99659e-05, gnorm=2.198, loss_scale=4, train_wall=11, gb_free=2.8, wall=59718
2021-06-19 11:14:14 | INFO | train_inner | epoch 002: 2295 / 3002 loss=2.814, ppl=7.03, wps=5799.3, ups=0.09, wpb=64844, bsz=128, num_updates=5262, lr=9.99659e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=59729
2021-06-19 11:14:25 | INFO | train_inner | epoch 002: 2296 / 3002 loss=2.717, ppl=6.58, wps=5881.9, ups=0.09, wpb=64769, bsz=128, num_updates=5263, lr=9.99659e-05, gnorm=2.126, loss_scale=4, train_wall=11, gb_free=2.8, wall=59740
2021-06-19 11:14:37 | INFO | train_inner | epoch 002: 2297 / 3002 loss=2.912, ppl=7.53, wps=5806.8, ups=0.09, wpb=64756, bsz=128, num_updates=5264, lr=9.99659e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=59751
2021-06-19 11:14:48 | INFO | train_inner | epoch 002: 2298 / 3002 loss=2.813, ppl=7.03, wps=5894.7, ups=0.09, wpb=64793, bsz=128, num_updates=5265, lr=9.99659e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=59762
2021-06-19 11:14:59 | INFO | train_inner | epoch 002: 2299 / 3002 loss=2.942, ppl=7.68, wps=5904.2, ups=0.09, wpb=64896, bsz=128, num_updates=5266, lr=9.99659e-05, gnorm=2.448, loss_scale=4, train_wall=11, gb_free=2.8, wall=59773
2021-06-19 11:15:10 | INFO | train_inner | epoch 002: 2300 / 3002 loss=2.704, ppl=6.52, wps=5828, ups=0.09, wpb=64792, bsz=128, num_updates=5267, lr=9.99659e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=59784
2021-06-19 11:15:21 | INFO | train_inner | epoch 002: 2301 / 3002 loss=2.798, ppl=6.95, wps=5738.8, ups=0.09, wpb=64880, bsz=128, num_updates=5268, lr=9.99659e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=59795
2021-06-19 11:15:32 | INFO | train_inner | epoch 002: 2302 / 3002 loss=2.74, ppl=6.68, wps=5872, ups=0.09, wpb=64904, bsz=128, num_updates=5269, lr=9.99658e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=59806
2021-06-19 11:15:43 | INFO | train_inner | epoch 002: 2303 / 3002 loss=2.757, ppl=6.76, wps=5877.6, ups=0.09, wpb=64857, bsz=128, num_updates=5270, lr=9.99658e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=59817
2021-06-19 11:15:54 | INFO | train_inner | epoch 002: 2304 / 3002 loss=2.737, ppl=6.67, wps=5674.3, ups=0.09, wpb=64826, bsz=128, num_updates=5271, lr=9.99658e-05, gnorm=2.133, loss_scale=4, train_wall=11, gb_free=2.8, wall=59829
2021-06-19 11:16:06 | INFO | train_inner | epoch 002: 2305 / 3002 loss=2.729, ppl=6.63, wps=5850.4, ups=0.09, wpb=64860, bsz=128, num_updates=5272, lr=9.99658e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=59840
2021-06-19 11:16:17 | INFO | train_inner | epoch 002: 2306 / 3002 loss=2.674, ppl=6.38, wps=5913.9, ups=0.09, wpb=64890, bsz=128, num_updates=5273, lr=9.99658e-05, gnorm=2.197, loss_scale=4, train_wall=10, gb_free=2.8, wall=59851
2021-06-19 11:16:28 | INFO | train_inner | epoch 002: 2307 / 3002 loss=2.829, ppl=7.11, wps=5838.5, ups=0.09, wpb=64873, bsz=128, num_updates=5274, lr=9.99658e-05, gnorm=2.251, loss_scale=4, train_wall=11, gb_free=2.8, wall=59862
2021-06-19 11:16:39 | INFO | train_inner | epoch 002: 2308 / 3002 loss=2.609, ppl=6.1, wps=5840.6, ups=0.09, wpb=64789, bsz=128, num_updates=5275, lr=9.99658e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=59873
2021-06-19 11:16:50 | INFO | train_inner | epoch 002: 2309 / 3002 loss=2.735, ppl=6.66, wps=5910.4, ups=0.09, wpb=64851, bsz=128, num_updates=5276, lr=9.99658e-05, gnorm=2.267, loss_scale=4, train_wall=11, gb_free=2.8, wall=59884
2021-06-19 11:17:01 | INFO | train_inner | epoch 002: 2310 / 3002 loss=2.862, ppl=7.27, wps=5786, ups=0.09, wpb=64792, bsz=128, num_updates=5277, lr=9.99658e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=59895
2021-06-19 11:17:12 | INFO | train_inner | epoch 002: 2311 / 3002 loss=2.801, ppl=6.97, wps=5816.7, ups=0.09, wpb=64788, bsz=128, num_updates=5278, lr=9.99658e-05, gnorm=2.313, loss_scale=4, train_wall=11, gb_free=2.8, wall=59906
2021-06-19 11:17:23 | INFO | train_inner | epoch 002: 2312 / 3002 loss=2.74, ppl=6.68, wps=5857.5, ups=0.09, wpb=64727, bsz=128, num_updates=5279, lr=9.99658e-05, gnorm=2.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=59917
2021-06-19 11:17:34 | INFO | train_inner | epoch 002: 2313 / 3002 loss=2.725, ppl=6.61, wps=5876, ups=0.09, wpb=64785, bsz=128, num_updates=5280, lr=9.99658e-05, gnorm=2.138, loss_scale=4, train_wall=11, gb_free=2.8, wall=59928
2021-06-19 11:17:45 | INFO | train_inner | epoch 002: 2314 / 3002 loss=2.688, ppl=6.44, wps=5912.1, ups=0.09, wpb=64932, bsz=128, num_updates=5281, lr=9.99657e-05, gnorm=2.191, loss_scale=4, train_wall=11, gb_free=2.8, wall=59939
2021-06-19 11:17:56 | INFO | train_inner | epoch 002: 2315 / 3002 loss=2.567, ppl=5.93, wps=5791.3, ups=0.09, wpb=64801, bsz=128, num_updates=5282, lr=9.99657e-05, gnorm=2.105, loss_scale=4, train_wall=11, gb_free=2.8, wall=59951
2021-06-19 11:18:07 | INFO | train_inner | epoch 002: 2316 / 3002 loss=2.639, ppl=6.23, wps=5920.5, ups=0.09, wpb=64839, bsz=128, num_updates=5283, lr=9.99657e-05, gnorm=2.114, loss_scale=4, train_wall=11, gb_free=2.8, wall=59962
2021-06-19 11:18:18 | INFO | train_inner | epoch 002: 2317 / 3002 loss=2.824, ppl=7.08, wps=5894.4, ups=0.09, wpb=64838, bsz=128, num_updates=5284, lr=9.99657e-05, gnorm=2.227, loss_scale=4, train_wall=11, gb_free=2.8, wall=59973
2021-06-19 11:18:29 | INFO | train_inner | epoch 002: 2318 / 3002 loss=2.793, ppl=6.93, wps=5960.8, ups=0.09, wpb=64941, bsz=128, num_updates=5285, lr=9.99657e-05, gnorm=2.253, loss_scale=4, train_wall=10, gb_free=2.8, wall=59983
2021-06-19 11:18:40 | INFO | train_inner | epoch 002: 2319 / 3002 loss=2.746, ppl=6.71, wps=5984.2, ups=0.09, wpb=64892, bsz=128, num_updates=5286, lr=9.99657e-05, gnorm=2.246, loss_scale=4, train_wall=10, gb_free=2.8, wall=59994
2021-06-19 11:18:51 | INFO | train_inner | epoch 002: 2320 / 3002 loss=2.708, ppl=6.53, wps=5889.4, ups=0.09, wpb=64811, bsz=128, num_updates=5287, lr=9.99657e-05, gnorm=2.247, loss_scale=4, train_wall=11, gb_free=2.8, wall=60005
2021-06-19 11:19:02 | INFO | train_inner | epoch 002: 2321 / 3002 loss=2.707, ppl=6.53, wps=5948, ups=0.09, wpb=64846, bsz=128, num_updates=5288, lr=9.99657e-05, gnorm=2.354, loss_scale=4, train_wall=10, gb_free=2.8, wall=60016
2021-06-19 11:19:13 | INFO | train_inner | epoch 002: 2322 / 3002 loss=2.575, ppl=5.96, wps=5863.3, ups=0.09, wpb=64865, bsz=128, num_updates=5289, lr=9.99657e-05, gnorm=2.211, loss_scale=4, train_wall=11, gb_free=2.8, wall=60027
2021-06-19 11:19:24 | INFO | train_inner | epoch 002: 2323 / 3002 loss=2.623, ppl=6.16, wps=5872.4, ups=0.09, wpb=64796, bsz=128, num_updates=5290, lr=9.99657e-05, gnorm=2.819, loss_scale=4, train_wall=11, gb_free=2.8, wall=60038
2021-06-19 11:19:35 | INFO | train_inner | epoch 002: 2324 / 3002 loss=2.777, ppl=6.85, wps=5900.8, ups=0.09, wpb=64846, bsz=128, num_updates=5291, lr=9.99657e-05, gnorm=3.056, loss_scale=4, train_wall=11, gb_free=2.8, wall=60049
2021-06-19 11:19:46 | INFO | train_inner | epoch 002: 2325 / 3002 loss=2.936, ppl=7.66, wps=5856.5, ups=0.09, wpb=64884, bsz=128, num_updates=5292, lr=9.99657e-05, gnorm=2.304, loss_scale=4, train_wall=11, gb_free=2.8, wall=60060
2021-06-19 11:19:57 | INFO | train_inner | epoch 002: 2326 / 3002 loss=2.768, ppl=6.81, wps=5821.6, ups=0.09, wpb=64825, bsz=128, num_updates=5293, lr=9.99657e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=60072
2021-06-19 11:20:09 | INFO | train_inner | epoch 002: 2327 / 3002 loss=2.629, ppl=6.18, wps=5725.1, ups=0.09, wpb=64799, bsz=128, num_updates=5294, lr=9.99656e-05, gnorm=2.122, loss_scale=4, train_wall=11, gb_free=2.8, wall=60083
2021-06-19 11:20:20 | INFO | train_inner | epoch 002: 2328 / 3002 loss=2.654, ppl=6.29, wps=5866.5, ups=0.09, wpb=64842, bsz=128, num_updates=5295, lr=9.99656e-05, gnorm=2.251, loss_scale=4, train_wall=11, gb_free=2.8, wall=60094
2021-06-19 11:20:31 | INFO | train_inner | epoch 002: 2329 / 3002 loss=2.69, ppl=6.45, wps=5885.8, ups=0.09, wpb=64841, bsz=128, num_updates=5296, lr=9.99656e-05, gnorm=2.291, loss_scale=4, train_wall=11, gb_free=2.8, wall=60105
2021-06-19 11:20:42 | INFO | train_inner | epoch 002: 2330 / 3002 loss=2.543, ppl=5.83, wps=5794.8, ups=0.09, wpb=64836, bsz=128, num_updates=5297, lr=9.99656e-05, gnorm=2.28, loss_scale=4, train_wall=11, gb_free=2.8, wall=60116
2021-06-19 11:20:53 | INFO | train_inner | epoch 002: 2331 / 3002 loss=2.738, ppl=6.67, wps=5833.6, ups=0.09, wpb=64829, bsz=128, num_updates=5298, lr=9.99656e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=60127
2021-06-19 11:21:04 | INFO | train_inner | epoch 002: 2332 / 3002 loss=2.63, ppl=6.19, wps=5844.2, ups=0.09, wpb=64706, bsz=128, num_updates=5299, lr=9.99656e-05, gnorm=2.858, loss_scale=4, train_wall=11, gb_free=2.8, wall=60138
2021-06-19 11:21:15 | INFO | train_inner | epoch 002: 2333 / 3002 loss=2.799, ppl=6.96, wps=5765.3, ups=0.09, wpb=64789, bsz=128, num_updates=5300, lr=9.99656e-05, gnorm=2.283, loss_scale=4, train_wall=11, gb_free=2.8, wall=60150
2021-06-19 11:21:26 | INFO | train_inner | epoch 002: 2334 / 3002 loss=2.768, ppl=6.81, wps=5757.6, ups=0.09, wpb=64759, bsz=128, num_updates=5301, lr=9.99656e-05, gnorm=2.18, loss_scale=4, train_wall=11, gb_free=2.8, wall=60161
2021-06-19 11:21:38 | INFO | train_inner | epoch 002: 2335 / 3002 loss=2.669, ppl=6.36, wps=5792.9, ups=0.09, wpb=64880, bsz=128, num_updates=5302, lr=9.99656e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=60172
2021-06-19 11:21:49 | INFO | train_inner | epoch 002: 2336 / 3002 loss=2.629, ppl=6.19, wps=5845.6, ups=0.09, wpb=64924, bsz=128, num_updates=5303, lr=9.99656e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=60183
2021-06-19 11:22:00 | INFO | train_inner | epoch 002: 2337 / 3002 loss=2.722, ppl=6.6, wps=5831.3, ups=0.09, wpb=64826, bsz=128, num_updates=5304, lr=9.99656e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=60194
2021-06-19 11:22:11 | INFO | train_inner | epoch 002: 2338 / 3002 loss=2.594, ppl=6.04, wps=5828.6, ups=0.09, wpb=64861, bsz=128, num_updates=5305, lr=9.99656e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=60205
2021-06-19 11:22:22 | INFO | train_inner | epoch 002: 2339 / 3002 loss=2.713, ppl=6.56, wps=5845.4, ups=0.09, wpb=64849, bsz=128, num_updates=5306, lr=9.99655e-05, gnorm=2.257, loss_scale=4, train_wall=11, gb_free=2.8, wall=60216
2021-06-19 11:22:33 | INFO | train_inner | epoch 002: 2340 / 3002 loss=2.738, ppl=6.67, wps=5806.9, ups=0.09, wpb=64744, bsz=128, num_updates=5307, lr=9.99655e-05, gnorm=2.264, loss_scale=4, train_wall=11, gb_free=2.8, wall=60228
2021-06-19 11:22:45 | INFO | train_inner | epoch 002: 2341 / 3002 loss=2.511, ppl=5.7, wps=5766.9, ups=0.09, wpb=64847, bsz=128, num_updates=5308, lr=9.99655e-05, gnorm=2.366, loss_scale=4, train_wall=11, gb_free=2.8, wall=60239
2021-06-19 11:22:56 | INFO | train_inner | epoch 002: 2342 / 3002 loss=2.671, ppl=6.37, wps=5897.5, ups=0.09, wpb=64875, bsz=128, num_updates=5309, lr=9.99655e-05, gnorm=2.318, loss_scale=4, train_wall=11, gb_free=2.8, wall=60250
2021-06-19 11:23:06 | INFO | train_inner | epoch 002: 2343 / 3002 loss=2.724, ppl=6.61, wps=5929.5, ups=0.09, wpb=64889, bsz=128, num_updates=5310, lr=9.99655e-05, gnorm=2.236, loss_scale=4, train_wall=10, gb_free=2.8, wall=60261
2021-06-19 11:23:18 | INFO | train_inner | epoch 002: 2344 / 3002 loss=2.611, ppl=6.11, wps=5861, ups=0.09, wpb=64797, bsz=128, num_updates=5311, lr=9.99655e-05, gnorm=2.716, loss_scale=4, train_wall=11, gb_free=2.8, wall=60272
2021-06-19 11:23:29 | INFO | train_inner | epoch 002: 2345 / 3002 loss=2.771, ppl=6.83, wps=5884.6, ups=0.09, wpb=64788, bsz=128, num_updates=5312, lr=9.99655e-05, gnorm=2.339, loss_scale=4, train_wall=11, gb_free=2.8, wall=60283
2021-06-19 11:23:40 | INFO | train_inner | epoch 002: 2346 / 3002 loss=2.888, ppl=7.4, wps=5762, ups=0.09, wpb=64596, bsz=128, num_updates=5313, lr=9.99655e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=60294
2021-06-19 11:23:51 | INFO | train_inner | epoch 002: 2347 / 3002 loss=2.681, ppl=6.41, wps=5821.1, ups=0.09, wpb=64778, bsz=128, num_updates=5314, lr=9.99655e-05, gnorm=2.182, loss_scale=4, train_wall=11, gb_free=2.8, wall=60305
2021-06-19 11:24:02 | INFO | train_inner | epoch 002: 2348 / 3002 loss=2.739, ppl=6.68, wps=5837.9, ups=0.09, wpb=64828, bsz=128, num_updates=5315, lr=9.99655e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=60316
2021-06-19 11:24:13 | INFO | train_inner | epoch 002: 2349 / 3002 loss=2.84, ppl=7.16, wps=5931.3, ups=0.09, wpb=64861, bsz=128, num_updates=5316, lr=9.99655e-05, gnorm=2.224, loss_scale=4, train_wall=10, gb_free=2.8, wall=60327
2021-06-19 11:24:24 | INFO | train_inner | epoch 002: 2350 / 3002 loss=2.651, ppl=6.28, wps=5818.5, ups=0.09, wpb=64808, bsz=128, num_updates=5317, lr=9.99655e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=60338
2021-06-19 11:24:35 | INFO | train_inner | epoch 002: 2351 / 3002 loss=2.788, ppl=6.91, wps=5799.1, ups=0.09, wpb=64818, bsz=128, num_updates=5318, lr=9.99655e-05, gnorm=2.291, loss_scale=4, train_wall=11, gb_free=2.8, wall=60350
2021-06-19 11:24:47 | INFO | train_inner | epoch 002: 2352 / 3002 loss=2.684, ppl=6.42, wps=5731, ups=0.09, wpb=64799, bsz=128, num_updates=5319, lr=9.99654e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=60361
2021-06-19 11:24:58 | INFO | train_inner | epoch 002: 2353 / 3002 loss=2.799, ppl=6.96, wps=5859.4, ups=0.09, wpb=64837, bsz=128, num_updates=5320, lr=9.99654e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=60372
2021-06-19 11:25:09 | INFO | train_inner | epoch 002: 2354 / 3002 loss=2.765, ppl=6.8, wps=5830.6, ups=0.09, wpb=64829, bsz=128, num_updates=5321, lr=9.99654e-05, gnorm=2.264, loss_scale=4, train_wall=11, gb_free=2.8, wall=60383
2021-06-19 11:25:20 | INFO | train_inner | epoch 002: 2355 / 3002 loss=2.785, ppl=6.89, wps=5892.1, ups=0.09, wpb=64865, bsz=128, num_updates=5322, lr=9.99654e-05, gnorm=2.227, loss_scale=4, train_wall=11, gb_free=2.8, wall=60394
2021-06-19 11:25:31 | INFO | train_inner | epoch 002: 2356 / 3002 loss=2.762, ppl=6.78, wps=5698.5, ups=0.09, wpb=64646, bsz=128, num_updates=5323, lr=9.99654e-05, gnorm=3.012, loss_scale=4, train_wall=11, gb_free=2.8, wall=60405
2021-06-19 11:25:42 | INFO | train_inner | epoch 002: 2357 / 3002 loss=2.865, ppl=7.29, wps=5732.9, ups=0.09, wpb=64837, bsz=128, num_updates=5324, lr=9.99654e-05, gnorm=2.214, loss_scale=4, train_wall=11, gb_free=2.8, wall=60417
2021-06-19 11:25:54 | INFO | train_inner | epoch 002: 2358 / 3002 loss=2.642, ppl=6.24, wps=5777.3, ups=0.09, wpb=64862, bsz=128, num_updates=5325, lr=9.99654e-05, gnorm=2.17, loss_scale=4, train_wall=11, gb_free=2.8, wall=60428
2021-06-19 11:26:05 | INFO | train_inner | epoch 002: 2359 / 3002 loss=2.797, ppl=6.95, wps=5798.3, ups=0.09, wpb=64867, bsz=128, num_updates=5326, lr=9.99654e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=60439
2021-06-19 11:26:16 | INFO | train_inner | epoch 002: 2360 / 3002 loss=2.739, ppl=6.68, wps=5711, ups=0.09, wpb=64841, bsz=128, num_updates=5327, lr=9.99654e-05, gnorm=2.198, loss_scale=4, train_wall=11, gb_free=2.8, wall=60450
2021-06-19 11:26:27 | INFO | train_inner | epoch 002: 2361 / 3002 loss=2.645, ppl=6.26, wps=5847.2, ups=0.09, wpb=64839, bsz=128, num_updates=5328, lr=9.99654e-05, gnorm=2.16, loss_scale=4, train_wall=11, gb_free=2.8, wall=60462
2021-06-19 11:26:38 | INFO | train_inner | epoch 002: 2362 / 3002 loss=2.741, ppl=6.69, wps=5919.9, ups=0.09, wpb=64898, bsz=128, num_updates=5329, lr=9.99654e-05, gnorm=2.257, loss_scale=4, train_wall=11, gb_free=2.8, wall=60473
2021-06-19 11:26:49 | INFO | train_inner | epoch 002: 2363 / 3002 loss=2.684, ppl=6.43, wps=5954.8, ups=0.09, wpb=64906, bsz=128, num_updates=5330, lr=9.99654e-05, gnorm=2.115, loss_scale=4, train_wall=10, gb_free=2.8, wall=60483
2021-06-19 11:27:00 | INFO | train_inner | epoch 002: 2364 / 3002 loss=2.656, ppl=6.3, wps=5764.5, ups=0.09, wpb=64852, bsz=128, num_updates=5331, lr=9.99653e-05, gnorm=4.62, loss_scale=4, train_wall=11, gb_free=2.8, wall=60495
2021-06-19 11:27:11 | INFO | train_inner | epoch 002: 2365 / 3002 loss=2.76, ppl=6.77, wps=5839.9, ups=0.09, wpb=64807, bsz=128, num_updates=5332, lr=9.99653e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=60506
2021-06-19 11:27:22 | INFO | train_inner | epoch 002: 2366 / 3002 loss=2.684, ppl=6.43, wps=5934.3, ups=0.09, wpb=64823, bsz=128, num_updates=5333, lr=9.99653e-05, gnorm=2.174, loss_scale=4, train_wall=10, gb_free=2.8, wall=60517
2021-06-19 11:27:33 | INFO | train_inner | epoch 002: 2367 / 3002 loss=2.788, ppl=6.9, wps=5937.4, ups=0.09, wpb=64801, bsz=128, num_updates=5334, lr=9.99653e-05, gnorm=2.172, loss_scale=4, train_wall=10, gb_free=2.8, wall=60528
2021-06-19 11:27:44 | INFO | train_inner | epoch 002: 2368 / 3002 loss=2.686, ppl=6.43, wps=5843.3, ups=0.09, wpb=64921, bsz=128, num_updates=5335, lr=9.99653e-05, gnorm=2.103, loss_scale=4, train_wall=11, gb_free=2.8, wall=60539
2021-06-19 11:27:55 | INFO | train_inner | epoch 002: 2369 / 3002 loss=2.716, ppl=6.57, wps=5899.9, ups=0.09, wpb=64847, bsz=128, num_updates=5336, lr=9.99653e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=60550
2021-06-19 11:28:06 | INFO | train_inner | epoch 002: 2370 / 3002 loss=2.845, ppl=7.19, wps=5877.4, ups=0.09, wpb=64847, bsz=128, num_updates=5337, lr=9.99653e-05, gnorm=2.388, loss_scale=4, train_wall=11, gb_free=2.8, wall=60561
2021-06-19 11:28:17 | INFO | train_inner | epoch 002: 2371 / 3002 loss=2.784, ppl=6.89, wps=5862.1, ups=0.09, wpb=64586, bsz=128, num_updates=5338, lr=9.99653e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=60572
2021-06-19 11:28:29 | INFO | train_inner | epoch 002: 2372 / 3002 loss=2.905, ppl=7.49, wps=5834.7, ups=0.09, wpb=64833, bsz=128, num_updates=5339, lr=9.99653e-05, gnorm=2.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=60583
2021-06-19 11:28:40 | INFO | train_inner | epoch 002: 2373 / 3002 loss=2.784, ppl=6.89, wps=5881.7, ups=0.09, wpb=64865, bsz=128, num_updates=5340, lr=9.99653e-05, gnorm=2.135, loss_scale=4, train_wall=11, gb_free=2.8, wall=60594
2021-06-19 11:28:51 | INFO | train_inner | epoch 002: 2374 / 3002 loss=2.778, ppl=6.86, wps=5832.6, ups=0.09, wpb=64821, bsz=128, num_updates=5341, lr=9.99653e-05, gnorm=2.142, loss_scale=4, train_wall=11, gb_free=2.8, wall=60605
2021-06-19 11:29:02 | INFO | train_inner | epoch 002: 2375 / 3002 loss=2.755, ppl=6.75, wps=5830, ups=0.09, wpb=64783, bsz=128, num_updates=5342, lr=9.99653e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=60616
2021-06-19 11:29:13 | INFO | train_inner | epoch 002: 2376 / 3002 loss=2.689, ppl=6.45, wps=5822.8, ups=0.09, wpb=64851, bsz=128, num_updates=5343, lr=9.99653e-05, gnorm=2.314, loss_scale=4, train_wall=11, gb_free=2.8, wall=60627
2021-06-19 11:29:24 | INFO | train_inner | epoch 002: 2377 / 3002 loss=2.85, ppl=7.21, wps=5890.4, ups=0.09, wpb=64883, bsz=128, num_updates=5344, lr=9.99652e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=60638
2021-06-19 11:29:35 | INFO | train_inner | epoch 002: 2378 / 3002 loss=2.701, ppl=6.5, wps=5817.8, ups=0.09, wpb=64761, bsz=128, num_updates=5345, lr=9.99652e-05, gnorm=2.295, loss_scale=4, train_wall=11, gb_free=2.8, wall=60649
2021-06-19 11:29:46 | INFO | train_inner | epoch 002: 2379 / 3002 loss=2.659, ppl=6.32, wps=5925, ups=0.09, wpb=64877, bsz=128, num_updates=5346, lr=9.99652e-05, gnorm=2.317, loss_scale=4, train_wall=11, gb_free=2.8, wall=60660
2021-06-19 11:29:57 | INFO | train_inner | epoch 002: 2380 / 3002 loss=2.854, ppl=7.23, wps=5953.3, ups=0.09, wpb=64759, bsz=128, num_updates=5347, lr=9.99652e-05, gnorm=2.278, loss_scale=4, train_wall=10, gb_free=2.8, wall=60671
2021-06-19 11:30:08 | INFO | train_inner | epoch 002: 2381 / 3002 loss=2.659, ppl=6.32, wps=5879.1, ups=0.09, wpb=64776, bsz=128, num_updates=5348, lr=9.99652e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=60682
2021-06-19 11:30:19 | INFO | train_inner | epoch 002: 2382 / 3002 loss=2.676, ppl=6.39, wps=5768.3, ups=0.09, wpb=64794, bsz=128, num_updates=5349, lr=9.99652e-05, gnorm=2.264, loss_scale=4, train_wall=11, gb_free=2.8, wall=60693
2021-06-19 11:30:30 | INFO | train_inner | epoch 002: 2383 / 3002 loss=2.647, ppl=6.26, wps=5798, ups=0.09, wpb=64942, bsz=128, num_updates=5350, lr=9.99652e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=60705
2021-06-19 11:30:42 | INFO | train_inner | epoch 002: 2384 / 3002 loss=2.536, ppl=5.8, wps=5765.6, ups=0.09, wpb=64870, bsz=128, num_updates=5351, lr=9.99652e-05, gnorm=2.093, loss_scale=4, train_wall=11, gb_free=2.8, wall=60716
2021-06-19 11:30:53 | INFO | train_inner | epoch 002: 2385 / 3002 loss=2.713, ppl=6.56, wps=5836.7, ups=0.09, wpb=64766, bsz=128, num_updates=5352, lr=9.99652e-05, gnorm=2.144, loss_scale=4, train_wall=11, gb_free=2.8, wall=60727
2021-06-19 11:31:04 | INFO | train_inner | epoch 002: 2386 / 3002 loss=2.66, ppl=6.32, wps=5879.1, ups=0.09, wpb=64740, bsz=128, num_updates=5353, lr=9.99652e-05, gnorm=2.231, loss_scale=4, train_wall=11, gb_free=2.8, wall=60738
2021-06-19 11:31:15 | INFO | train_inner | epoch 002: 2387 / 3002 loss=2.699, ppl=6.49, wps=5940.9, ups=0.09, wpb=64851, bsz=128, num_updates=5354, lr=9.99652e-05, gnorm=2.234, loss_scale=4, train_wall=10, gb_free=2.8, wall=60749
2021-06-19 11:31:26 | INFO | train_inner | epoch 002: 2388 / 3002 loss=2.715, ppl=6.56, wps=5735.1, ups=0.09, wpb=64739, bsz=128, num_updates=5355, lr=9.99652e-05, gnorm=2.664, loss_scale=4, train_wall=11, gb_free=2.8, wall=60760
2021-06-19 11:31:37 | INFO | train_inner | epoch 002: 2389 / 3002 loss=2.698, ppl=6.49, wps=5801.1, ups=0.09, wpb=64732, bsz=128, num_updates=5356, lr=9.99651e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=60771
2021-06-19 11:31:48 | INFO | train_inner | epoch 002: 2390 / 3002 loss=2.733, ppl=6.65, wps=5997.9, ups=0.09, wpb=64774, bsz=128, num_updates=5357, lr=9.99651e-05, gnorm=2.341, loss_scale=4, train_wall=10, gb_free=2.8, wall=60782
2021-06-19 11:31:59 | INFO | train_inner | epoch 002: 2391 / 3002 loss=2.652, ppl=6.28, wps=5882.8, ups=0.09, wpb=64793, bsz=128, num_updates=5358, lr=9.99651e-05, gnorm=2.421, loss_scale=4, train_wall=11, gb_free=2.8, wall=60793
2021-06-19 11:32:10 | INFO | train_inner | epoch 002: 2392 / 3002 loss=2.484, ppl=5.6, wps=5814.1, ups=0.09, wpb=64863, bsz=128, num_updates=5359, lr=9.99651e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=60804
2021-06-19 11:32:21 | INFO | train_inner | epoch 002: 2393 / 3002 loss=2.768, ppl=6.81, wps=5905.5, ups=0.09, wpb=64708, bsz=128, num_updates=5360, lr=9.99651e-05, gnorm=2.185, loss_scale=4, train_wall=10, gb_free=2.8, wall=60815
2021-06-19 11:32:32 | INFO | train_inner | epoch 002: 2394 / 3002 loss=2.802, ppl=6.97, wps=5874, ups=0.09, wpb=64841, bsz=128, num_updates=5361, lr=9.99651e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=60826
2021-06-19 11:32:43 | INFO | train_inner | epoch 002: 2395 / 3002 loss=2.787, ppl=6.9, wps=5892.2, ups=0.09, wpb=64865, bsz=128, num_updates=5362, lr=9.99651e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=60837
2021-06-19 11:32:54 | INFO | train_inner | epoch 002: 2396 / 3002 loss=2.768, ppl=6.81, wps=5850, ups=0.09, wpb=64829, bsz=128, num_updates=5363, lr=9.99651e-05, gnorm=2.331, loss_scale=4, train_wall=11, gb_free=2.8, wall=60848
2021-06-19 11:33:05 | INFO | train_inner | epoch 002: 2397 / 3002 loss=2.634, ppl=6.21, wps=5976.8, ups=0.09, wpb=64923, bsz=128, num_updates=5364, lr=9.99651e-05, gnorm=2.1, loss_scale=4, train_wall=10, gb_free=2.8, wall=60859
2021-06-19 11:33:16 | INFO | train_inner | epoch 002: 2398 / 3002 loss=2.788, ppl=6.91, wps=5843, ups=0.09, wpb=64806, bsz=128, num_updates=5365, lr=9.99651e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=60870
2021-06-19 11:33:27 | INFO | train_inner | epoch 002: 2399 / 3002 loss=2.735, ppl=6.66, wps=5823, ups=0.09, wpb=64851, bsz=128, num_updates=5366, lr=9.99651e-05, gnorm=2.173, loss_scale=8, train_wall=11, gb_free=2.8, wall=60882
2021-06-19 11:33:38 | INFO | train_inner | epoch 002: 2400 / 3002 loss=2.841, ppl=7.16, wps=5876, ups=0.09, wpb=64774, bsz=128, num_updates=5367, lr=9.99651e-05, gnorm=2.166, loss_scale=8, train_wall=11, gb_free=2.8, wall=60893
2021-06-19 11:33:49 | INFO | train_inner | epoch 002: 2401 / 3002 loss=2.805, ppl=6.99, wps=5872.7, ups=0.09, wpb=64908, bsz=128, num_updates=5368, lr=9.99651e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=60904
2021-06-19 11:34:00 | INFO | train_inner | epoch 002: 2402 / 3002 loss=2.793, ppl=6.93, wps=5916.6, ups=0.09, wpb=64715, bsz=128, num_updates=5369, lr=9.9965e-05, gnorm=2.257, loss_scale=8, train_wall=10, gb_free=2.8, wall=60915
2021-06-19 11:34:11 | INFO | train_inner | epoch 002: 2403 / 3002 loss=2.696, ppl=6.48, wps=5790.6, ups=0.09, wpb=64793, bsz=128, num_updates=5370, lr=9.9965e-05, gnorm=2.319, loss_scale=8, train_wall=11, gb_free=2.8, wall=60926
2021-06-19 11:34:23 | INFO | train_inner | epoch 002: 2404 / 3002 loss=2.544, ppl=5.83, wps=5777.2, ups=0.09, wpb=64756, bsz=128, num_updates=5371, lr=9.9965e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=60937
2021-06-19 11:34:34 | INFO | train_inner | epoch 002: 2405 / 3002 loss=2.691, ppl=6.46, wps=5884.8, ups=0.09, wpb=64920, bsz=128, num_updates=5372, lr=9.9965e-05, gnorm=2.224, loss_scale=8, train_wall=11, gb_free=2.8, wall=60948
2021-06-19 11:34:45 | INFO | train_inner | epoch 002: 2406 / 3002 loss=2.821, ppl=7.07, wps=5932.1, ups=0.09, wpb=64829, bsz=128, num_updates=5373, lr=9.9965e-05, gnorm=2.21, loss_scale=8, train_wall=11, gb_free=2.8, wall=60959
2021-06-19 11:34:56 | INFO | train_inner | epoch 002: 2407 / 3002 loss=2.717, ppl=6.57, wps=5896.6, ups=0.09, wpb=64873, bsz=128, num_updates=5374, lr=9.9965e-05, gnorm=3.615, loss_scale=8, train_wall=11, gb_free=2.8, wall=60970
2021-06-19 11:35:06 | INFO | train_inner | epoch 002: 2408 / 3002 loss=2.729, ppl=6.63, wps=6021.9, ups=0.09, wpb=64887, bsz=128, num_updates=5375, lr=9.9965e-05, gnorm=2.273, loss_scale=8, train_wall=10, gb_free=2.8, wall=60981
2021-06-19 11:35:18 | INFO | train_inner | epoch 002: 2409 / 3002 loss=2.796, ppl=6.94, wps=5804.2, ups=0.09, wpb=64805, bsz=128, num_updates=5376, lr=9.9965e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=60992
2021-06-19 11:35:29 | INFO | train_inner | epoch 002: 2410 / 3002 loss=2.557, ppl=5.88, wps=5807.7, ups=0.09, wpb=64818, bsz=128, num_updates=5377, lr=9.9965e-05, gnorm=2.254, loss_scale=8, train_wall=11, gb_free=2.8, wall=61003
2021-06-19 11:35:40 | INFO | train_inner | epoch 002: 2411 / 3002 loss=2.931, ppl=7.62, wps=5976.2, ups=0.09, wpb=64841, bsz=128, num_updates=5378, lr=9.9965e-05, gnorm=2.274, loss_scale=8, train_wall=10, gb_free=2.8, wall=61014
2021-06-19 11:35:51 | INFO | train_inner | epoch 002: 2412 / 3002 loss=2.672, ppl=6.37, wps=5853.6, ups=0.09, wpb=64867, bsz=128, num_updates=5379, lr=9.9965e-05, gnorm=2.29, loss_scale=8, train_wall=11, gb_free=2.8, wall=61025
2021-06-19 11:36:02 | INFO | train_inner | epoch 002: 2413 / 3002 loss=2.68, ppl=6.41, wps=5909.6, ups=0.09, wpb=64853, bsz=128, num_updates=5380, lr=9.9965e-05, gnorm=2.383, loss_scale=8, train_wall=11, gb_free=2.8, wall=61036
2021-06-19 11:36:13 | INFO | train_inner | epoch 002: 2414 / 3002 loss=2.903, ppl=7.48, wps=5767.3, ups=0.09, wpb=64841, bsz=128, num_updates=5381, lr=9.99649e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=61047
2021-06-19 11:36:24 | INFO | train_inner | epoch 002: 2415 / 3002 loss=2.681, ppl=6.42, wps=5901.5, ups=0.09, wpb=64866, bsz=128, num_updates=5382, lr=9.99649e-05, gnorm=2.46, loss_scale=8, train_wall=10, gb_free=2.8, wall=61058
2021-06-19 11:36:35 | INFO | train_inner | epoch 002: 2416 / 3002 loss=2.832, ppl=7.12, wps=5798, ups=0.09, wpb=64867, bsz=128, num_updates=5383, lr=9.99649e-05, gnorm=2.27, loss_scale=8, train_wall=11, gb_free=2.8, wall=61069
2021-06-19 11:36:46 | INFO | train_inner | epoch 002: 2417 / 3002 loss=2.5, ppl=5.66, wps=5764.2, ups=0.09, wpb=64884, bsz=128, num_updates=5384, lr=9.99649e-05, gnorm=2.563, loss_scale=8, train_wall=11, gb_free=2.8, wall=61081
2021-06-19 11:36:57 | INFO | train_inner | epoch 002: 2418 / 3002 loss=2.708, ppl=6.53, wps=5884.2, ups=0.09, wpb=64849, bsz=128, num_updates=5385, lr=9.99649e-05, gnorm=2.27, loss_scale=8, train_wall=11, gb_free=2.8, wall=61092
2021-06-19 11:37:08 | INFO | train_inner | epoch 002: 2419 / 3002 loss=2.629, ppl=6.19, wps=5877.3, ups=0.09, wpb=64823, bsz=128, num_updates=5386, lr=9.99649e-05, gnorm=2.248, loss_scale=8, train_wall=11, gb_free=2.8, wall=61103
2021-06-19 11:37:19 | INFO | train_inner | epoch 002: 2420 / 3002 loss=2.611, ppl=6.11, wps=5922.1, ups=0.09, wpb=64844, bsz=128, num_updates=5387, lr=9.99649e-05, gnorm=2.292, loss_scale=8, train_wall=11, gb_free=2.8, wall=61114
2021-06-19 11:37:30 | INFO | train_inner | epoch 002: 2421 / 3002 loss=2.671, ppl=6.37, wps=5821, ups=0.09, wpb=64858, bsz=128, num_updates=5388, lr=9.99649e-05, gnorm=2.182, loss_scale=8, train_wall=11, gb_free=2.8, wall=61125
2021-06-19 11:37:41 | INFO | train_inner | epoch 002: 2422 / 3002 loss=2.591, ppl=6.03, wps=5935.4, ups=0.09, wpb=64862, bsz=128, num_updates=5389, lr=9.99649e-05, gnorm=3.056, loss_scale=8, train_wall=10, gb_free=2.8, wall=61136
2021-06-19 11:37:52 | INFO | train_inner | epoch 002: 2423 / 3002 loss=2.708, ppl=6.53, wps=6021.9, ups=0.09, wpb=64856, bsz=128, num_updates=5390, lr=9.99649e-05, gnorm=2.21, loss_scale=8, train_wall=10, gb_free=2.8, wall=61146
2021-06-19 11:38:03 | INFO | train_inner | epoch 002: 2424 / 3002 loss=2.647, ppl=6.27, wps=5874.2, ups=0.09, wpb=64877, bsz=128, num_updates=5391, lr=9.99649e-05, gnorm=2.188, loss_scale=8, train_wall=11, gb_free=2.8, wall=61158
2021-06-19 11:38:14 | INFO | train_inner | epoch 002: 2425 / 3002 loss=2.73, ppl=6.63, wps=5900.5, ups=0.09, wpb=64815, bsz=128, num_updates=5392, lr=9.99649e-05, gnorm=6.259, loss_scale=8, train_wall=11, gb_free=2.8, wall=61169
2021-06-19 11:38:25 | INFO | train_inner | epoch 002: 2426 / 3002 loss=2.663, ppl=6.34, wps=5836.5, ups=0.09, wpb=64870, bsz=128, num_updates=5393, lr=9.99649e-05, gnorm=2.251, loss_scale=8, train_wall=11, gb_free=2.8, wall=61180
2021-06-19 11:38:37 | INFO | train_inner | epoch 002: 2427 / 3002 loss=2.8, ppl=6.97, wps=5689.2, ups=0.09, wpb=64761, bsz=128, num_updates=5394, lr=9.99648e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=61191
2021-06-19 11:38:48 | INFO | train_inner | epoch 002: 2428 / 3002 loss=2.7, ppl=6.5, wps=5805.1, ups=0.09, wpb=64890, bsz=128, num_updates=5395, lr=9.99648e-05, gnorm=2.505, loss_scale=8, train_wall=11, gb_free=2.8, wall=61202
2021-06-19 11:38:59 | INFO | train_inner | epoch 002: 2429 / 3002 loss=2.634, ppl=6.21, wps=5958.8, ups=0.09, wpb=64826, bsz=128, num_updates=5396, lr=9.99648e-05, gnorm=2.258, loss_scale=8, train_wall=10, gb_free=2.8, wall=61213
2021-06-19 11:39:10 | INFO | train_inner | epoch 002: 2430 / 3002 loss=2.473, ppl=5.55, wps=5809.6, ups=0.09, wpb=64855, bsz=128, num_updates=5397, lr=9.99648e-05, gnorm=2.242, loss_scale=8, train_wall=11, gb_free=2.8, wall=61224
2021-06-19 11:39:21 | INFO | train_inner | epoch 002: 2431 / 3002 loss=2.694, ppl=6.47, wps=5854.9, ups=0.09, wpb=64830, bsz=128, num_updates=5398, lr=9.99648e-05, gnorm=2.301, loss_scale=8, train_wall=11, gb_free=2.8, wall=61235
2021-06-19 11:39:32 | INFO | train_inner | epoch 002: 2432 / 3002 loss=2.647, ppl=6.26, wps=5897.3, ups=0.09, wpb=64770, bsz=128, num_updates=5399, lr=9.99648e-05, gnorm=2.307, loss_scale=8, train_wall=11, gb_free=2.8, wall=61246
2021-06-19 11:39:43 | INFO | train_inner | epoch 002: 2433 / 3002 loss=2.695, ppl=6.48, wps=5849.6, ups=0.09, wpb=64886, bsz=128, num_updates=5400, lr=9.99648e-05, gnorm=3.639, loss_scale=8, train_wall=11, gb_free=2.8, wall=61257
2021-06-19 11:39:54 | INFO | train_inner | epoch 002: 2434 / 3002 loss=2.743, ppl=6.69, wps=5815, ups=0.09, wpb=64851, bsz=128, num_updates=5401, lr=9.99648e-05, gnorm=2.326, loss_scale=8, train_wall=11, gb_free=2.8, wall=61269
2021-06-19 11:40:05 | INFO | train_inner | epoch 002: 2435 / 3002 loss=2.659, ppl=6.32, wps=5854.7, ups=0.09, wpb=64816, bsz=128, num_updates=5402, lr=9.99648e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=61280
2021-06-19 11:40:16 | INFO | train_inner | epoch 002: 2436 / 3002 loss=2.583, ppl=5.99, wps=5804.6, ups=0.09, wpb=64793, bsz=128, num_updates=5403, lr=9.99648e-05, gnorm=2.15, loss_scale=8, train_wall=11, gb_free=2.8, wall=61291
2021-06-19 11:40:28 | INFO | train_inner | epoch 002: 2437 / 3002 loss=2.545, ppl=5.84, wps=5829.3, ups=0.09, wpb=64792, bsz=128, num_updates=5404, lr=9.99648e-05, gnorm=2.19, loss_scale=8, train_wall=11, gb_free=2.8, wall=61302
2021-06-19 11:40:39 | INFO | train_inner | epoch 002: 2438 / 3002 loss=2.868, ppl=7.3, wps=5804.5, ups=0.09, wpb=64860, bsz=128, num_updates=5405, lr=9.99648e-05, gnorm=2.268, loss_scale=8, train_wall=11, gb_free=2.8, wall=61313
2021-06-19 11:40:50 | INFO | train_inner | epoch 002: 2439 / 3002 loss=2.694, ppl=6.47, wps=5988.4, ups=0.09, wpb=64854, bsz=128, num_updates=5406, lr=9.99647e-05, gnorm=2.264, loss_scale=8, train_wall=10, gb_free=2.8, wall=61324
2021-06-19 11:41:01 | INFO | train_inner | epoch 002: 2440 / 3002 loss=2.753, ppl=6.74, wps=5873.3, ups=0.09, wpb=64819, bsz=128, num_updates=5407, lr=9.99647e-05, gnorm=2.171, loss_scale=8, train_wall=11, gb_free=2.8, wall=61335
2021-06-19 11:41:12 | INFO | train_inner | epoch 002: 2441 / 3002 loss=2.662, ppl=6.33, wps=5856.6, ups=0.09, wpb=64792, bsz=128, num_updates=5408, lr=9.99647e-05, gnorm=2.247, loss_scale=8, train_wall=11, gb_free=2.8, wall=61346
2021-06-19 11:41:23 | INFO | train_inner | epoch 002: 2442 / 3002 loss=2.769, ppl=6.82, wps=5766.8, ups=0.09, wpb=64887, bsz=128, num_updates=5409, lr=9.99647e-05, gnorm=2.281, loss_scale=8, train_wall=11, gb_free=2.8, wall=61357
2021-06-19 11:41:34 | INFO | train_inner | epoch 002: 2443 / 3002 loss=2.706, ppl=6.52, wps=5864, ups=0.09, wpb=64867, bsz=128, num_updates=5410, lr=9.99647e-05, gnorm=2.304, loss_scale=8, train_wall=11, gb_free=2.8, wall=61368
2021-06-19 11:41:45 | INFO | train_inner | epoch 002: 2444 / 3002 loss=2.78, ppl=6.87, wps=5864.8, ups=0.09, wpb=64821, bsz=128, num_updates=5411, lr=9.99647e-05, gnorm=2.166, loss_scale=8, train_wall=11, gb_free=2.8, wall=61379
2021-06-19 11:41:56 | INFO | train_inner | epoch 002: 2445 / 3002 loss=2.811, ppl=7.02, wps=5746.1, ups=0.09, wpb=64703, bsz=128, num_updates=5412, lr=9.99647e-05, gnorm=2.34, loss_scale=8, train_wall=11, gb_free=2.8, wall=61391
2021-06-19 11:42:07 | INFO | train_inner | epoch 002: 2446 / 3002 loss=2.83, ppl=7.11, wps=5789.3, ups=0.09, wpb=64864, bsz=128, num_updates=5413, lr=9.99647e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=61402
2021-06-19 11:42:19 | INFO | train_inner | epoch 002: 2447 / 3002 loss=2.595, ppl=6.04, wps=5829.6, ups=0.09, wpb=64844, bsz=128, num_updates=5414, lr=9.99647e-05, gnorm=2.165, loss_scale=8, train_wall=11, gb_free=2.8, wall=61413
2021-06-19 11:42:30 | INFO | train_inner | epoch 002: 2448 / 3002 loss=2.811, ppl=7.02, wps=5872.3, ups=0.09, wpb=64817, bsz=128, num_updates=5415, lr=9.99647e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=61424
2021-06-19 11:42:41 | INFO | train_inner | epoch 002: 2449 / 3002 loss=2.912, ppl=7.53, wps=5830.8, ups=0.09, wpb=64762, bsz=128, num_updates=5416, lr=9.99647e-05, gnorm=2.313, loss_scale=8, train_wall=11, gb_free=2.8, wall=61435
2021-06-19 11:42:52 | INFO | train_inner | epoch 002: 2450 / 3002 loss=2.653, ppl=6.29, wps=5733.5, ups=0.09, wpb=64763, bsz=128, num_updates=5417, lr=9.99647e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=61446
2021-06-19 11:43:03 | INFO | train_inner | epoch 002: 2451 / 3002 loss=2.595, ppl=6.04, wps=5828.7, ups=0.09, wpb=64837, bsz=128, num_updates=5418, lr=9.99647e-05, gnorm=2.164, loss_scale=8, train_wall=11, gb_free=2.8, wall=61458
2021-06-19 11:43:14 | INFO | train_inner | epoch 002: 2452 / 3002 loss=2.827, ppl=7.1, wps=5920.2, ups=0.09, wpb=64857, bsz=128, num_updates=5419, lr=9.99646e-05, gnorm=2.223, loss_scale=8, train_wall=11, gb_free=2.8, wall=61468
2021-06-19 11:43:25 | INFO | train_inner | epoch 002: 2453 / 3002 loss=2.741, ppl=6.68, wps=6037, ups=0.09, wpb=64868, bsz=128, num_updates=5420, lr=9.99646e-05, gnorm=2.237, loss_scale=8, train_wall=10, gb_free=2.8, wall=61479
2021-06-19 11:43:36 | INFO | train_inner | epoch 002: 2454 / 3002 loss=2.651, ppl=6.28, wps=5791.6, ups=0.09, wpb=64859, bsz=128, num_updates=5421, lr=9.99646e-05, gnorm=2.12, loss_scale=8, train_wall=11, gb_free=2.8, wall=61490
2021-06-19 11:43:47 | INFO | train_inner | epoch 002: 2455 / 3002 loss=2.66, ppl=6.32, wps=5881.6, ups=0.09, wpb=64813, bsz=128, num_updates=5422, lr=9.99646e-05, gnorm=2.359, loss_scale=8, train_wall=11, gb_free=2.8, wall=61501
2021-06-19 11:43:58 | INFO | train_inner | epoch 002: 2456 / 3002 loss=2.754, ppl=6.74, wps=5874.3, ups=0.09, wpb=64838, bsz=128, num_updates=5423, lr=9.99646e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=61512
2021-06-19 11:44:09 | INFO | train_inner | epoch 002: 2457 / 3002 loss=2.647, ppl=6.26, wps=5876.2, ups=0.09, wpb=64843, bsz=128, num_updates=5424, lr=9.99646e-05, gnorm=2.245, loss_scale=8, train_wall=11, gb_free=2.8, wall=61523
2021-06-19 11:44:20 | INFO | train_inner | epoch 002: 2458 / 3002 loss=2.801, ppl=6.97, wps=5749.2, ups=0.09, wpb=64827, bsz=128, num_updates=5425, lr=9.99646e-05, gnorm=2.149, loss_scale=8, train_wall=11, gb_free=2.8, wall=61535
2021-06-19 11:44:32 | INFO | train_inner | epoch 002: 2459 / 3002 loss=2.836, ppl=7.14, wps=5855.7, ups=0.09, wpb=64908, bsz=128, num_updates=5426, lr=9.99646e-05, gnorm=2.227, loss_scale=8, train_wall=11, gb_free=2.8, wall=61546
2021-06-19 11:44:43 | INFO | train_inner | epoch 002: 2460 / 3002 loss=2.743, ppl=6.69, wps=5792.2, ups=0.09, wpb=64828, bsz=128, num_updates=5427, lr=9.99646e-05, gnorm=2.289, loss_scale=8, train_wall=11, gb_free=2.8, wall=61557
2021-06-19 11:44:54 | INFO | train_inner | epoch 002: 2461 / 3002 loss=2.819, ppl=7.05, wps=5916.8, ups=0.09, wpb=64849, bsz=128, num_updates=5428, lr=9.99646e-05, gnorm=2.084, loss_scale=8, train_wall=11, gb_free=2.8, wall=61568
2021-06-19 11:45:05 | INFO | train_inner | epoch 002: 2462 / 3002 loss=2.705, ppl=6.52, wps=5824.6, ups=0.09, wpb=64743, bsz=128, num_updates=5429, lr=9.99646e-05, gnorm=2.406, loss_scale=8, train_wall=11, gb_free=2.8, wall=61579
2021-06-19 11:45:16 | INFO | train_inner | epoch 002: 2463 / 3002 loss=2.702, ppl=6.51, wps=5818.6, ups=0.09, wpb=64851, bsz=128, num_updates=5430, lr=9.99646e-05, gnorm=2.503, loss_scale=8, train_wall=11, gb_free=2.8, wall=61590
2021-06-19 11:45:27 | INFO | train_inner | epoch 002: 2464 / 3002 loss=2.776, ppl=6.85, wps=5794.5, ups=0.09, wpb=64896, bsz=128, num_updates=5431, lr=9.99645e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=61601
2021-06-19 11:45:38 | INFO | train_inner | epoch 002: 2465 / 3002 loss=2.623, ppl=6.16, wps=5901.7, ups=0.09, wpb=64914, bsz=128, num_updates=5432, lr=9.99645e-05, gnorm=2.33, loss_scale=8, train_wall=11, gb_free=2.8, wall=61612
2021-06-19 11:45:49 | INFO | train_inner | epoch 002: 2466 / 3002 loss=2.687, ppl=6.44, wps=5956.3, ups=0.09, wpb=64855, bsz=128, num_updates=5433, lr=9.99645e-05, gnorm=2.237, loss_scale=8, train_wall=10, gb_free=2.8, wall=61623
2021-06-19 11:46:00 | INFO | train_inner | epoch 002: 2467 / 3002 loss=2.697, ppl=6.49, wps=5814.5, ups=0.09, wpb=64781, bsz=128, num_updates=5434, lr=9.99645e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=61635
2021-06-19 11:46:11 | INFO | train_inner | epoch 002: 2468 / 3002 loss=2.714, ppl=6.56, wps=5798.6, ups=0.09, wpb=64796, bsz=128, num_updates=5435, lr=9.99645e-05, gnorm=2.205, loss_scale=8, train_wall=11, gb_free=2.8, wall=61646
2021-06-19 11:46:22 | INFO | train_inner | epoch 002: 2469 / 3002 loss=2.668, ppl=6.35, wps=5817.2, ups=0.09, wpb=64817, bsz=128, num_updates=5436, lr=9.99645e-05, gnorm=2.15, loss_scale=8, train_wall=11, gb_free=2.8, wall=61657
2021-06-19 11:46:34 | INFO | train_inner | epoch 002: 2470 / 3002 loss=2.648, ppl=6.27, wps=5825, ups=0.09, wpb=64819, bsz=128, num_updates=5437, lr=9.99645e-05, gnorm=2.147, loss_scale=8, train_wall=11, gb_free=2.8, wall=61668
2021-06-19 11:46:45 | INFO | train_inner | epoch 002: 2471 / 3002 loss=2.706, ppl=6.52, wps=5661.6, ups=0.09, wpb=64785, bsz=128, num_updates=5438, lr=9.99645e-05, gnorm=2.132, loss_scale=8, train_wall=11, gb_free=2.8, wall=61679
2021-06-19 11:46:56 | INFO | train_inner | epoch 002: 2472 / 3002 loss=2.808, ppl=7, wps=5870.4, ups=0.09, wpb=64896, bsz=128, num_updates=5439, lr=9.99645e-05, gnorm=2.538, loss_scale=8, train_wall=11, gb_free=2.8, wall=61690
2021-06-19 11:47:07 | INFO | train_inner | epoch 002: 2473 / 3002 loss=2.749, ppl=6.72, wps=5870.3, ups=0.09, wpb=64854, bsz=128, num_updates=5440, lr=9.99645e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=61701
2021-06-19 11:47:18 | INFO | train_inner | epoch 002: 2474 / 3002 loss=2.805, ppl=6.99, wps=5935, ups=0.09, wpb=64885, bsz=128, num_updates=5441, lr=9.99645e-05, gnorm=2.253, loss_scale=8, train_wall=10, gb_free=2.8, wall=61712
2021-06-19 11:47:29 | INFO | train_inner | epoch 002: 2475 / 3002 loss=2.734, ppl=6.65, wps=5959.7, ups=0.09, wpb=64733, bsz=128, num_updates=5442, lr=9.99645e-05, gnorm=2.687, loss_scale=8, train_wall=10, gb_free=2.8, wall=61723
2021-06-19 11:47:40 | INFO | train_inner | epoch 002: 2476 / 3002 loss=2.665, ppl=6.34, wps=5990.9, ups=0.09, wpb=64834, bsz=128, num_updates=5443, lr=9.99645e-05, gnorm=2.29, loss_scale=8, train_wall=10, gb_free=2.8, wall=61734
2021-06-19 11:47:51 | INFO | train_inner | epoch 002: 2477 / 3002 loss=2.735, ppl=6.66, wps=5883.2, ups=0.09, wpb=64834, bsz=128, num_updates=5444, lr=9.99644e-05, gnorm=3.089, loss_scale=8, train_wall=11, gb_free=2.8, wall=61745
2021-06-19 11:48:02 | INFO | train_inner | epoch 002: 2478 / 3002 loss=2.736, ppl=6.66, wps=5838.4, ups=0.09, wpb=64853, bsz=128, num_updates=5445, lr=9.99644e-05, gnorm=2.117, loss_scale=8, train_wall=11, gb_free=2.8, wall=61756
2021-06-19 11:48:13 | INFO | train_inner | epoch 002: 2479 / 3002 loss=2.815, ppl=7.04, wps=5939, ups=0.09, wpb=64822, bsz=128, num_updates=5446, lr=9.99644e-05, gnorm=2.327, loss_scale=8, train_wall=10, gb_free=2.8, wall=61767
2021-06-19 11:48:24 | INFO | train_inner | epoch 002: 2480 / 3002 loss=2.725, ppl=6.61, wps=5932.8, ups=0.09, wpb=64810, bsz=128, num_updates=5447, lr=9.99644e-05, gnorm=2.325, loss_scale=8, train_wall=10, gb_free=2.8, wall=61778
2021-06-19 11:48:35 | INFO | train_inner | epoch 002: 2481 / 3002 loss=2.631, ppl=6.19, wps=5938.2, ups=0.09, wpb=64882, bsz=128, num_updates=5448, lr=9.99644e-05, gnorm=2.067, loss_scale=8, train_wall=10, gb_free=2.8, wall=61789
2021-06-19 11:48:46 | INFO | train_inner | epoch 002: 2482 / 3002 loss=2.755, ppl=6.75, wps=5873.7, ups=0.09, wpb=64809, bsz=128, num_updates=5449, lr=9.99644e-05, gnorm=2.352, loss_scale=8, train_wall=11, gb_free=2.8, wall=61800
2021-06-19 11:48:57 | INFO | train_inner | epoch 002: 2483 / 3002 loss=2.535, ppl=5.79, wps=5821.1, ups=0.09, wpb=64886, bsz=128, num_updates=5450, lr=9.99644e-05, gnorm=2.294, loss_scale=8, train_wall=11, gb_free=2.8, wall=61811
2021-06-19 11:49:08 | INFO | train_inner | epoch 002: 2484 / 3002 loss=2.871, ppl=7.32, wps=5822.7, ups=0.09, wpb=64725, bsz=128, num_updates=5451, lr=9.99644e-05, gnorm=2.355, loss_scale=8, train_wall=11, gb_free=2.8, wall=61822
2021-06-19 11:49:19 | INFO | train_inner | epoch 002: 2485 / 3002 loss=2.787, ppl=6.9, wps=5801.1, ups=0.09, wpb=64890, bsz=128, num_updates=5452, lr=9.99644e-05, gnorm=2.414, loss_scale=8, train_wall=11, gb_free=2.8, wall=61833
2021-06-19 11:49:30 | INFO | train_inner | epoch 002: 2486 / 3002 loss=2.895, ppl=7.44, wps=5926.3, ups=0.09, wpb=64923, bsz=128, num_updates=5453, lr=9.99644e-05, gnorm=2.296, loss_scale=8, train_wall=11, gb_free=2.8, wall=61844
2021-06-19 11:49:41 | INFO | train_inner | epoch 002: 2487 / 3002 loss=2.741, ppl=6.68, wps=5914.3, ups=0.09, wpb=64862, bsz=128, num_updates=5454, lr=9.99644e-05, gnorm=2.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=61855
2021-06-19 11:49:52 | INFO | train_inner | epoch 002: 2488 / 3002 loss=2.802, ppl=6.98, wps=5919.7, ups=0.09, wpb=64880, bsz=128, num_updates=5455, lr=9.99644e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=61866
2021-06-19 11:50:03 | INFO | train_inner | epoch 002: 2489 / 3002 loss=2.767, ppl=6.81, wps=5850.5, ups=0.09, wpb=64860, bsz=128, num_updates=5456, lr=9.99643e-05, gnorm=2.246, loss_scale=8, train_wall=11, gb_free=2.8, wall=61877
2021-06-19 11:50:14 | INFO | train_inner | epoch 002: 2490 / 3002 loss=2.686, ppl=6.44, wps=5796.7, ups=0.09, wpb=64764, bsz=128, num_updates=5457, lr=9.99643e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=61889
2021-06-19 11:50:25 | INFO | train_inner | epoch 002: 2491 / 3002 loss=2.652, ppl=6.29, wps=5916.9, ups=0.09, wpb=64745, bsz=128, num_updates=5458, lr=9.99643e-05, gnorm=2.349, loss_scale=8, train_wall=10, gb_free=2.8, wall=61900
2021-06-19 11:50:36 | INFO | train_inner | epoch 002: 2492 / 3002 loss=2.583, ppl=5.99, wps=5764.3, ups=0.09, wpb=64863, bsz=128, num_updates=5459, lr=9.99643e-05, gnorm=2.434, loss_scale=8, train_wall=11, gb_free=2.8, wall=61911
2021-06-19 11:50:48 | INFO | train_inner | epoch 002: 2493 / 3002 loss=2.701, ppl=6.5, wps=5843.6, ups=0.09, wpb=64863, bsz=128, num_updates=5460, lr=9.99643e-05, gnorm=2.372, loss_scale=8, train_wall=11, gb_free=2.8, wall=61922
2021-06-19 11:50:59 | INFO | train_inner | epoch 002: 2494 / 3002 loss=2.653, ppl=6.29, wps=5875.8, ups=0.09, wpb=64856, bsz=128, num_updates=5461, lr=9.99643e-05, gnorm=2.809, loss_scale=8, train_wall=11, gb_free=2.8, wall=61933
2021-06-19 11:51:10 | INFO | train_inner | epoch 002: 2495 / 3002 loss=2.809, ppl=7.01, wps=5845, ups=0.09, wpb=64771, bsz=128, num_updates=5462, lr=9.99643e-05, gnorm=2.287, loss_scale=8, train_wall=11, gb_free=2.8, wall=61944
2021-06-19 11:51:21 | INFO | train_inner | epoch 002: 2496 / 3002 loss=2.703, ppl=6.51, wps=5751.2, ups=0.09, wpb=64827, bsz=128, num_updates=5463, lr=9.99643e-05, gnorm=2.259, loss_scale=8, train_wall=11, gb_free=2.8, wall=61955
2021-06-19 11:51:32 | INFO | train_inner | epoch 002: 2497 / 3002 loss=2.867, ppl=7.29, wps=6069.3, ups=0.09, wpb=64863, bsz=128, num_updates=5464, lr=9.99643e-05, gnorm=2.343, loss_scale=8, train_wall=10, gb_free=2.8, wall=61966
2021-06-19 11:51:43 | INFO | train_inner | epoch 002: 2498 / 3002 loss=2.687, ppl=6.44, wps=5718.5, ups=0.09, wpb=64838, bsz=128, num_updates=5465, lr=9.99643e-05, gnorm=2.404, loss_scale=8, train_wall=11, gb_free=2.8, wall=61977
2021-06-19 11:51:54 | INFO | train_inner | epoch 002: 2499 / 3002 loss=2.644, ppl=6.25, wps=5720.2, ups=0.09, wpb=64886, bsz=128, num_updates=5466, lr=9.99643e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=61989
2021-06-19 11:52:05 | INFO | train_inner | epoch 002: 2500 / 3002 loss=2.61, ppl=6.11, wps=5832.3, ups=0.09, wpb=64823, bsz=128, num_updates=5467, lr=9.99643e-05, gnorm=2.444, loss_scale=8, train_wall=11, gb_free=2.8, wall=62000
2021-06-19 11:52:16 | INFO | train_inner | epoch 002: 2501 / 3002 loss=2.654, ppl=6.29, wps=5908.4, ups=0.09, wpb=64923, bsz=128, num_updates=5468, lr=9.99643e-05, gnorm=3.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=62011
2021-06-19 11:52:28 | INFO | train_inner | epoch 002: 2502 / 3002 loss=2.589, ppl=6.02, wps=5810.7, ups=0.09, wpb=64834, bsz=128, num_updates=5469, lr=9.99642e-05, gnorm=2.199, loss_scale=8, train_wall=11, gb_free=2.8, wall=62022
2021-06-19 11:52:39 | INFO | train_inner | epoch 002: 2503 / 3002 loss=2.767, ppl=6.81, wps=5949.8, ups=0.09, wpb=64890, bsz=128, num_updates=5470, lr=9.99642e-05, gnorm=2.44, loss_scale=8, train_wall=10, gb_free=2.8, wall=62033
2021-06-19 11:52:50 | INFO | train_inner | epoch 002: 2504 / 3002 loss=2.673, ppl=6.38, wps=5860.6, ups=0.09, wpb=64881, bsz=128, num_updates=5471, lr=9.99642e-05, gnorm=2.295, loss_scale=8, train_wall=11, gb_free=2.8, wall=62044
2021-06-19 11:53:01 | INFO | train_inner | epoch 002: 2505 / 3002 loss=2.732, ppl=6.64, wps=5808.5, ups=0.09, wpb=64787, bsz=128, num_updates=5472, lr=9.99642e-05, gnorm=2.607, loss_scale=8, train_wall=11, gb_free=2.8, wall=62055
2021-06-19 11:53:12 | INFO | train_inner | epoch 002: 2506 / 3002 loss=2.675, ppl=6.39, wps=5915.2, ups=0.09, wpb=64867, bsz=128, num_updates=5473, lr=9.99642e-05, gnorm=2.24, loss_scale=8, train_wall=10, gb_free=2.8, wall=62066
2021-06-19 11:53:23 | INFO | train_inner | epoch 002: 2507 / 3002 loss=2.819, ppl=7.06, wps=5865.4, ups=0.09, wpb=64897, bsz=128, num_updates=5474, lr=9.99642e-05, gnorm=2.259, loss_scale=8, train_wall=11, gb_free=2.8, wall=62077
2021-06-19 11:53:34 | INFO | train_inner | epoch 002: 2508 / 3002 loss=2.737, ppl=6.67, wps=5798.5, ups=0.09, wpb=64903, bsz=128, num_updates=5475, lr=9.99642e-05, gnorm=2.216, loss_scale=8, train_wall=11, gb_free=2.8, wall=62088
2021-06-19 11:53:45 | INFO | train_inner | epoch 002: 2509 / 3002 loss=2.76, ppl=6.77, wps=5893.6, ups=0.09, wpb=64781, bsz=128, num_updates=5476, lr=9.99642e-05, gnorm=2.358, loss_scale=8, train_wall=11, gb_free=2.8, wall=62099
2021-06-19 11:53:56 | INFO | train_inner | epoch 002: 2510 / 3002 loss=2.776, ppl=6.85, wps=5825, ups=0.09, wpb=64843, bsz=128, num_updates=5477, lr=9.99642e-05, gnorm=2.431, loss_scale=8, train_wall=11, gb_free=2.8, wall=62110
2021-06-19 11:54:07 | INFO | train_inner | epoch 002: 2511 / 3002 loss=2.724, ppl=6.61, wps=5869, ups=0.09, wpb=64851, bsz=128, num_updates=5478, lr=9.99642e-05, gnorm=2.291, loss_scale=8, train_wall=11, gb_free=2.8, wall=62121
2021-06-19 11:54:18 | INFO | train_inner | epoch 002: 2512 / 3002 loss=2.816, ppl=7.04, wps=5811, ups=0.09, wpb=64877, bsz=128, num_updates=5479, lr=9.99642e-05, gnorm=2.479, loss_scale=8, train_wall=11, gb_free=2.8, wall=62133
2021-06-19 11:54:29 | INFO | train_inner | epoch 002: 2513 / 3002 loss=2.682, ppl=6.42, wps=5976.8, ups=0.09, wpb=64776, bsz=128, num_updates=5480, lr=9.99642e-05, gnorm=2.261, loss_scale=8, train_wall=10, gb_free=2.8, wall=62143
2021-06-19 11:54:40 | INFO | train_inner | epoch 002: 2514 / 3002 loss=2.618, ppl=6.14, wps=5862.4, ups=0.09, wpb=64920, bsz=128, num_updates=5481, lr=9.99641e-05, gnorm=2.11, loss_scale=8, train_wall=11, gb_free=2.8, wall=62155
2021-06-19 11:54:51 | INFO | train_inner | epoch 002: 2515 / 3002 loss=2.77, ppl=6.82, wps=5876.8, ups=0.09, wpb=64842, bsz=128, num_updates=5482, lr=9.99641e-05, gnorm=2.204, loss_scale=8, train_wall=11, gb_free=2.8, wall=62166
2021-06-19 11:55:02 | INFO | train_inner | epoch 002: 2516 / 3002 loss=2.667, ppl=6.35, wps=5853.2, ups=0.09, wpb=64852, bsz=128, num_updates=5483, lr=9.99641e-05, gnorm=2.19, loss_scale=8, train_wall=11, gb_free=2.8, wall=62177
2021-06-19 11:55:13 | INFO | train_inner | epoch 002: 2517 / 3002 loss=2.752, ppl=6.74, wps=5856.9, ups=0.09, wpb=64828, bsz=128, num_updates=5484, lr=9.99641e-05, gnorm=2.252, loss_scale=8, train_wall=11, gb_free=2.8, wall=62188
2021-06-19 11:55:24 | INFO | train_inner | epoch 002: 2518 / 3002 loss=2.541, ppl=5.82, wps=5890.9, ups=0.09, wpb=64815, bsz=128, num_updates=5485, lr=9.99641e-05, gnorm=2.323, loss_scale=8, train_wall=11, gb_free=2.8, wall=62199
2021-06-19 11:55:36 | INFO | train_inner | epoch 002: 2519 / 3002 loss=2.75, ppl=6.73, wps=5776, ups=0.09, wpb=64886, bsz=128, num_updates=5486, lr=9.99641e-05, gnorm=2.374, loss_scale=8, train_wall=11, gb_free=2.8, wall=62210
2021-06-19 11:55:46 | INFO | train_inner | epoch 002: 2520 / 3002 loss=2.749, ppl=6.72, wps=5989.3, ups=0.09, wpb=64800, bsz=128, num_updates=5487, lr=9.99641e-05, gnorm=2.204, loss_scale=8, train_wall=10, gb_free=2.8, wall=62221
2021-06-19 11:55:58 | INFO | train_inner | epoch 002: 2521 / 3002 loss=2.738, ppl=6.67, wps=5832.6, ups=0.09, wpb=64778, bsz=128, num_updates=5488, lr=9.99641e-05, gnorm=2.216, loss_scale=8, train_wall=11, gb_free=2.8, wall=62232
2021-06-19 11:56:09 | INFO | train_inner | epoch 002: 2522 / 3002 loss=2.702, ppl=6.51, wps=5838.4, ups=0.09, wpb=64724, bsz=128, num_updates=5489, lr=9.99641e-05, gnorm=3.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=62243
2021-06-19 11:56:20 | INFO | train_inner | epoch 002: 2523 / 3002 loss=2.774, ppl=6.84, wps=5767.2, ups=0.09, wpb=64837, bsz=128, num_updates=5490, lr=9.99641e-05, gnorm=2.291, loss_scale=8, train_wall=11, gb_free=2.8, wall=62254
2021-06-19 11:56:31 | INFO | train_inner | epoch 002: 2524 / 3002 loss=2.69, ppl=6.45, wps=5908, ups=0.09, wpb=64772, bsz=128, num_updates=5491, lr=9.99641e-05, gnorm=2.312, loss_scale=8, train_wall=10, gb_free=2.8, wall=62265
2021-06-19 11:56:42 | INFO | train_inner | epoch 002: 2525 / 3002 loss=2.833, ppl=7.13, wps=5812.6, ups=0.09, wpb=64879, bsz=128, num_updates=5492, lr=9.99641e-05, gnorm=2.25, loss_scale=8, train_wall=11, gb_free=2.8, wall=62276
2021-06-19 11:56:53 | INFO | train_inner | epoch 002: 2526 / 3002 loss=2.865, ppl=7.29, wps=5742.5, ups=0.09, wpb=64805, bsz=128, num_updates=5493, lr=9.99641e-05, gnorm=2.221, loss_scale=16, train_wall=11, gb_free=2.8, wall=62288
2021-06-19 11:57:05 | INFO | train_inner | epoch 002: 2527 / 3002 loss=2.735, ppl=6.66, wps=5795.9, ups=0.09, wpb=64898, bsz=128, num_updates=5494, lr=9.9964e-05, gnorm=2.175, loss_scale=16, train_wall=11, gb_free=2.8, wall=62299
2021-06-19 11:57:16 | INFO | train_inner | epoch 002: 2528 / 3002 loss=2.794, ppl=6.94, wps=5888.9, ups=0.09, wpb=64822, bsz=128, num_updates=5495, lr=9.9964e-05, gnorm=2.206, loss_scale=16, train_wall=11, gb_free=2.8, wall=62310
2021-06-19 11:57:27 | INFO | train_inner | epoch 002: 2529 / 3002 loss=2.704, ppl=6.51, wps=5719.4, ups=0.09, wpb=64797, bsz=128, num_updates=5496, lr=9.9964e-05, gnorm=2.224, loss_scale=16, train_wall=11, gb_free=2.8, wall=62321
2021-06-19 11:57:38 | INFO | train_inner | epoch 002: 2530 / 3002 loss=2.596, ppl=6.05, wps=5912.5, ups=0.09, wpb=64821, bsz=128, num_updates=5497, lr=9.9964e-05, gnorm=2.215, loss_scale=16, train_wall=11, gb_free=2.8, wall=62332
2021-06-19 11:57:49 | INFO | train_inner | epoch 002: 2531 / 3002 loss=2.615, ppl=6.13, wps=5786.7, ups=0.09, wpb=64829, bsz=128, num_updates=5498, lr=9.9964e-05, gnorm=2.865, loss_scale=16, train_wall=11, gb_free=2.8, wall=62343
2021-06-19 11:58:00 | INFO | train_inner | epoch 002: 2532 / 3002 loss=2.712, ppl=6.55, wps=5845.7, ups=0.09, wpb=64776, bsz=128, num_updates=5499, lr=9.9964e-05, gnorm=2.339, loss_scale=16, train_wall=11, gb_free=2.8, wall=62354
2021-06-19 11:58:11 | INFO | train_inner | epoch 002: 2533 / 3002 loss=2.699, ppl=6.49, wps=5800.5, ups=0.09, wpb=64871, bsz=128, num_updates=5500, lr=9.9964e-05, gnorm=2.8, loss_scale=16, train_wall=11, gb_free=2.8, wall=62366
2021-06-19 11:58:22 | INFO | train_inner | epoch 002: 2534 / 3002 loss=2.74, ppl=6.68, wps=5907.7, ups=0.09, wpb=64903, bsz=128, num_updates=5501, lr=9.9964e-05, gnorm=2.25, loss_scale=16, train_wall=11, gb_free=2.8, wall=62377
2021-06-19 11:58:33 | INFO | train_inner | epoch 002: 2535 / 3002 loss=2.701, ppl=6.5, wps=5789.6, ups=0.09, wpb=64861, bsz=128, num_updates=5502, lr=9.9964e-05, gnorm=2.834, loss_scale=16, train_wall=11, gb_free=2.8, wall=62388
2021-06-19 11:58:45 | INFO | train_inner | epoch 002: 2536 / 3002 loss=2.575, ppl=5.96, wps=5830.6, ups=0.09, wpb=64798, bsz=128, num_updates=5503, lr=9.9964e-05, gnorm=2.254, loss_scale=16, train_wall=11, gb_free=2.8, wall=62399
2021-06-19 11:58:55 | INFO | train_inner | epoch 002: 2537 / 3002 loss=2.676, ppl=6.39, wps=5939.5, ups=0.09, wpb=64841, bsz=128, num_updates=5504, lr=9.9964e-05, gnorm=2.306, loss_scale=16, train_wall=10, gb_free=2.8, wall=62410
2021-06-19 11:59:07 | INFO | train_inner | epoch 002: 2538 / 3002 loss=2.66, ppl=6.32, wps=5788.2, ups=0.09, wpb=64773, bsz=128, num_updates=5505, lr=9.9964e-05, gnorm=2.28, loss_scale=16, train_wall=11, gb_free=2.8, wall=62421
2021-06-19 11:59:18 | INFO | train_inner | epoch 002: 2539 / 3002 loss=2.685, ppl=6.43, wps=5783.8, ups=0.09, wpb=64793, bsz=128, num_updates=5506, lr=9.99639e-05, gnorm=2.212, loss_scale=16, train_wall=11, gb_free=2.8, wall=62432
2021-06-19 11:59:29 | INFO | train_inner | epoch 002: 2540 / 3002 loss=2.877, ppl=7.34, wps=5815.9, ups=0.09, wpb=64776, bsz=128, num_updates=5507, lr=9.99639e-05, gnorm=2.258, loss_scale=16, train_wall=11, gb_free=2.8, wall=62443
2021-06-19 11:59:40 | INFO | train_inner | epoch 002: 2541 / 3002 loss=2.677, ppl=6.39, wps=5919.6, ups=0.09, wpb=64953, bsz=128, num_updates=5508, lr=9.99639e-05, gnorm=2.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=62454
2021-06-19 11:59:51 | INFO | train_inner | epoch 002: 2542 / 3002 loss=2.769, ppl=6.82, wps=5838.4, ups=0.09, wpb=64816, bsz=128, num_updates=5509, lr=9.99639e-05, gnorm=2.216, loss_scale=16, train_wall=11, gb_free=2.8, wall=62465
2021-06-19 12:00:02 | INFO | train_inner | epoch 002: 2543 / 3002 loss=2.738, ppl=6.67, wps=5910.8, ups=0.09, wpb=64843, bsz=128, num_updates=5510, lr=9.99639e-05, gnorm=2.32, loss_scale=16, train_wall=11, gb_free=2.8, wall=62476
2021-06-19 12:00:13 | INFO | train_inner | epoch 002: 2544 / 3002 loss=2.697, ppl=6.49, wps=6018.5, ups=0.09, wpb=64794, bsz=128, num_updates=5511, lr=9.99639e-05, gnorm=2.211, loss_scale=16, train_wall=10, gb_free=2.8, wall=62487
2021-06-19 12:00:24 | INFO | train_inner | epoch 002: 2545 / 3002 loss=2.651, ppl=6.28, wps=5808.9, ups=0.09, wpb=64861, bsz=128, num_updates=5512, lr=9.99639e-05, gnorm=2.319, loss_scale=16, train_wall=11, gb_free=2.8, wall=62498
2021-06-19 12:00:35 | INFO | train_inner | epoch 002: 2546 / 3002 loss=2.742, ppl=6.69, wps=5950.6, ups=0.09, wpb=64842, bsz=128, num_updates=5513, lr=9.99639e-05, gnorm=2.222, loss_scale=16, train_wall=10, gb_free=2.8, wall=62509
2021-06-19 12:00:46 | INFO | train_inner | epoch 002: 2547 / 3002 loss=2.758, ppl=6.77, wps=5757, ups=0.09, wpb=64749, bsz=128, num_updates=5514, lr=9.99639e-05, gnorm=2.231, loss_scale=16, train_wall=11, gb_free=2.8, wall=62520
2021-06-19 12:00:57 | INFO | train_inner | epoch 002: 2548 / 3002 loss=2.764, ppl=6.79, wps=5920.8, ups=0.09, wpb=64900, bsz=128, num_updates=5515, lr=9.99639e-05, gnorm=2.16, loss_scale=16, train_wall=10, gb_free=2.8, wall=62531
2021-06-19 12:01:08 | INFO | train_inner | epoch 002: 2549 / 3002 loss=2.829, ppl=7.1, wps=5752.7, ups=0.09, wpb=64841, bsz=128, num_updates=5516, lr=9.99639e-05, gnorm=2.151, loss_scale=16, train_wall=11, gb_free=2.8, wall=62543
2021-06-19 12:01:19 | INFO | train_inner | epoch 002: 2550 / 3002 loss=2.787, ppl=6.9, wps=5835.3, ups=0.09, wpb=64805, bsz=128, num_updates=5517, lr=9.99639e-05, gnorm=2.204, loss_scale=16, train_wall=11, gb_free=2.8, wall=62554
2021-06-19 12:01:30 | INFO | train_inner | epoch 002: 2551 / 3002 loss=2.672, ppl=6.37, wps=5921.9, ups=0.09, wpb=64871, bsz=128, num_updates=5518, lr=9.99639e-05, gnorm=2.079, loss_scale=16, train_wall=10, gb_free=2.8, wall=62565
2021-06-19 12:01:42 | INFO | train_inner | epoch 002: 2552 / 3002 loss=2.553, ppl=5.87, wps=5708.1, ups=0.09, wpb=64804, bsz=128, num_updates=5519, lr=9.99638e-05, gnorm=2.119, loss_scale=16, train_wall=11, gb_free=2.8, wall=62576
2021-06-19 12:01:53 | INFO | train_inner | epoch 002: 2553 / 3002 loss=2.83, ppl=7.11, wps=5761.3, ups=0.09, wpb=64807, bsz=128, num_updates=5520, lr=9.99638e-05, gnorm=2.165, loss_scale=16, train_wall=11, gb_free=2.8, wall=62587
2021-06-19 12:02:04 | INFO | train_inner | epoch 002: 2554 / 3002 loss=2.836, ppl=7.14, wps=5872.6, ups=0.09, wpb=64838, bsz=128, num_updates=5521, lr=9.99638e-05, gnorm=2.256, loss_scale=16, train_wall=11, gb_free=2.8, wall=62598
2021-06-19 12:02:15 | INFO | train_inner | epoch 002: 2555 / 3002 loss=2.547, ppl=5.84, wps=5903.8, ups=0.09, wpb=64743, bsz=128, num_updates=5522, lr=9.99638e-05, gnorm=2.247, loss_scale=16, train_wall=10, gb_free=2.8, wall=62609
2021-06-19 12:02:26 | INFO | train_inner | epoch 002: 2556 / 3002 loss=2.732, ppl=6.64, wps=5789.6, ups=0.09, wpb=64861, bsz=128, num_updates=5523, lr=9.99638e-05, gnorm=2.217, loss_scale=16, train_wall=11, gb_free=2.8, wall=62621
2021-06-19 12:02:37 | INFO | train_inner | epoch 002: 2557 / 3002 loss=2.737, ppl=6.67, wps=5839.2, ups=0.09, wpb=64839, bsz=128, num_updates=5524, lr=9.99638e-05, gnorm=2.291, loss_scale=16, train_wall=11, gb_free=2.8, wall=62632
2021-06-19 12:02:49 | INFO | train_inner | epoch 002: 2558 / 3002 loss=2.851, ppl=7.21, wps=5795.2, ups=0.09, wpb=64838, bsz=128, num_updates=5525, lr=9.99638e-05, gnorm=2.18, loss_scale=16, train_wall=11, gb_free=2.8, wall=62643
2021-06-19 12:03:00 | INFO | train_inner | epoch 002: 2559 / 3002 loss=2.778, ppl=6.86, wps=5779.7, ups=0.09, wpb=64796, bsz=128, num_updates=5526, lr=9.99638e-05, gnorm=2.219, loss_scale=16, train_wall=11, gb_free=2.8, wall=62654
2021-06-19 12:03:11 | INFO | train_inner | epoch 002: 2560 / 3002 loss=2.803, ppl=6.98, wps=5821.4, ups=0.09, wpb=64728, bsz=128, num_updates=5527, lr=9.99638e-05, gnorm=2.133, loss_scale=16, train_wall=11, gb_free=2.8, wall=62665
2021-06-19 12:03:22 | INFO | train_inner | epoch 002: 2561 / 3002 loss=2.588, ppl=6.01, wps=5795.4, ups=0.09, wpb=64817, bsz=128, num_updates=5528, lr=9.99638e-05, gnorm=2.081, loss_scale=16, train_wall=11, gb_free=2.8, wall=62676
2021-06-19 12:03:33 | INFO | train_inner | epoch 002: 2562 / 3002 loss=2.605, ppl=6.08, wps=5778.4, ups=0.09, wpb=64850, bsz=128, num_updates=5529, lr=9.99638e-05, gnorm=2.144, loss_scale=16, train_wall=11, gb_free=2.8, wall=62688
2021-06-19 12:03:45 | INFO | train_inner | epoch 002: 2563 / 3002 loss=2.821, ppl=7.07, wps=5739.5, ups=0.09, wpb=64856, bsz=128, num_updates=5530, lr=9.99638e-05, gnorm=2.159, loss_scale=16, train_wall=11, gb_free=2.8, wall=62699
2021-06-19 12:03:56 | INFO | train_inner | epoch 002: 2564 / 3002 loss=2.664, ppl=6.34, wps=5916.2, ups=0.09, wpb=64896, bsz=128, num_updates=5531, lr=9.99637e-05, gnorm=2.189, loss_scale=16, train_wall=10, gb_free=2.8, wall=62710
2021-06-19 12:04:07 | INFO | train_inner | epoch 002: 2565 / 3002 loss=2.646, ppl=6.26, wps=5900.1, ups=0.09, wpb=64856, bsz=128, num_updates=5532, lr=9.99637e-05, gnorm=2.154, loss_scale=16, train_wall=11, gb_free=2.8, wall=62721
2021-06-19 12:04:18 | INFO | train_inner | epoch 002: 2566 / 3002 loss=2.644, ppl=6.25, wps=5833.6, ups=0.09, wpb=64818, bsz=128, num_updates=5533, lr=9.99637e-05, gnorm=2.243, loss_scale=16, train_wall=11, gb_free=2.8, wall=62732
2021-06-19 12:04:29 | INFO | train_inner | epoch 002: 2567 / 3002 loss=2.751, ppl=6.73, wps=5799.2, ups=0.09, wpb=64860, bsz=128, num_updates=5534, lr=9.99637e-05, gnorm=2.259, loss_scale=16, train_wall=11, gb_free=2.8, wall=62743
2021-06-19 12:04:40 | INFO | train_inner | epoch 002: 2568 / 3002 loss=2.66, ppl=6.32, wps=5862.9, ups=0.09, wpb=64733, bsz=128, num_updates=5535, lr=9.99637e-05, gnorm=2.225, loss_scale=16, train_wall=11, gb_free=2.8, wall=62754
2021-06-19 12:04:51 | INFO | train_inner | epoch 002: 2569 / 3002 loss=2.864, ppl=7.28, wps=5806.5, ups=0.09, wpb=64896, bsz=128, num_updates=5536, lr=9.99637e-05, gnorm=2.252, loss_scale=16, train_wall=11, gb_free=2.8, wall=62765
2021-06-19 12:05:02 | INFO | train_inner | epoch 002: 2570 / 3002 loss=2.687, ppl=6.44, wps=5885.2, ups=0.09, wpb=64900, bsz=128, num_updates=5537, lr=9.99637e-05, gnorm=2.201, loss_scale=16, train_wall=11, gb_free=2.8, wall=62776
2021-06-19 12:05:13 | INFO | train_inner | epoch 002: 2571 / 3002 loss=2.667, ppl=6.35, wps=5729.4, ups=0.09, wpb=64797, bsz=128, num_updates=5538, lr=9.99637e-05, gnorm=2.175, loss_scale=16, train_wall=11, gb_free=2.8, wall=62788
2021-06-19 12:05:25 | INFO | train_inner | epoch 002: 2572 / 3002 loss=2.708, ppl=6.53, wps=5818.2, ups=0.09, wpb=64852, bsz=128, num_updates=5539, lr=9.99637e-05, gnorm=2.193, loss_scale=16, train_wall=11, gb_free=2.8, wall=62799
2021-06-19 12:05:36 | INFO | train_inner | epoch 002: 2573 / 3002 loss=2.744, ppl=6.7, wps=5846.7, ups=0.09, wpb=64793, bsz=128, num_updates=5540, lr=9.99637e-05, gnorm=2.17, loss_scale=16, train_wall=11, gb_free=2.8, wall=62810
2021-06-19 12:05:47 | INFO | train_inner | epoch 002: 2574 / 3002 loss=2.682, ppl=6.42, wps=5873.7, ups=0.09, wpb=64832, bsz=128, num_updates=5541, lr=9.99637e-05, gnorm=2.191, loss_scale=16, train_wall=11, gb_free=2.8, wall=62821
2021-06-19 12:05:58 | INFO | train_inner | epoch 002: 2575 / 3002 loss=2.509, ppl=5.69, wps=5778.2, ups=0.09, wpb=64900, bsz=128, num_updates=5542, lr=9.99637e-05, gnorm=2.136, loss_scale=16, train_wall=11, gb_free=2.8, wall=62832
2021-06-19 12:06:09 | INFO | train_inner | epoch 002: 2576 / 3002 loss=2.693, ppl=6.47, wps=5788.3, ups=0.09, wpb=64791, bsz=128, num_updates=5543, lr=9.99637e-05, gnorm=2.181, loss_scale=16, train_wall=11, gb_free=2.8, wall=62843
2021-06-19 12:06:20 | INFO | train_inner | epoch 002: 2577 / 3002 loss=2.658, ppl=6.31, wps=5918.8, ups=0.09, wpb=64883, bsz=128, num_updates=5544, lr=9.99636e-05, gnorm=2.284, loss_scale=16, train_wall=11, gb_free=2.8, wall=62854
2021-06-19 12:06:31 | INFO | train_inner | epoch 002: 2578 / 3002 loss=2.592, ppl=6.03, wps=5878.1, ups=0.09, wpb=64730, bsz=128, num_updates=5545, lr=9.99636e-05, gnorm=2.088, loss_scale=16, train_wall=11, gb_free=2.8, wall=62865
2021-06-19 12:06:42 | INFO | train_inner | epoch 002: 2579 / 3002 loss=2.577, ppl=5.97, wps=5894.8, ups=0.09, wpb=64822, bsz=128, num_updates=5546, lr=9.99636e-05, gnorm=2.136, loss_scale=16, train_wall=11, gb_free=2.8, wall=62876
2021-06-19 12:06:53 | INFO | train_inner | epoch 002: 2580 / 3002 loss=2.839, ppl=7.15, wps=5867.3, ups=0.09, wpb=64806, bsz=128, num_updates=5547, lr=9.99636e-05, gnorm=2.554, loss_scale=16, train_wall=11, gb_free=2.8, wall=62887
2021-06-19 12:07:04 | INFO | train_inner | epoch 002: 2581 / 3002 loss=2.542, ppl=5.83, wps=5974.3, ups=0.09, wpb=64912, bsz=128, num_updates=5548, lr=9.99636e-05, gnorm=2.223, loss_scale=16, train_wall=10, gb_free=2.8, wall=62898
2021-06-19 12:07:15 | INFO | train_inner | epoch 002: 2582 / 3002 loss=2.676, ppl=6.39, wps=5904.7, ups=0.09, wpb=64771, bsz=128, num_updates=5549, lr=9.99636e-05, gnorm=2.161, loss_scale=16, train_wall=11, gb_free=2.8, wall=62909
2021-06-19 12:07:26 | INFO | train_inner | epoch 002: 2583 / 3002 loss=2.702, ppl=6.51, wps=5842.1, ups=0.09, wpb=64767, bsz=128, num_updates=5550, lr=9.99636e-05, gnorm=2.15, loss_scale=16, train_wall=11, gb_free=2.8, wall=62920
2021-06-19 12:07:37 | INFO | train_inner | epoch 002: 2584 / 3002 loss=2.779, ppl=6.86, wps=5746.9, ups=0.09, wpb=64864, bsz=128, num_updates=5551, lr=9.99636e-05, gnorm=2.063, loss_scale=16, train_wall=11, gb_free=2.8, wall=62932
2021-06-19 12:07:48 | INFO | train_inner | epoch 002: 2585 / 3002 loss=2.813, ppl=7.03, wps=5974.5, ups=0.09, wpb=64735, bsz=128, num_updates=5552, lr=9.99636e-05, gnorm=2.189, loss_scale=16, train_wall=10, gb_free=2.8, wall=62942
2021-06-19 12:07:59 | INFO | train_inner | epoch 002: 2586 / 3002 loss=2.713, ppl=6.56, wps=5811, ups=0.09, wpb=64917, bsz=128, num_updates=5553, lr=9.99636e-05, gnorm=2.144, loss_scale=16, train_wall=11, gb_free=2.8, wall=62954
2021-06-19 12:08:10 | INFO | train_inner | epoch 002: 2587 / 3002 loss=2.801, ppl=6.97, wps=5810, ups=0.09, wpb=64806, bsz=128, num_updates=5554, lr=9.99636e-05, gnorm=2.196, loss_scale=16, train_wall=11, gb_free=2.8, wall=62965
2021-06-19 12:08:22 | INFO | train_inner | epoch 002: 2588 / 3002 loss=2.646, ppl=6.26, wps=5842.8, ups=0.09, wpb=64806, bsz=128, num_updates=5555, lr=9.99636e-05, gnorm=2.213, loss_scale=16, train_wall=11, gb_free=2.8, wall=62976
2021-06-19 12:08:33 | INFO | train_inner | epoch 002: 2589 / 3002 loss=2.764, ppl=6.8, wps=5872.8, ups=0.09, wpb=64776, bsz=128, num_updates=5556, lr=9.99635e-05, gnorm=2.268, loss_scale=16, train_wall=11, gb_free=2.8, wall=62987
2021-06-19 12:08:44 | INFO | train_inner | epoch 002: 2590 / 3002 loss=2.628, ppl=6.18, wps=5733.4, ups=0.09, wpb=64767, bsz=128, num_updates=5557, lr=9.99635e-05, gnorm=2.41, loss_scale=16, train_wall=11, gb_free=2.8, wall=62998
2021-06-19 12:08:55 | INFO | train_inner | epoch 002: 2591 / 3002 loss=2.736, ppl=6.66, wps=5921, ups=0.09, wpb=64894, bsz=128, num_updates=5558, lr=9.99635e-05, gnorm=2.2, loss_scale=16, train_wall=11, gb_free=2.8, wall=63009
2021-06-19 12:09:06 | INFO | train_inner | epoch 002: 2592 / 3002 loss=2.705, ppl=6.52, wps=5866.4, ups=0.09, wpb=64846, bsz=128, num_updates=5559, lr=9.99635e-05, gnorm=2.198, loss_scale=16, train_wall=11, gb_free=2.8, wall=63020
2021-06-19 12:09:17 | INFO | train_inner | epoch 002: 2593 / 3002 loss=2.66, ppl=6.32, wps=5830.8, ups=0.09, wpb=64858, bsz=128, num_updates=5560, lr=9.99635e-05, gnorm=4.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=63031
2021-06-19 12:09:28 | INFO | train_inner | epoch 002: 2594 / 3002 loss=2.659, ppl=6.31, wps=5831.6, ups=0.09, wpb=64800, bsz=128, num_updates=5561, lr=9.99635e-05, gnorm=3.656, loss_scale=16, train_wall=11, gb_free=2.8, wall=63042
2021-06-19 12:09:39 | INFO | train_inner | epoch 002: 2595 / 3002 loss=2.743, ppl=6.69, wps=5877.8, ups=0.09, wpb=64788, bsz=128, num_updates=5562, lr=9.99635e-05, gnorm=2.131, loss_scale=16, train_wall=11, gb_free=2.8, wall=63054
2021-06-19 12:09:50 | INFO | train_inner | epoch 002: 2596 / 3002 loss=2.771, ppl=6.83, wps=5837.3, ups=0.09, wpb=64842, bsz=128, num_updates=5563, lr=9.99635e-05, gnorm=2.271, loss_scale=16, train_wall=11, gb_free=2.8, wall=63065
2021-06-19 12:10:01 | INFO | train_inner | epoch 002: 2597 / 3002 loss=2.836, ppl=7.14, wps=5794.1, ups=0.09, wpb=64854, bsz=128, num_updates=5564, lr=9.99635e-05, gnorm=2.226, loss_scale=16, train_wall=11, gb_free=2.8, wall=63076
2021-06-19 12:10:13 | INFO | train_inner | epoch 002: 2598 / 3002 loss=2.634, ppl=6.21, wps=5805.5, ups=0.09, wpb=64880, bsz=128, num_updates=5565, lr=9.99635e-05, gnorm=2.264, loss_scale=16, train_wall=11, gb_free=2.8, wall=63087
2021-06-19 12:10:24 | INFO | train_inner | epoch 002: 2599 / 3002 loss=2.829, ppl=7.11, wps=5903.2, ups=0.09, wpb=64745, bsz=128, num_updates=5566, lr=9.99635e-05, gnorm=2.24, loss_scale=16, train_wall=11, gb_free=2.8, wall=63098
2021-06-19 12:10:35 | INFO | train_inner | epoch 002: 2600 / 3002 loss=2.759, ppl=6.77, wps=5846.5, ups=0.09, wpb=64800, bsz=128, num_updates=5567, lr=9.99635e-05, gnorm=2.308, loss_scale=16, train_wall=11, gb_free=2.8, wall=63109
2021-06-19 12:10:46 | INFO | train_inner | epoch 002: 2601 / 3002 loss=2.905, ppl=7.49, wps=5734.8, ups=0.09, wpb=64799, bsz=128, num_updates=5568, lr=9.99635e-05, gnorm=2.377, loss_scale=16, train_wall=11, gb_free=2.8, wall=63120
2021-06-19 12:10:57 | INFO | train_inner | epoch 002: 2602 / 3002 loss=2.788, ppl=6.91, wps=5951.1, ups=0.09, wpb=64787, bsz=128, num_updates=5569, lr=9.99634e-05, gnorm=2.496, loss_scale=16, train_wall=10, gb_free=2.8, wall=63131
2021-06-19 12:11:08 | INFO | train_inner | epoch 002: 2603 / 3002 loss=2.783, ppl=6.88, wps=5884.3, ups=0.09, wpb=64819, bsz=128, num_updates=5570, lr=9.99634e-05, gnorm=2.156, loss_scale=16, train_wall=11, gb_free=2.8, wall=63142
2021-06-19 12:11:19 | INFO | train_inner | epoch 002: 2604 / 3002 loss=2.698, ppl=6.49, wps=5909.9, ups=0.09, wpb=64863, bsz=128, num_updates=5571, lr=9.99634e-05, gnorm=2.336, loss_scale=16, train_wall=11, gb_free=2.8, wall=63153
2021-06-19 12:11:30 | INFO | train_inner | epoch 002: 2605 / 3002 loss=2.75, ppl=6.73, wps=5931.6, ups=0.09, wpb=64847, bsz=128, num_updates=5572, lr=9.99634e-05, gnorm=2.217, loss_scale=16, train_wall=10, gb_free=2.8, wall=63164
2021-06-19 12:11:41 | INFO | train_inner | epoch 002: 2606 / 3002 loss=2.755, ppl=6.75, wps=5869.5, ups=0.09, wpb=64819, bsz=128, num_updates=5573, lr=9.99634e-05, gnorm=2.268, loss_scale=16, train_wall=11, gb_free=2.8, wall=63175
2021-06-19 12:11:52 | INFO | train_inner | epoch 002: 2607 / 3002 loss=2.768, ppl=6.81, wps=5957.1, ups=0.09, wpb=64836, bsz=128, num_updates=5574, lr=9.99634e-05, gnorm=2.177, loss_scale=16, train_wall=10, gb_free=2.8, wall=63186
2021-06-19 12:12:03 | INFO | train_inner | epoch 002: 2608 / 3002 loss=2.771, ppl=6.83, wps=6002.8, ups=0.09, wpb=64892, bsz=128, num_updates=5575, lr=9.99634e-05, gnorm=2.258, loss_scale=16, train_wall=10, gb_free=2.8, wall=63197
2021-06-19 12:12:14 | INFO | train_inner | epoch 002: 2609 / 3002 loss=2.691, ppl=6.46, wps=5878.3, ups=0.09, wpb=64866, bsz=128, num_updates=5576, lr=9.99634e-05, gnorm=2.364, loss_scale=16, train_wall=11, gb_free=2.8, wall=63208
2021-06-19 12:12:25 | INFO | train_inner | epoch 002: 2610 / 3002 loss=2.602, ppl=6.07, wps=5862, ups=0.09, wpb=64811, bsz=128, num_updates=5577, lr=9.99634e-05, gnorm=2.201, loss_scale=16, train_wall=11, gb_free=2.8, wall=63219
2021-06-19 12:12:36 | INFO | train_inner | epoch 002: 2611 / 3002 loss=2.626, ppl=6.17, wps=5871.3, ups=0.09, wpb=64813, bsz=128, num_updates=5578, lr=9.99634e-05, gnorm=2.219, loss_scale=16, train_wall=11, gb_free=2.8, wall=63230
2021-06-19 12:12:47 | INFO | train_inner | epoch 002: 2612 / 3002 loss=2.596, ppl=6.05, wps=5813.9, ups=0.09, wpb=64835, bsz=128, num_updates=5579, lr=9.99634e-05, gnorm=2.261, loss_scale=16, train_wall=11, gb_free=2.8, wall=63241
2021-06-19 12:12:58 | INFO | train_inner | epoch 002: 2613 / 3002 loss=2.749, ppl=6.72, wps=5876.6, ups=0.09, wpb=64731, bsz=128, num_updates=5580, lr=9.99634e-05, gnorm=2.25, loss_scale=16, train_wall=11, gb_free=2.8, wall=63252
2021-06-19 12:13:09 | INFO | train_inner | epoch 002: 2614 / 3002 loss=2.738, ppl=6.67, wps=5892.8, ups=0.09, wpb=64810, bsz=128, num_updates=5581, lr=9.99633e-05, gnorm=2.155, loss_scale=16, train_wall=11, gb_free=2.8, wall=63263
2021-06-19 12:13:20 | INFO | train_inner | epoch 002: 2615 / 3002 loss=2.695, ppl=6.47, wps=5830.1, ups=0.09, wpb=64799, bsz=128, num_updates=5582, lr=9.99633e-05, gnorm=2.196, loss_scale=16, train_wall=11, gb_free=2.8, wall=63274
2021-06-19 12:13:31 | INFO | train_inner | epoch 002: 2616 / 3002 loss=2.872, ppl=7.32, wps=5940.7, ups=0.09, wpb=64767, bsz=128, num_updates=5583, lr=9.99633e-05, gnorm=2.306, loss_scale=16, train_wall=10, gb_free=2.8, wall=63285
2021-06-19 12:13:42 | INFO | train_inner | epoch 002: 2617 / 3002 loss=2.592, ppl=6.03, wps=5867.3, ups=0.09, wpb=64905, bsz=128, num_updates=5584, lr=9.99633e-05, gnorm=2.163, loss_scale=16, train_wall=11, gb_free=2.8, wall=63296
2021-06-19 12:13:53 | INFO | train_inner | epoch 002: 2618 / 3002 loss=2.631, ppl=6.19, wps=5812.6, ups=0.09, wpb=64819, bsz=128, num_updates=5585, lr=9.99633e-05, gnorm=2.15, loss_scale=16, train_wall=11, gb_free=2.8, wall=63307
2021-06-19 12:14:04 | INFO | train_inner | epoch 002: 2619 / 3002 loss=2.746, ppl=6.71, wps=5759.7, ups=0.09, wpb=64808, bsz=128, num_updates=5586, lr=9.99633e-05, gnorm=2.234, loss_scale=16, train_wall=11, gb_free=2.8, wall=63319
2021-06-19 12:14:15 | INFO | train_inner | epoch 002: 2620 / 3002 loss=2.75, ppl=6.73, wps=5897.5, ups=0.09, wpb=64834, bsz=128, num_updates=5587, lr=9.99633e-05, gnorm=2.403, loss_scale=16, train_wall=11, gb_free=2.8, wall=63330
2021-06-19 12:14:26 | INFO | train_inner | epoch 002: 2621 / 3002 loss=2.626, ppl=6.17, wps=5861.1, ups=0.09, wpb=64768, bsz=128, num_updates=5588, lr=9.99633e-05, gnorm=2.144, loss_scale=16, train_wall=11, gb_free=2.8, wall=63341
2021-06-19 12:14:37 | INFO | train_inner | epoch 002: 2622 / 3002 loss=2.623, ppl=6.16, wps=5942.8, ups=0.09, wpb=64864, bsz=128, num_updates=5589, lr=9.99633e-05, gnorm=2.194, loss_scale=16, train_wall=10, gb_free=2.8, wall=63352
2021-06-19 12:14:48 | INFO | train_inner | epoch 002: 2623 / 3002 loss=2.817, ppl=7.05, wps=5983.5, ups=0.09, wpb=64735, bsz=128, num_updates=5590, lr=9.99633e-05, gnorm=2.255, loss_scale=16, train_wall=10, gb_free=2.8, wall=63362
2021-06-19 12:14:59 | INFO | train_inner | epoch 002: 2624 / 3002 loss=2.548, ppl=5.85, wps=5756.1, ups=0.09, wpb=64788, bsz=128, num_updates=5591, lr=9.99633e-05, gnorm=7.313, loss_scale=16, train_wall=11, gb_free=2.8, wall=63374
2021-06-19 12:15:10 | INFO | train_inner | epoch 002: 2625 / 3002 loss=2.668, ppl=6.35, wps=5884.1, ups=0.09, wpb=64900, bsz=128, num_updates=5592, lr=9.99633e-05, gnorm=2.077, loss_scale=16, train_wall=11, gb_free=2.8, wall=63385
2021-06-19 12:15:22 | INFO | train_inner | epoch 002: 2626 / 3002 loss=2.749, ppl=6.72, wps=5790.1, ups=0.09, wpb=64737, bsz=128, num_updates=5593, lr=9.99633e-05, gnorm=3.189, loss_scale=16, train_wall=11, gb_free=2.8, wall=63396
2021-06-19 12:15:32 | INFO | train_inner | epoch 002: 2627 / 3002 loss=2.738, ppl=6.67, wps=5963.5, ups=0.09, wpb=64854, bsz=128, num_updates=5594, lr=9.99632e-05, gnorm=2.199, loss_scale=16, train_wall=10, gb_free=2.8, wall=63407
2021-06-19 12:15:44 | INFO | train_inner | epoch 002: 2628 / 3002 loss=2.637, ppl=6.22, wps=5738.2, ups=0.09, wpb=64862, bsz=128, num_updates=5595, lr=9.99632e-05, gnorm=2.158, loss_scale=16, train_wall=11, gb_free=2.8, wall=63418
2021-06-19 12:15:55 | INFO | train_inner | epoch 002: 2629 / 3002 loss=2.628, ppl=6.18, wps=5720, ups=0.09, wpb=64855, bsz=128, num_updates=5596, lr=9.99632e-05, gnorm=2.096, loss_scale=16, train_wall=11, gb_free=2.8, wall=63429
2021-06-19 12:16:06 | INFO | train_inner | epoch 002: 2630 / 3002 loss=2.784, ppl=6.89, wps=5939.5, ups=0.09, wpb=64843, bsz=128, num_updates=5597, lr=9.99632e-05, gnorm=2.202, loss_scale=16, train_wall=10, gb_free=2.8, wall=63440
2021-06-19 12:16:17 | INFO | train_inner | epoch 002: 2631 / 3002 loss=2.612, ppl=6.11, wps=6004.3, ups=0.09, wpb=64825, bsz=128, num_updates=5598, lr=9.99632e-05, gnorm=2.172, loss_scale=16, train_wall=10, gb_free=2.8, wall=63451
2021-06-19 12:16:28 | INFO | train_inner | epoch 002: 2632 / 3002 loss=2.8, ppl=6.97, wps=5951.3, ups=0.09, wpb=64794, bsz=128, num_updates=5599, lr=9.99632e-05, gnorm=2.189, loss_scale=16, train_wall=10, gb_free=2.8, wall=63462
2021-06-19 12:16:39 | INFO | train_inner | epoch 002: 2633 / 3002 loss=2.676, ppl=6.39, wps=5938.4, ups=0.09, wpb=64826, bsz=128, num_updates=5600, lr=9.99632e-05, gnorm=2.225, loss_scale=16, train_wall=10, gb_free=2.8, wall=63473
2021-06-19 12:16:50 | INFO | train_inner | epoch 002: 2634 / 3002 loss=2.591, ppl=6.02, wps=5846.1, ups=0.09, wpb=64798, bsz=128, num_updates=5601, lr=9.99632e-05, gnorm=2.139, loss_scale=16, train_wall=11, gb_free=2.8, wall=63484
2021-06-19 12:17:01 | INFO | train_inner | epoch 002: 2635 / 3002 loss=2.717, ppl=6.57, wps=5772.6, ups=0.09, wpb=64812, bsz=128, num_updates=5602, lr=9.99632e-05, gnorm=2.353, loss_scale=16, train_wall=11, gb_free=2.8, wall=63495
2021-06-19 12:17:12 | INFO | train_inner | epoch 002: 2636 / 3002 loss=2.574, ppl=5.95, wps=5815.1, ups=0.09, wpb=64812, bsz=128, num_updates=5603, lr=9.99632e-05, gnorm=2.607, loss_scale=16, train_wall=11, gb_free=2.8, wall=63506
2021-06-19 12:17:23 | INFO | train_inner | epoch 002: 2637 / 3002 loss=2.778, ppl=6.86, wps=5822.1, ups=0.09, wpb=64891, bsz=128, num_updates=5604, lr=9.99632e-05, gnorm=2.367, loss_scale=16, train_wall=11, gb_free=2.8, wall=63518
2021-06-19 12:17:34 | INFO | train_inner | epoch 002: 2638 / 3002 loss=2.61, ppl=6.11, wps=5793.1, ups=0.09, wpb=64874, bsz=128, num_updates=5605, lr=9.99632e-05, gnorm=2.199, loss_scale=16, train_wall=11, gb_free=2.8, wall=63529
2021-06-19 12:17:45 | INFO | train_inner | epoch 002: 2639 / 3002 loss=2.689, ppl=6.45, wps=5847.8, ups=0.09, wpb=64797, bsz=128, num_updates=5606, lr=9.99631e-05, gnorm=2.542, loss_scale=16, train_wall=11, gb_free=2.8, wall=63540
2021-06-19 12:17:57 | INFO | train_inner | epoch 002: 2640 / 3002 loss=2.824, ppl=7.08, wps=5748, ups=0.09, wpb=64805, bsz=128, num_updates=5607, lr=9.99631e-05, gnorm=2.438, loss_scale=16, train_wall=11, gb_free=2.8, wall=63551
2021-06-19 12:18:08 | INFO | train_inner | epoch 002: 2641 / 3002 loss=2.676, ppl=6.39, wps=5793.5, ups=0.09, wpb=64853, bsz=128, num_updates=5608, lr=9.99631e-05, gnorm=2.392, loss_scale=16, train_wall=11, gb_free=2.8, wall=63562
2021-06-19 12:18:19 | INFO | train_inner | epoch 002: 2642 / 3002 loss=2.88, ppl=7.36, wps=5918, ups=0.09, wpb=64821, bsz=128, num_updates=5609, lr=9.99631e-05, gnorm=2.187, loss_scale=16, train_wall=10, gb_free=2.8, wall=63573
2021-06-19 12:18:30 | INFO | train_inner | epoch 002: 2643 / 3002 loss=2.618, ppl=6.14, wps=5969.7, ups=0.09, wpb=64843, bsz=128, num_updates=5610, lr=9.99631e-05, gnorm=2.095, loss_scale=16, train_wall=10, gb_free=2.8, wall=63584
2021-06-19 12:18:41 | INFO | train_inner | epoch 002: 2644 / 3002 loss=2.687, ppl=6.44, wps=5798.2, ups=0.09, wpb=64795, bsz=128, num_updates=5611, lr=9.99631e-05, gnorm=2.149, loss_scale=16, train_wall=11, gb_free=2.8, wall=63595
2021-06-19 12:18:52 | INFO | train_inner | epoch 002: 2645 / 3002 loss=2.72, ppl=6.59, wps=5843.6, ups=0.09, wpb=64840, bsz=128, num_updates=5612, lr=9.99631e-05, gnorm=2.132, loss_scale=16, train_wall=11, gb_free=2.8, wall=63606
2021-06-19 12:19:03 | INFO | train_inner | epoch 002: 2646 / 3002 loss=2.791, ppl=6.92, wps=5912.9, ups=0.09, wpb=64875, bsz=128, num_updates=5613, lr=9.99631e-05, gnorm=2.208, loss_scale=16, train_wall=10, gb_free=2.8, wall=63617
2021-06-19 12:19:14 | INFO | train_inner | epoch 002: 2647 / 3002 loss=2.951, ppl=7.73, wps=5889.8, ups=0.09, wpb=64835, bsz=128, num_updates=5614, lr=9.99631e-05, gnorm=2.288, loss_scale=16, train_wall=11, gb_free=2.8, wall=63628
2021-06-19 12:19:25 | INFO | train_inner | epoch 002: 2648 / 3002 loss=2.744, ppl=6.7, wps=5855.3, ups=0.09, wpb=64791, bsz=128, num_updates=5615, lr=9.99631e-05, gnorm=2.152, loss_scale=16, train_wall=11, gb_free=2.8, wall=63639
2021-06-19 12:19:36 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-19 12:19:47 | INFO | train_inner | epoch 002: 2650 / 3002 loss=2.725, ppl=6.61, wps=2924.1, ups=0.05, wpb=64746, bsz=128, num_updates=5616, lr=9.99631e-05, gnorm=2.512, loss_scale=8, train_wall=21, gb_free=2.8, wall=63662
2021-06-19 12:19:58 | INFO | train_inner | epoch 002: 2651 / 3002 loss=2.91, ppl=7.52, wps=5847.3, ups=0.09, wpb=64871, bsz=128, num_updates=5617, lr=9.99631e-05, gnorm=2.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=63673
2021-06-19 12:20:09 | INFO | train_inner | epoch 002: 2652 / 3002 loss=2.943, ppl=7.69, wps=5993.9, ups=0.09, wpb=64861, bsz=128, num_updates=5618, lr=9.99631e-05, gnorm=2.182, loss_scale=8, train_wall=10, gb_free=2.8, wall=63683
2021-06-19 12:20:20 | INFO | train_inner | epoch 002: 2653 / 3002 loss=2.778, ppl=6.86, wps=5823.1, ups=0.09, wpb=64801, bsz=128, num_updates=5619, lr=9.9963e-05, gnorm=2.109, loss_scale=8, train_wall=11, gb_free=2.8, wall=63695
2021-06-19 12:20:31 | INFO | train_inner | epoch 002: 2654 / 3002 loss=2.75, ppl=6.73, wps=5852.7, ups=0.09, wpb=64758, bsz=128, num_updates=5620, lr=9.9963e-05, gnorm=4.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=63706
2021-06-19 12:20:42 | INFO | train_inner | epoch 002: 2655 / 3002 loss=2.738, ppl=6.67, wps=5827.2, ups=0.09, wpb=64877, bsz=128, num_updates=5621, lr=9.9963e-05, gnorm=2.116, loss_scale=8, train_wall=11, gb_free=2.8, wall=63717
2021-06-19 12:20:54 | INFO | train_inner | epoch 002: 2656 / 3002 loss=2.622, ppl=6.15, wps=5770.9, ups=0.09, wpb=64799, bsz=128, num_updates=5622, lr=9.9963e-05, gnorm=2.16, loss_scale=8, train_wall=11, gb_free=2.8, wall=63728
2021-06-19 12:21:05 | INFO | train_inner | epoch 002: 2657 / 3002 loss=2.667, ppl=6.35, wps=5923.8, ups=0.09, wpb=64773, bsz=128, num_updates=5623, lr=9.9963e-05, gnorm=2.24, loss_scale=8, train_wall=10, gb_free=2.8, wall=63739
2021-06-19 12:21:16 | INFO | train_inner | epoch 002: 2658 / 3002 loss=2.535, ppl=5.79, wps=5788.5, ups=0.09, wpb=64825, bsz=128, num_updates=5624, lr=9.9963e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=63750
2021-06-19 12:21:27 | INFO | train_inner | epoch 002: 2659 / 3002 loss=2.645, ppl=6.26, wps=5919.8, ups=0.09, wpb=64811, bsz=128, num_updates=5625, lr=9.9963e-05, gnorm=2.144, loss_scale=8, train_wall=11, gb_free=2.8, wall=63761
2021-06-19 12:21:38 | INFO | train_inner | epoch 002: 2660 / 3002 loss=2.814, ppl=7.03, wps=5842.8, ups=0.09, wpb=64814, bsz=128, num_updates=5626, lr=9.9963e-05, gnorm=2.367, loss_scale=8, train_wall=11, gb_free=2.8, wall=63772
2021-06-19 12:21:49 | INFO | train_inner | epoch 002: 2661 / 3002 loss=2.746, ppl=6.71, wps=5936.1, ups=0.09, wpb=64864, bsz=128, num_updates=5627, lr=9.9963e-05, gnorm=2.479, loss_scale=8, train_wall=10, gb_free=2.8, wall=63783
2021-06-19 12:22:00 | INFO | train_inner | epoch 002: 2662 / 3002 loss=2.649, ppl=6.27, wps=5894, ups=0.09, wpb=64860, bsz=128, num_updates=5628, lr=9.9963e-05, gnorm=2.206, loss_scale=8, train_wall=11, gb_free=2.8, wall=63794
2021-06-19 12:22:11 | INFO | train_inner | epoch 002: 2663 / 3002 loss=2.773, ppl=6.84, wps=5901.3, ups=0.09, wpb=64796, bsz=128, num_updates=5629, lr=9.9963e-05, gnorm=2.233, loss_scale=8, train_wall=11, gb_free=2.8, wall=63805
2021-06-19 12:22:22 | INFO | train_inner | epoch 002: 2664 / 3002 loss=2.827, ppl=7.1, wps=5933.2, ups=0.09, wpb=64832, bsz=128, num_updates=5630, lr=9.9963e-05, gnorm=2.25, loss_scale=8, train_wall=10, gb_free=2.8, wall=63816
2021-06-19 12:22:33 | INFO | train_inner | epoch 002: 2665 / 3002 loss=2.67, ppl=6.37, wps=5870.5, ups=0.09, wpb=64637, bsz=128, num_updates=5631, lr=9.99629e-05, gnorm=2.131, loss_scale=8, train_wall=11, gb_free=2.8, wall=63827
2021-06-19 12:22:44 | INFO | train_inner | epoch 002: 2666 / 3002 loss=2.655, ppl=6.3, wps=5992.6, ups=0.09, wpb=64809, bsz=128, num_updates=5632, lr=9.99629e-05, gnorm=2.216, loss_scale=8, train_wall=10, gb_free=2.8, wall=63838
2021-06-19 12:22:55 | INFO | train_inner | epoch 002: 2667 / 3002 loss=2.756, ppl=6.76, wps=5830.7, ups=0.09, wpb=64831, bsz=128, num_updates=5633, lr=9.99629e-05, gnorm=2.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=63849
2021-06-19 12:23:06 | INFO | train_inner | epoch 002: 2668 / 3002 loss=2.754, ppl=6.75, wps=5834.6, ups=0.09, wpb=64857, bsz=128, num_updates=5634, lr=9.99629e-05, gnorm=2.106, loss_scale=8, train_wall=11, gb_free=2.8, wall=63860
2021-06-19 12:23:17 | INFO | train_inner | epoch 002: 2669 / 3002 loss=2.719, ppl=6.58, wps=5834.8, ups=0.09, wpb=64795, bsz=128, num_updates=5635, lr=9.99629e-05, gnorm=2.082, loss_scale=8, train_wall=11, gb_free=2.8, wall=63871
2021-06-19 12:23:28 | INFO | train_inner | epoch 002: 2670 / 3002 loss=2.64, ppl=6.23, wps=5763.1, ups=0.09, wpb=64764, bsz=128, num_updates=5636, lr=9.99629e-05, gnorm=2.193, loss_scale=8, train_wall=11, gb_free=2.8, wall=63882
2021-06-19 12:23:39 | INFO | train_inner | epoch 002: 2671 / 3002 loss=2.754, ppl=6.74, wps=5831.7, ups=0.09, wpb=64800, bsz=128, num_updates=5637, lr=9.99629e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=63894
2021-06-19 12:23:50 | INFO | train_inner | epoch 002: 2672 / 3002 loss=2.791, ppl=6.92, wps=5836.5, ups=0.09, wpb=64815, bsz=128, num_updates=5638, lr=9.99629e-05, gnorm=2.365, loss_scale=8, train_wall=11, gb_free=2.8, wall=63905
2021-06-19 12:24:02 | INFO | train_inner | epoch 002: 2673 / 3002 loss=2.755, ppl=6.75, wps=5759.3, ups=0.09, wpb=64807, bsz=128, num_updates=5639, lr=9.99629e-05, gnorm=2.169, loss_scale=8, train_wall=11, gb_free=2.8, wall=63916
2021-06-19 12:24:13 | INFO | train_inner | epoch 002: 2674 / 3002 loss=2.659, ppl=6.32, wps=5816.3, ups=0.09, wpb=64866, bsz=128, num_updates=5640, lr=9.99629e-05, gnorm=2.175, loss_scale=8, train_wall=11, gb_free=2.8, wall=63927
2021-06-19 12:24:24 | INFO | train_inner | epoch 002: 2675 / 3002 loss=2.782, ppl=6.88, wps=5829.6, ups=0.09, wpb=64807, bsz=128, num_updates=5641, lr=9.99629e-05, gnorm=2.273, loss_scale=8, train_wall=11, gb_free=2.8, wall=63938
2021-06-19 12:24:35 | INFO | train_inner | epoch 002: 2676 / 3002 loss=2.617, ppl=6.13, wps=5830.4, ups=0.09, wpb=64838, bsz=128, num_updates=5642, lr=9.99629e-05, gnorm=2.253, loss_scale=8, train_wall=11, gb_free=2.8, wall=63949
2021-06-19 12:24:46 | INFO | train_inner | epoch 002: 2677 / 3002 loss=2.716, ppl=6.57, wps=5777.4, ups=0.09, wpb=64769, bsz=128, num_updates=5643, lr=9.99629e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=63961
2021-06-19 12:24:57 | INFO | train_inner | epoch 002: 2678 / 3002 loss=2.71, ppl=6.54, wps=5847.8, ups=0.09, wpb=64821, bsz=128, num_updates=5644, lr=9.99628e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=63972
2021-06-19 12:25:08 | INFO | train_inner | epoch 002: 2679 / 3002 loss=2.697, ppl=6.48, wps=5958.5, ups=0.09, wpb=64876, bsz=128, num_updates=5645, lr=9.99628e-05, gnorm=2.122, loss_scale=8, train_wall=10, gb_free=2.8, wall=63983
2021-06-19 12:25:19 | INFO | train_inner | epoch 002: 2680 / 3002 loss=2.717, ppl=6.57, wps=5866.4, ups=0.09, wpb=64831, bsz=128, num_updates=5646, lr=9.99628e-05, gnorm=2.21, loss_scale=8, train_wall=11, gb_free=2.8, wall=63994
2021-06-19 12:25:30 | INFO | train_inner | epoch 002: 2681 / 3002 loss=2.78, ppl=6.87, wps=5854.1, ups=0.09, wpb=64796, bsz=128, num_updates=5647, lr=9.99628e-05, gnorm=2.246, loss_scale=8, train_wall=11, gb_free=2.8, wall=64005
2021-06-19 12:25:42 | INFO | train_inner | epoch 002: 2682 / 3002 loss=2.827, ppl=7.1, wps=5776.8, ups=0.09, wpb=64858, bsz=128, num_updates=5648, lr=9.99628e-05, gnorm=2.099, loss_scale=8, train_wall=11, gb_free=2.8, wall=64016
2021-06-19 12:25:53 | INFO | train_inner | epoch 002: 2683 / 3002 loss=2.883, ppl=7.38, wps=5860.1, ups=0.09, wpb=64843, bsz=128, num_updates=5649, lr=9.99628e-05, gnorm=2.238, loss_scale=8, train_wall=11, gb_free=2.8, wall=64027
2021-06-19 12:26:04 | INFO | train_inner | epoch 002: 2684 / 3002 loss=2.632, ppl=6.2, wps=5781.6, ups=0.09, wpb=64877, bsz=128, num_updates=5650, lr=9.99628e-05, gnorm=2.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=64038
2021-06-19 12:26:15 | INFO | train_inner | epoch 002: 2685 / 3002 loss=2.593, ppl=6.03, wps=5815.8, ups=0.09, wpb=64914, bsz=128, num_updates=5651, lr=9.99628e-05, gnorm=2.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=64049
2021-06-19 12:26:26 | INFO | train_inner | epoch 002: 2686 / 3002 loss=2.793, ppl=6.93, wps=5808.2, ups=0.09, wpb=64740, bsz=128, num_updates=5652, lr=9.99628e-05, gnorm=2.278, loss_scale=8, train_wall=11, gb_free=2.8, wall=64060
2021-06-19 12:26:37 | INFO | train_inner | epoch 002: 2687 / 3002 loss=2.554, ppl=5.87, wps=5915.4, ups=0.09, wpb=64806, bsz=128, num_updates=5653, lr=9.99628e-05, gnorm=2.089, loss_scale=8, train_wall=10, gb_free=2.8, wall=64071
2021-06-19 12:26:48 | INFO | train_inner | epoch 002: 2688 / 3002 loss=2.634, ppl=6.21, wps=5881.2, ups=0.09, wpb=64792, bsz=128, num_updates=5654, lr=9.99628e-05, gnorm=2.113, loss_scale=8, train_wall=11, gb_free=2.8, wall=64082
2021-06-19 12:26:59 | INFO | train_inner | epoch 002: 2689 / 3002 loss=2.75, ppl=6.73, wps=5881.9, ups=0.09, wpb=64911, bsz=128, num_updates=5655, lr=9.99628e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=64093
2021-06-19 12:27:10 | INFO | train_inner | epoch 002: 2690 / 3002 loss=2.723, ppl=6.6, wps=5905.8, ups=0.09, wpb=64841, bsz=128, num_updates=5656, lr=9.99627e-05, gnorm=2.202, loss_scale=8, train_wall=11, gb_free=2.8, wall=64104
2021-06-19 12:27:21 | INFO | train_inner | epoch 002: 2691 / 3002 loss=2.627, ppl=6.18, wps=5815.6, ups=0.09, wpb=64801, bsz=128, num_updates=5657, lr=9.99627e-05, gnorm=2.175, loss_scale=8, train_wall=11, gb_free=2.8, wall=64116
2021-06-19 12:27:32 | INFO | train_inner | epoch 002: 2692 / 3002 loss=2.75, ppl=6.73, wps=5929, ups=0.09, wpb=64837, bsz=128, num_updates=5658, lr=9.99627e-05, gnorm=2.395, loss_scale=8, train_wall=10, gb_free=2.8, wall=64127
2021-06-19 12:27:43 | INFO | train_inner | epoch 002: 2693 / 3002 loss=2.761, ppl=6.78, wps=5855.2, ups=0.09, wpb=64805, bsz=128, num_updates=5659, lr=9.99627e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=64138
2021-06-19 12:27:54 | INFO | train_inner | epoch 002: 2694 / 3002 loss=2.698, ppl=6.49, wps=5910.2, ups=0.09, wpb=64902, bsz=128, num_updates=5660, lr=9.99627e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=64149
2021-06-19 12:28:05 | INFO | train_inner | epoch 002: 2695 / 3002 loss=2.744, ppl=6.7, wps=5846.5, ups=0.09, wpb=64826, bsz=128, num_updates=5661, lr=9.99627e-05, gnorm=2.147, loss_scale=8, train_wall=11, gb_free=2.8, wall=64160
2021-06-19 12:28:17 | INFO | train_inner | epoch 002: 2696 / 3002 loss=2.81, ppl=7.01, wps=5737.7, ups=0.09, wpb=64792, bsz=128, num_updates=5662, lr=9.99627e-05, gnorm=5.174, loss_scale=8, train_wall=11, gb_free=2.8, wall=64171
2021-06-19 12:28:28 | INFO | train_inner | epoch 002: 2697 / 3002 loss=2.721, ppl=6.59, wps=5717.6, ups=0.09, wpb=64803, bsz=128, num_updates=5663, lr=9.99627e-05, gnorm=2.134, loss_scale=8, train_wall=11, gb_free=2.8, wall=64182
2021-06-19 12:28:39 | INFO | train_inner | epoch 002: 2698 / 3002 loss=2.609, ppl=6.1, wps=5845.5, ups=0.09, wpb=64824, bsz=128, num_updates=5664, lr=9.99627e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=64193
2021-06-19 12:28:50 | INFO | train_inner | epoch 002: 2699 / 3002 loss=2.521, ppl=5.74, wps=5875.7, ups=0.09, wpb=64824, bsz=128, num_updates=5665, lr=9.99627e-05, gnorm=2.198, loss_scale=8, train_wall=11, gb_free=2.8, wall=64204
2021-06-19 12:29:01 | INFO | train_inner | epoch 002: 2700 / 3002 loss=2.768, ppl=6.81, wps=5734.3, ups=0.09, wpb=64751, bsz=128, num_updates=5666, lr=9.99627e-05, gnorm=2.294, loss_scale=8, train_wall=11, gb_free=2.8, wall=64216
2021-06-19 12:29:13 | INFO | train_inner | epoch 002: 2701 / 3002 loss=2.809, ppl=7.01, wps=5755.9, ups=0.09, wpb=64841, bsz=128, num_updates=5667, lr=9.99627e-05, gnorm=2.427, loss_scale=8, train_wall=11, gb_free=2.8, wall=64227
2021-06-19 12:29:24 | INFO | train_inner | epoch 002: 2702 / 3002 loss=2.701, ppl=6.5, wps=5887.8, ups=0.09, wpb=64827, bsz=128, num_updates=5668, lr=9.99627e-05, gnorm=2.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=64238
2021-06-19 12:29:35 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-19 12:29:46 | INFO | train_inner | epoch 002: 2704 / 3002 loss=2.697, ppl=6.48, wps=2938.3, ups=0.05, wpb=64804, bsz=128, num_updates=5669, lr=9.99626e-05, gnorm=2.41, loss_scale=4, train_wall=21, gb_free=2.8, wall=64260
2021-06-19 12:29:57 | INFO | train_inner | epoch 002: 2705 / 3002 loss=2.617, ppl=6.13, wps=5875, ups=0.09, wpb=64934, bsz=128, num_updates=5670, lr=9.99626e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=64271
2021-06-19 12:30:08 | INFO | train_inner | epoch 002: 2706 / 3002 loss=2.706, ppl=6.52, wps=5795.7, ups=0.09, wpb=64838, bsz=128, num_updates=5671, lr=9.99626e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=64282
2021-06-19 12:30:19 | INFO | train_inner | epoch 002: 2707 / 3002 loss=2.77, ppl=6.82, wps=5927.1, ups=0.09, wpb=64835, bsz=128, num_updates=5672, lr=9.99626e-05, gnorm=2.122, loss_scale=4, train_wall=11, gb_free=2.8, wall=64293
2021-06-19 12:30:30 | INFO | train_inner | epoch 002: 2708 / 3002 loss=2.644, ppl=6.25, wps=5797, ups=0.09, wpb=64772, bsz=128, num_updates=5673, lr=9.99626e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=64304
2021-06-19 12:30:41 | INFO | train_inner | epoch 002: 2709 / 3002 loss=2.723, ppl=6.6, wps=5924.4, ups=0.09, wpb=64844, bsz=128, num_updates=5674, lr=9.99626e-05, gnorm=2.244, loss_scale=4, train_wall=10, gb_free=2.8, wall=64315
2021-06-19 12:30:52 | INFO | train_inner | epoch 002: 2710 / 3002 loss=2.669, ppl=6.36, wps=5859.1, ups=0.09, wpb=64878, bsz=128, num_updates=5675, lr=9.99626e-05, gnorm=2.162, loss_scale=4, train_wall=11, gb_free=2.8, wall=64326
2021-06-19 12:31:03 | INFO | train_inner | epoch 002: 2711 / 3002 loss=2.717, ppl=6.58, wps=5897.7, ups=0.09, wpb=64854, bsz=128, num_updates=5676, lr=9.99626e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=64337
2021-06-19 12:31:14 | INFO | train_inner | epoch 002: 2712 / 3002 loss=2.749, ppl=6.72, wps=5859.3, ups=0.09, wpb=64711, bsz=128, num_updates=5677, lr=9.99626e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=64348
2021-06-19 12:31:25 | INFO | train_inner | epoch 002: 2713 / 3002 loss=2.75, ppl=6.73, wps=5932.2, ups=0.09, wpb=64803, bsz=128, num_updates=5678, lr=9.99626e-05, gnorm=2.195, loss_scale=4, train_wall=10, gb_free=2.8, wall=64359
2021-06-19 12:31:36 | INFO | train_inner | epoch 002: 2714 / 3002 loss=2.651, ppl=6.28, wps=5870.2, ups=0.09, wpb=64774, bsz=128, num_updates=5679, lr=9.99626e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=64370
2021-06-19 12:31:47 | INFO | train_inner | epoch 002: 2715 / 3002 loss=2.704, ppl=6.52, wps=5811.4, ups=0.09, wpb=64876, bsz=128, num_updates=5680, lr=9.99626e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=64382
2021-06-19 12:31:59 | INFO | train_inner | epoch 002: 2716 / 3002 loss=2.762, ppl=6.78, wps=5755.3, ups=0.09, wpb=64838, bsz=128, num_updates=5681, lr=9.99625e-05, gnorm=2.139, loss_scale=4, train_wall=11, gb_free=2.8, wall=64393
2021-06-19 12:32:10 | INFO | train_inner | epoch 002: 2717 / 3002 loss=2.722, ppl=6.6, wps=5708.1, ups=0.09, wpb=64828, bsz=128, num_updates=5682, lr=9.99625e-05, gnorm=2.367, loss_scale=4, train_wall=11, gb_free=2.8, wall=64404
2021-06-19 12:32:21 | INFO | train_inner | epoch 002: 2718 / 3002 loss=2.591, ppl=6.02, wps=5771, ups=0.09, wpb=64853, bsz=128, num_updates=5683, lr=9.99625e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=64415
2021-06-19 12:32:32 | INFO | train_inner | epoch 002: 2719 / 3002 loss=2.641, ppl=6.24, wps=5866, ups=0.09, wpb=64797, bsz=128, num_updates=5684, lr=9.99625e-05, gnorm=2.848, loss_scale=4, train_wall=11, gb_free=2.8, wall=64426
2021-06-19 12:32:43 | INFO | train_inner | epoch 002: 2720 / 3002 loss=2.689, ppl=6.45, wps=5839.7, ups=0.09, wpb=64803, bsz=128, num_updates=5685, lr=9.99625e-05, gnorm=4.468, loss_scale=4, train_wall=11, gb_free=2.8, wall=64438
2021-06-19 12:32:54 | INFO | train_inner | epoch 002: 2721 / 3002 loss=2.654, ppl=6.29, wps=5791.7, ups=0.09, wpb=64865, bsz=128, num_updates=5686, lr=9.99625e-05, gnorm=2.161, loss_scale=4, train_wall=11, gb_free=2.8, wall=64449
2021-06-19 12:33:06 | INFO | train_inner | epoch 002: 2722 / 3002 loss=2.878, ppl=7.35, wps=5727.3, ups=0.09, wpb=64917, bsz=128, num_updates=5687, lr=9.99625e-05, gnorm=2.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=64460
2021-06-19 12:33:17 | INFO | train_inner | epoch 002: 2723 / 3002 loss=2.842, ppl=7.17, wps=5732.3, ups=0.09, wpb=64884, bsz=128, num_updates=5688, lr=9.99625e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=64471
2021-06-19 12:33:28 | INFO | train_inner | epoch 002: 2724 / 3002 loss=2.755, ppl=6.75, wps=5922.2, ups=0.09, wpb=64876, bsz=128, num_updates=5689, lr=9.99625e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=64482
2021-06-19 12:33:39 | INFO | train_inner | epoch 002: 2725 / 3002 loss=2.718, ppl=6.58, wps=5800.8, ups=0.09, wpb=64866, bsz=128, num_updates=5690, lr=9.99625e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=64494
2021-06-19 12:33:51 | INFO | train_inner | epoch 002: 2726 / 3002 loss=2.654, ppl=6.3, wps=5749.1, ups=0.09, wpb=64773, bsz=128, num_updates=5691, lr=9.99625e-05, gnorm=2.096, loss_scale=4, train_wall=11, gb_free=2.8, wall=64505
2021-06-19 12:34:02 | INFO | train_inner | epoch 002: 2727 / 3002 loss=2.728, ppl=6.63, wps=5819, ups=0.09, wpb=64790, bsz=128, num_updates=5692, lr=9.99625e-05, gnorm=2.278, loss_scale=4, train_wall=11, gb_free=2.8, wall=64516
2021-06-19 12:34:13 | INFO | train_inner | epoch 002: 2728 / 3002 loss=2.84, ppl=7.16, wps=5847.8, ups=0.09, wpb=64770, bsz=128, num_updates=5693, lr=9.99625e-05, gnorm=2.296, loss_scale=4, train_wall=11, gb_free=2.8, wall=64527
2021-06-19 12:34:24 | INFO | train_inner | epoch 002: 2729 / 3002 loss=2.713, ppl=6.56, wps=5913.6, ups=0.09, wpb=64924, bsz=128, num_updates=5694, lr=9.99624e-05, gnorm=2.186, loss_scale=4, train_wall=10, gb_free=2.8, wall=64538
2021-06-19 12:34:35 | INFO | train_inner | epoch 002: 2730 / 3002 loss=2.791, ppl=6.92, wps=5852.1, ups=0.09, wpb=64924, bsz=128, num_updates=5695, lr=9.99624e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=64549
2021-06-19 12:34:46 | INFO | train_inner | epoch 002: 2731 / 3002 loss=2.707, ppl=6.53, wps=5954.5, ups=0.09, wpb=64776, bsz=128, num_updates=5696, lr=9.99624e-05, gnorm=2.158, loss_scale=4, train_wall=10, gb_free=2.8, wall=64560
2021-06-19 12:34:57 | INFO | train_inner | epoch 002: 2732 / 3002 loss=2.746, ppl=6.71, wps=5846.7, ups=0.09, wpb=64814, bsz=128, num_updates=5697, lr=9.99624e-05, gnorm=2.162, loss_scale=4, train_wall=11, gb_free=2.8, wall=64571
2021-06-19 12:35:08 | INFO | train_inner | epoch 002: 2733 / 3002 loss=2.753, ppl=6.74, wps=5782, ups=0.09, wpb=64797, bsz=128, num_updates=5698, lr=9.99624e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=64582
2021-06-19 12:35:19 | INFO | train_inner | epoch 002: 2734 / 3002 loss=2.708, ppl=6.53, wps=5776, ups=0.09, wpb=64842, bsz=128, num_updates=5699, lr=9.99624e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=64594
2021-06-19 12:35:30 | INFO | train_inner | epoch 002: 2735 / 3002 loss=2.703, ppl=6.51, wps=5880.1, ups=0.09, wpb=64863, bsz=128, num_updates=5700, lr=9.99624e-05, gnorm=2.281, loss_scale=4, train_wall=11, gb_free=2.8, wall=64605
2021-06-19 12:35:42 | INFO | train_inner | epoch 002: 2736 / 3002 loss=2.642, ppl=6.24, wps=5727.9, ups=0.09, wpb=64784, bsz=128, num_updates=5701, lr=9.99624e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=64616
2021-06-19 12:35:53 | INFO | train_inner | epoch 002: 2737 / 3002 loss=2.653, ppl=6.29, wps=5768.1, ups=0.09, wpb=64840, bsz=128, num_updates=5702, lr=9.99624e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=64627
2021-06-19 12:36:04 | INFO | train_inner | epoch 002: 2738 / 3002 loss=2.759, ppl=6.77, wps=5844.6, ups=0.09, wpb=64807, bsz=128, num_updates=5703, lr=9.99624e-05, gnorm=2.252, loss_scale=4, train_wall=11, gb_free=2.8, wall=64638
2021-06-19 12:36:15 | INFO | train_inner | epoch 002: 2739 / 3002 loss=2.508, ppl=5.69, wps=5962.2, ups=0.09, wpb=64809, bsz=128, num_updates=5704, lr=9.99624e-05, gnorm=2.392, loss_scale=4, train_wall=10, gb_free=2.8, wall=64649
2021-06-19 12:36:26 | INFO | train_inner | epoch 002: 2740 / 3002 loss=2.54, ppl=5.81, wps=5816, ups=0.09, wpb=64874, bsz=128, num_updates=5705, lr=9.99624e-05, gnorm=2.116, loss_scale=4, train_wall=11, gb_free=2.8, wall=64660
2021-06-19 12:36:37 | INFO | train_inner | epoch 002: 2741 / 3002 loss=2.746, ppl=6.71, wps=5816.7, ups=0.09, wpb=64770, bsz=128, num_updates=5706, lr=9.99623e-05, gnorm=2.123, loss_scale=4, train_wall=11, gb_free=2.8, wall=64671
2021-06-19 12:36:48 | INFO | train_inner | epoch 002: 2742 / 3002 loss=2.794, ppl=6.94, wps=5886.8, ups=0.09, wpb=64800, bsz=128, num_updates=5707, lr=9.99623e-05, gnorm=2.175, loss_scale=4, train_wall=11, gb_free=2.8, wall=64682
2021-06-19 12:36:59 | INFO | train_inner | epoch 002: 2743 / 3002 loss=2.586, ppl=6, wps=5761.6, ups=0.09, wpb=64821, bsz=128, num_updates=5708, lr=9.99623e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=64694
2021-06-19 12:37:10 | INFO | train_inner | epoch 002: 2744 / 3002 loss=2.727, ppl=6.62, wps=5881.9, ups=0.09, wpb=64808, bsz=128, num_updates=5709, lr=9.99623e-05, gnorm=2.182, loss_scale=4, train_wall=11, gb_free=2.8, wall=64705
2021-06-19 12:37:21 | INFO | train_inner | epoch 002: 2745 / 3002 loss=2.884, ppl=7.38, wps=5851, ups=0.09, wpb=64806, bsz=128, num_updates=5710, lr=9.99623e-05, gnorm=2.155, loss_scale=4, train_wall=11, gb_free=2.8, wall=64716
2021-06-19 12:37:32 | INFO | train_inner | epoch 002: 2746 / 3002 loss=2.742, ppl=6.69, wps=5986.4, ups=0.09, wpb=64781, bsz=128, num_updates=5711, lr=9.99623e-05, gnorm=3.145, loss_scale=4, train_wall=10, gb_free=2.8, wall=64727
2021-06-19 12:37:43 | INFO | train_inner | epoch 002: 2747 / 3002 loss=2.562, ppl=5.9, wps=5878, ups=0.09, wpb=64840, bsz=128, num_updates=5712, lr=9.99623e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=64738
2021-06-19 12:37:54 | INFO | train_inner | epoch 002: 2748 / 3002 loss=2.605, ppl=6.08, wps=5867.9, ups=0.09, wpb=64880, bsz=128, num_updates=5713, lr=9.99623e-05, gnorm=2.348, loss_scale=4, train_wall=11, gb_free=2.8, wall=64749
2021-06-19 12:38:05 | INFO | train_inner | epoch 002: 2749 / 3002 loss=2.792, ppl=6.93, wps=5786.2, ups=0.09, wpb=64821, bsz=128, num_updates=5714, lr=9.99623e-05, gnorm=2.333, loss_scale=4, train_wall=11, gb_free=2.8, wall=64760
2021-06-19 12:38:17 | INFO | train_inner | epoch 002: 2750 / 3002 loss=2.771, ppl=6.82, wps=5787.2, ups=0.09, wpb=64825, bsz=128, num_updates=5715, lr=9.99623e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=64771
2021-06-19 12:38:28 | INFO | train_inner | epoch 002: 2751 / 3002 loss=2.681, ppl=6.41, wps=5766.5, ups=0.09, wpb=64773, bsz=128, num_updates=5716, lr=9.99623e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=64782
2021-06-19 12:38:39 | INFO | train_inner | epoch 002: 2752 / 3002 loss=2.735, ppl=6.66, wps=5929.5, ups=0.09, wpb=64848, bsz=128, num_updates=5717, lr=9.99623e-05, gnorm=2.197, loss_scale=4, train_wall=10, gb_free=2.8, wall=64793
2021-06-19 12:38:50 | INFO | train_inner | epoch 002: 2753 / 3002 loss=2.787, ppl=6.9, wps=5820.1, ups=0.09, wpb=64868, bsz=128, num_updates=5718, lr=9.99623e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=64804
2021-06-19 12:39:01 | INFO | train_inner | epoch 002: 2754 / 3002 loss=2.804, ppl=6.98, wps=5789.6, ups=0.09, wpb=64783, bsz=128, num_updates=5719, lr=9.99622e-05, gnorm=2.073, loss_scale=4, train_wall=11, gb_free=2.8, wall=64816
2021-06-19 12:39:12 | INFO | train_inner | epoch 002: 2755 / 3002 loss=2.708, ppl=6.53, wps=5917.7, ups=0.09, wpb=64877, bsz=128, num_updates=5720, lr=9.99622e-05, gnorm=3.864, loss_scale=4, train_wall=11, gb_free=2.8, wall=64826
2021-06-19 12:39:23 | INFO | train_inner | epoch 002: 2756 / 3002 loss=2.86, ppl=7.26, wps=5903, ups=0.09, wpb=64857, bsz=128, num_updates=5721, lr=9.99622e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=64837
2021-06-19 12:39:34 | INFO | train_inner | epoch 002: 2757 / 3002 loss=2.712, ppl=6.55, wps=5792.8, ups=0.09, wpb=64934, bsz=128, num_updates=5722, lr=9.99622e-05, gnorm=2.142, loss_scale=4, train_wall=11, gb_free=2.8, wall=64849
2021-06-19 12:39:46 | INFO | train_inner | epoch 002: 2758 / 3002 loss=2.579, ppl=5.97, wps=5773.1, ups=0.09, wpb=64831, bsz=128, num_updates=5723, lr=9.99622e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=64860
2021-06-19 12:39:56 | INFO | train_inner | epoch 002: 2759 / 3002 loss=2.799, ppl=6.96, wps=5964.2, ups=0.09, wpb=64863, bsz=128, num_updates=5724, lr=9.99622e-05, gnorm=2.303, loss_scale=4, train_wall=10, gb_free=2.8, wall=64871
2021-06-19 12:40:08 | INFO | train_inner | epoch 002: 2760 / 3002 loss=2.803, ppl=6.98, wps=5818.3, ups=0.09, wpb=64734, bsz=128, num_updates=5725, lr=9.99622e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=64882
2021-06-19 12:40:19 | INFO | train_inner | epoch 002: 2761 / 3002 loss=2.792, ppl=6.93, wps=5785, ups=0.09, wpb=64752, bsz=128, num_updates=5726, lr=9.99622e-05, gnorm=2.796, loss_scale=4, train_wall=11, gb_free=2.8, wall=64893
2021-06-19 12:40:30 | INFO | train_inner | epoch 002: 2762 / 3002 loss=2.717, ppl=6.57, wps=5835.7, ups=0.09, wpb=64864, bsz=128, num_updates=5727, lr=9.99622e-05, gnorm=2.17, loss_scale=4, train_wall=11, gb_free=2.8, wall=64904
2021-06-19 12:40:41 | INFO | train_inner | epoch 002: 2763 / 3002 loss=2.758, ppl=6.76, wps=5915, ups=0.09, wpb=64847, bsz=128, num_updates=5728, lr=9.99622e-05, gnorm=2.23, loss_scale=4, train_wall=10, gb_free=2.8, wall=64915
2021-06-19 12:40:52 | INFO | train_inner | epoch 002: 2764 / 3002 loss=2.789, ppl=6.91, wps=5932.1, ups=0.09, wpb=64790, bsz=128, num_updates=5729, lr=9.99622e-05, gnorm=2.204, loss_scale=4, train_wall=10, gb_free=2.8, wall=64926
2021-06-19 12:41:03 | INFO | train_inner | epoch 002: 2765 / 3002 loss=2.768, ppl=6.81, wps=5847.2, ups=0.09, wpb=64861, bsz=128, num_updates=5730, lr=9.99622e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=64937
2021-06-19 12:41:14 | INFO | train_inner | epoch 002: 2766 / 3002 loss=2.852, ppl=7.22, wps=5913.7, ups=0.09, wpb=64782, bsz=128, num_updates=5731, lr=9.99621e-05, gnorm=2.21, loss_scale=4, train_wall=10, gb_free=2.8, wall=64948
2021-06-19 12:41:25 | INFO | train_inner | epoch 002: 2767 / 3002 loss=2.734, ppl=6.65, wps=5859, ups=0.09, wpb=64815, bsz=128, num_updates=5732, lr=9.99621e-05, gnorm=2.101, loss_scale=4, train_wall=11, gb_free=2.8, wall=64959
2021-06-19 12:41:36 | INFO | train_inner | epoch 002: 2768 / 3002 loss=2.698, ppl=6.49, wps=5942.5, ups=0.09, wpb=64845, bsz=128, num_updates=5733, lr=9.99621e-05, gnorm=2.251, loss_scale=4, train_wall=10, gb_free=2.8, wall=64970
2021-06-19 12:41:47 | INFO | train_inner | epoch 002: 2769 / 3002 loss=2.724, ppl=6.61, wps=5750.9, ups=0.09, wpb=64825, bsz=128, num_updates=5734, lr=9.99621e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=64981
2021-06-19 12:41:58 | INFO | train_inner | epoch 002: 2770 / 3002 loss=2.524, ppl=5.75, wps=5880.4, ups=0.09, wpb=64767, bsz=128, num_updates=5735, lr=9.99621e-05, gnorm=2.112, loss_scale=4, train_wall=11, gb_free=2.8, wall=64992
2021-06-19 12:42:09 | INFO | train_inner | epoch 002: 2771 / 3002 loss=2.58, ppl=5.98, wps=5889.5, ups=0.09, wpb=64859, bsz=128, num_updates=5736, lr=9.99621e-05, gnorm=2.181, loss_scale=4, train_wall=11, gb_free=2.8, wall=65003
2021-06-19 12:42:20 | INFO | train_inner | epoch 002: 2772 / 3002 loss=2.723, ppl=6.6, wps=5841.5, ups=0.09, wpb=64813, bsz=128, num_updates=5737, lr=9.99621e-05, gnorm=2.782, loss_scale=4, train_wall=11, gb_free=2.8, wall=65015
2021-06-19 12:42:31 | INFO | train_inner | epoch 002: 2773 / 3002 loss=2.546, ppl=5.84, wps=5753.8, ups=0.09, wpb=64773, bsz=128, num_updates=5738, lr=9.99621e-05, gnorm=2.422, loss_scale=4, train_wall=11, gb_free=2.8, wall=65026
2021-06-19 12:42:43 | INFO | train_inner | epoch 002: 2774 / 3002 loss=2.771, ppl=6.83, wps=5746, ups=0.09, wpb=64854, bsz=128, num_updates=5739, lr=9.99621e-05, gnorm=2.102, loss_scale=4, train_wall=11, gb_free=2.8, wall=65037
2021-06-19 12:42:54 | INFO | train_inner | epoch 002: 2775 / 3002 loss=2.749, ppl=6.72, wps=5844.6, ups=0.09, wpb=64741, bsz=128, num_updates=5740, lr=9.99621e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=65048
2021-06-19 12:43:05 | INFO | train_inner | epoch 002: 2776 / 3002 loss=2.67, ppl=6.37, wps=5805, ups=0.09, wpb=64809, bsz=128, num_updates=5741, lr=9.99621e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=65059
2021-06-19 12:43:16 | INFO | train_inner | epoch 002: 2777 / 3002 loss=2.646, ppl=6.26, wps=5761.4, ups=0.09, wpb=64872, bsz=128, num_updates=5742, lr=9.99621e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=65071
2021-06-19 12:43:27 | INFO | train_inner | epoch 002: 2778 / 3002 loss=2.761, ppl=6.78, wps=5827.6, ups=0.09, wpb=64784, bsz=128, num_updates=5743, lr=9.99621e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=65082
2021-06-19 12:43:39 | INFO | train_inner | epoch 002: 2779 / 3002 loss=2.652, ppl=6.29, wps=5738.8, ups=0.09, wpb=64808, bsz=128, num_updates=5744, lr=9.9962e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=65093
2021-06-19 12:43:50 | INFO | train_inner | epoch 002: 2780 / 3002 loss=2.693, ppl=6.46, wps=5855.3, ups=0.09, wpb=64860, bsz=128, num_updates=5745, lr=9.9962e-05, gnorm=2.119, loss_scale=4, train_wall=11, gb_free=2.8, wall=65104
2021-06-19 12:44:01 | INFO | train_inner | epoch 002: 2781 / 3002 loss=2.638, ppl=6.22, wps=5908.1, ups=0.09, wpb=64839, bsz=128, num_updates=5746, lr=9.9962e-05, gnorm=2.365, loss_scale=4, train_wall=11, gb_free=2.8, wall=65115
2021-06-19 12:44:12 | INFO | train_inner | epoch 002: 2782 / 3002 loss=2.628, ppl=6.18, wps=5726.6, ups=0.09, wpb=64874, bsz=128, num_updates=5747, lr=9.9962e-05, gnorm=2.674, loss_scale=4, train_wall=11, gb_free=2.8, wall=65126
2021-06-19 12:44:23 | INFO | train_inner | epoch 002: 2783 / 3002 loss=2.565, ppl=5.92, wps=5997.2, ups=0.09, wpb=64820, bsz=128, num_updates=5748, lr=9.9962e-05, gnorm=2.26, loss_scale=4, train_wall=10, gb_free=2.8, wall=65137
2021-06-19 12:44:34 | INFO | train_inner | epoch 002: 2784 / 3002 loss=2.791, ppl=6.92, wps=5960.5, ups=0.09, wpb=64811, bsz=128, num_updates=5749, lr=9.9962e-05, gnorm=2.355, loss_scale=4, train_wall=10, gb_free=2.8, wall=65148
2021-06-19 12:44:45 | INFO | train_inner | epoch 002: 2785 / 3002 loss=2.679, ppl=6.41, wps=5784.5, ups=0.09, wpb=64789, bsz=128, num_updates=5750, lr=9.9962e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=65159
2021-06-19 12:44:56 | INFO | train_inner | epoch 002: 2786 / 3002 loss=2.715, ppl=6.57, wps=5849.2, ups=0.09, wpb=64864, bsz=128, num_updates=5751, lr=9.9962e-05, gnorm=2.241, loss_scale=4, train_wall=11, gb_free=2.8, wall=65170
2021-06-19 12:45:07 | INFO | train_inner | epoch 002: 2787 / 3002 loss=2.784, ppl=6.89, wps=5817.3, ups=0.09, wpb=64891, bsz=128, num_updates=5752, lr=9.9962e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=65181
2021-06-19 12:45:18 | INFO | train_inner | epoch 002: 2788 / 3002 loss=2.737, ppl=6.67, wps=5872.5, ups=0.09, wpb=64867, bsz=128, num_updates=5753, lr=9.9962e-05, gnorm=2.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=65193
2021-06-19 12:45:29 | INFO | train_inner | epoch 002: 2789 / 3002 loss=2.732, ppl=6.64, wps=5836.5, ups=0.09, wpb=64818, bsz=128, num_updates=5754, lr=9.9962e-05, gnorm=10.28, loss_scale=4, train_wall=11, gb_free=2.8, wall=65204
2021-06-19 12:45:41 | INFO | train_inner | epoch 002: 2790 / 3002 loss=2.818, ppl=7.05, wps=5796.8, ups=0.09, wpb=64842, bsz=128, num_updates=5755, lr=9.9962e-05, gnorm=2.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=65215
2021-06-19 12:45:51 | INFO | train_inner | epoch 002: 2791 / 3002 loss=2.657, ppl=6.31, wps=5912.6, ups=0.09, wpb=64820, bsz=128, num_updates=5756, lr=9.99619e-05, gnorm=4.831, loss_scale=4, train_wall=11, gb_free=2.8, wall=65226
2021-06-19 12:46:03 | INFO | train_inner | epoch 002: 2792 / 3002 loss=2.592, ppl=6.03, wps=5854.1, ups=0.09, wpb=64828, bsz=128, num_updates=5757, lr=9.99619e-05, gnorm=2.136, loss_scale=4, train_wall=11, gb_free=2.8, wall=65237
2021-06-19 12:46:14 | INFO | train_inner | epoch 002: 2793 / 3002 loss=2.827, ppl=7.09, wps=5908.6, ups=0.09, wpb=64798, bsz=128, num_updates=5758, lr=9.99619e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=65248
2021-06-19 12:46:25 | INFO | train_inner | epoch 002: 2794 / 3002 loss=2.616, ppl=6.13, wps=5662.2, ups=0.09, wpb=64769, bsz=128, num_updates=5759, lr=9.99619e-05, gnorm=2.215, loss_scale=4, train_wall=11, gb_free=2.8, wall=65259
2021-06-19 12:46:36 | INFO | train_inner | epoch 002: 2795 / 3002 loss=2.889, ppl=7.41, wps=5809.3, ups=0.09, wpb=64773, bsz=128, num_updates=5760, lr=9.99619e-05, gnorm=2.432, loss_scale=4, train_wall=11, gb_free=2.8, wall=65270
2021-06-19 12:46:47 | INFO | train_inner | epoch 002: 2796 / 3002 loss=2.721, ppl=6.59, wps=5928.5, ups=0.09, wpb=64938, bsz=128, num_updates=5761, lr=9.99619e-05, gnorm=2.106, loss_scale=4, train_wall=10, gb_free=2.8, wall=65281
2021-06-19 12:46:58 | INFO | train_inner | epoch 002: 2797 / 3002 loss=2.838, ppl=7.15, wps=5832.9, ups=0.09, wpb=64804, bsz=128, num_updates=5762, lr=9.99619e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=65292
2021-06-19 12:47:09 | INFO | train_inner | epoch 002: 2798 / 3002 loss=2.689, ppl=6.45, wps=5858.3, ups=0.09, wpb=64820, bsz=128, num_updates=5763, lr=9.99619e-05, gnorm=3.523, loss_scale=4, train_wall=11, gb_free=2.8, wall=65304
2021-06-19 12:47:20 | INFO | train_inner | epoch 002: 2799 / 3002 loss=2.606, ppl=6.09, wps=5820, ups=0.09, wpb=64819, bsz=128, num_updates=5764, lr=9.99619e-05, gnorm=2.098, loss_scale=4, train_wall=11, gb_free=2.8, wall=65315
2021-06-19 12:47:31 | INFO | train_inner | epoch 002: 2800 / 3002 loss=2.74, ppl=6.68, wps=6033.3, ups=0.09, wpb=64877, bsz=128, num_updates=5765, lr=9.99619e-05, gnorm=2.231, loss_scale=4, train_wall=10, gb_free=2.8, wall=65325
2021-06-19 12:47:42 | INFO | train_inner | epoch 002: 2801 / 3002 loss=2.687, ppl=6.44, wps=6013.5, ups=0.09, wpb=64932, bsz=128, num_updates=5766, lr=9.99619e-05, gnorm=2.267, loss_scale=4, train_wall=10, gb_free=2.8, wall=65336
2021-06-19 12:47:53 | INFO | train_inner | epoch 002: 2802 / 3002 loss=2.66, ppl=6.32, wps=5742, ups=0.09, wpb=64713, bsz=128, num_updates=5767, lr=9.99619e-05, gnorm=2.274, loss_scale=4, train_wall=11, gb_free=2.8, wall=65348
2021-06-19 12:48:04 | INFO | train_inner | epoch 002: 2803 / 3002 loss=2.738, ppl=6.67, wps=5965.1, ups=0.09, wpb=64778, bsz=128, num_updates=5768, lr=9.99619e-05, gnorm=2.258, loss_scale=4, train_wall=10, gb_free=2.8, wall=65358
2021-06-19 12:48:15 | INFO | train_inner | epoch 002: 2804 / 3002 loss=2.729, ppl=6.63, wps=5929.6, ups=0.09, wpb=64761, bsz=128, num_updates=5769, lr=9.99618e-05, gnorm=2.174, loss_scale=4, train_wall=10, gb_free=2.8, wall=65369
2021-06-19 12:48:26 | INFO | train_inner | epoch 002: 2805 / 3002 loss=2.709, ppl=6.54, wps=5971.8, ups=0.09, wpb=64823, bsz=128, num_updates=5770, lr=9.99618e-05, gnorm=2.283, loss_scale=4, train_wall=10, gb_free=2.8, wall=65380
2021-06-19 12:48:37 | INFO | train_inner | epoch 002: 2806 / 3002 loss=2.735, ppl=6.66, wps=5934.3, ups=0.09, wpb=64927, bsz=128, num_updates=5771, lr=9.99618e-05, gnorm=2.511, loss_scale=4, train_wall=10, gb_free=2.8, wall=65391
2021-06-19 12:48:48 | INFO | train_inner | epoch 002: 2807 / 3002 loss=2.695, ppl=6.48, wps=5834.1, ups=0.09, wpb=64752, bsz=128, num_updates=5772, lr=9.99618e-05, gnorm=2.421, loss_scale=4, train_wall=11, gb_free=2.8, wall=65402
2021-06-19 12:48:59 | INFO | train_inner | epoch 002: 2808 / 3002 loss=2.75, ppl=6.73, wps=5756.2, ups=0.09, wpb=64818, bsz=128, num_updates=5773, lr=9.99618e-05, gnorm=2.109, loss_scale=4, train_wall=11, gb_free=2.8, wall=65413
2021-06-19 12:49:10 | INFO | train_inner | epoch 002: 2809 / 3002 loss=2.573, ppl=5.95, wps=5868.6, ups=0.09, wpb=64802, bsz=128, num_updates=5774, lr=9.99618e-05, gnorm=2.117, loss_scale=4, train_wall=11, gb_free=2.8, wall=65424
2021-06-19 12:49:21 | INFO | train_inner | epoch 002: 2810 / 3002 loss=2.797, ppl=6.95, wps=5891, ups=0.09, wpb=64794, bsz=128, num_updates=5775, lr=9.99618e-05, gnorm=2.334, loss_scale=4, train_wall=11, gb_free=2.8, wall=65435
2021-06-19 12:49:32 | INFO | train_inner | epoch 002: 2811 / 3002 loss=2.676, ppl=6.39, wps=5873, ups=0.09, wpb=64869, bsz=128, num_updates=5776, lr=9.99618e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=65447
2021-06-19 12:49:43 | INFO | train_inner | epoch 002: 2812 / 3002 loss=2.686, ppl=6.44, wps=5819.6, ups=0.09, wpb=64781, bsz=128, num_updates=5777, lr=9.99618e-05, gnorm=2.163, loss_scale=4, train_wall=11, gb_free=2.8, wall=65458
2021-06-19 12:49:55 | INFO | train_inner | epoch 002: 2813 / 3002 loss=2.818, ppl=7.05, wps=5751.7, ups=0.09, wpb=64780, bsz=128, num_updates=5778, lr=9.99618e-05, gnorm=2.184, loss_scale=4, train_wall=11, gb_free=2.8, wall=65469
2021-06-19 12:50:06 | INFO | train_inner | epoch 002: 2814 / 3002 loss=2.863, ppl=7.27, wps=5884.1, ups=0.09, wpb=64912, bsz=128, num_updates=5779, lr=9.99618e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=65480
2021-06-19 12:50:17 | INFO | train_inner | epoch 002: 2815 / 3002 loss=2.676, ppl=6.39, wps=5774.2, ups=0.09, wpb=64885, bsz=128, num_updates=5780, lr=9.99618e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=65491
2021-06-19 12:50:28 | INFO | train_inner | epoch 002: 2816 / 3002 loss=2.628, ppl=6.18, wps=5831.5, ups=0.09, wpb=64899, bsz=128, num_updates=5781, lr=9.99617e-05, gnorm=2.263, loss_scale=4, train_wall=11, gb_free=2.8, wall=65502
2021-06-19 12:50:39 | INFO | train_inner | epoch 002: 2817 / 3002 loss=2.635, ppl=6.21, wps=5766.5, ups=0.09, wpb=64811, bsz=128, num_updates=5782, lr=9.99617e-05, gnorm=2.135, loss_scale=4, train_wall=11, gb_free=2.8, wall=65514
2021-06-19 12:50:50 | INFO | train_inner | epoch 002: 2818 / 3002 loss=2.746, ppl=6.71, wps=5884.6, ups=0.09, wpb=64849, bsz=128, num_updates=5783, lr=9.99617e-05, gnorm=2.764, loss_scale=4, train_wall=11, gb_free=2.8, wall=65525
2021-06-19 12:51:01 | INFO | train_inner | epoch 002: 2819 / 3002 loss=2.789, ppl=6.91, wps=5889.5, ups=0.09, wpb=64837, bsz=128, num_updates=5784, lr=9.99617e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=65536
2021-06-19 12:51:12 | INFO | train_inner | epoch 002: 2820 / 3002 loss=2.883, ppl=7.38, wps=5863.1, ups=0.09, wpb=64792, bsz=128, num_updates=5785, lr=9.99617e-05, gnorm=2.484, loss_scale=4, train_wall=11, gb_free=2.8, wall=65547
2021-06-19 12:51:23 | INFO | train_inner | epoch 002: 2821 / 3002 loss=2.713, ppl=6.56, wps=5845.4, ups=0.09, wpb=64833, bsz=128, num_updates=5786, lr=9.99617e-05, gnorm=2.507, loss_scale=4, train_wall=11, gb_free=2.8, wall=65558
2021-06-19 12:51:34 | INFO | train_inner | epoch 002: 2822 / 3002 loss=2.673, ppl=6.38, wps=5881, ups=0.09, wpb=64830, bsz=128, num_updates=5787, lr=9.99617e-05, gnorm=4.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=65569
2021-06-19 12:51:46 | INFO | train_inner | epoch 002: 2823 / 3002 loss=2.658, ppl=6.31, wps=5839, ups=0.09, wpb=64872, bsz=128, num_updates=5788, lr=9.99617e-05, gnorm=2.289, loss_scale=4, train_wall=11, gb_free=2.8, wall=65580
2021-06-19 12:51:57 | INFO | train_inner | epoch 002: 2824 / 3002 loss=2.677, ppl=6.39, wps=5875.4, ups=0.09, wpb=64848, bsz=128, num_updates=5789, lr=9.99617e-05, gnorm=7.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=65591
2021-06-19 12:52:08 | INFO | train_inner | epoch 002: 2825 / 3002 loss=2.792, ppl=6.92, wps=5804.8, ups=0.09, wpb=64682, bsz=128, num_updates=5790, lr=9.99617e-05, gnorm=2.52, loss_scale=4, train_wall=11, gb_free=2.8, wall=65602
2021-06-19 12:52:19 | INFO | train_inner | epoch 002: 2826 / 3002 loss=2.582, ppl=5.99, wps=5762.8, ups=0.09, wpb=64830, bsz=128, num_updates=5791, lr=9.99617e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=65613
2021-06-19 12:52:30 | INFO | train_inner | epoch 002: 2827 / 3002 loss=2.835, ppl=7.14, wps=5875.6, ups=0.09, wpb=64833, bsz=128, num_updates=5792, lr=9.99617e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=65624
2021-06-19 12:52:41 | INFO | train_inner | epoch 002: 2828 / 3002 loss=2.56, ppl=5.9, wps=5816, ups=0.09, wpb=64869, bsz=128, num_updates=5793, lr=9.99617e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=65636
2021-06-19 12:52:52 | INFO | train_inner | epoch 002: 2829 / 3002 loss=2.69, ppl=6.45, wps=5884.9, ups=0.09, wpb=64804, bsz=128, num_updates=5794, lr=9.99616e-05, gnorm=2.295, loss_scale=4, train_wall=11, gb_free=2.8, wall=65647
2021-06-19 12:53:03 | INFO | train_inner | epoch 002: 2830 / 3002 loss=2.546, ppl=5.84, wps=5826.4, ups=0.09, wpb=64828, bsz=128, num_updates=5795, lr=9.99616e-05, gnorm=2.172, loss_scale=4, train_wall=11, gb_free=2.8, wall=65658
2021-06-19 12:53:14 | INFO | train_inner | epoch 002: 2831 / 3002 loss=2.63, ppl=6.19, wps=5922.2, ups=0.09, wpb=64793, bsz=128, num_updates=5796, lr=9.99616e-05, gnorm=2.11, loss_scale=8, train_wall=10, gb_free=2.8, wall=65669
2021-06-19 12:53:25 | INFO | train_inner | epoch 002: 2832 / 3002 loss=2.733, ppl=6.65, wps=5776.4, ups=0.09, wpb=64773, bsz=128, num_updates=5797, lr=9.99616e-05, gnorm=4.661, loss_scale=8, train_wall=11, gb_free=2.8, wall=65680
2021-06-19 12:53:37 | INFO | train_inner | epoch 002: 2833 / 3002 loss=2.716, ppl=6.57, wps=5802.9, ups=0.09, wpb=64827, bsz=128, num_updates=5798, lr=9.99616e-05, gnorm=2.206, loss_scale=8, train_wall=11, gb_free=2.8, wall=65691
2021-06-19 12:53:48 | INFO | train_inner | epoch 002: 2834 / 3002 loss=2.632, ppl=6.2, wps=5923.9, ups=0.09, wpb=64827, bsz=128, num_updates=5799, lr=9.99616e-05, gnorm=2.172, loss_scale=8, train_wall=11, gb_free=2.8, wall=65702
2021-06-19 12:53:59 | INFO | train_inner | epoch 002: 2835 / 3002 loss=2.825, ppl=7.09, wps=5834.5, ups=0.09, wpb=64758, bsz=128, num_updates=5800, lr=9.99616e-05, gnorm=2.202, loss_scale=8, train_wall=11, gb_free=2.8, wall=65713
2021-06-19 12:54:10 | INFO | train_inner | epoch 002: 2836 / 3002 loss=2.705, ppl=6.52, wps=5833.8, ups=0.09, wpb=64771, bsz=128, num_updates=5801, lr=9.99616e-05, gnorm=2.213, loss_scale=8, train_wall=11, gb_free=2.8, wall=65724
2021-06-19 12:54:21 | INFO | train_inner | epoch 002: 2837 / 3002 loss=2.669, ppl=6.36, wps=5874.5, ups=0.09, wpb=64771, bsz=128, num_updates=5802, lr=9.99616e-05, gnorm=2.304, loss_scale=8, train_wall=11, gb_free=2.8, wall=65735
2021-06-19 12:54:32 | INFO | train_inner | epoch 002: 2838 / 3002 loss=2.584, ppl=5.99, wps=5798.2, ups=0.09, wpb=64858, bsz=128, num_updates=5803, lr=9.99616e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=65746
2021-06-19 12:54:43 | INFO | train_inner | epoch 002: 2839 / 3002 loss=2.702, ppl=6.51, wps=5803.6, ups=0.09, wpb=64779, bsz=128, num_updates=5804, lr=9.99616e-05, gnorm=2.368, loss_scale=8, train_wall=11, gb_free=2.8, wall=65757
2021-06-19 12:54:54 | INFO | train_inner | epoch 002: 2840 / 3002 loss=2.764, ppl=6.79, wps=5784.7, ups=0.09, wpb=64792, bsz=128, num_updates=5805, lr=9.99616e-05, gnorm=3.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=65769
2021-06-19 12:55:05 | INFO | train_inner | epoch 002: 2841 / 3002 loss=2.69, ppl=6.45, wps=5925.7, ups=0.09, wpb=64833, bsz=128, num_updates=5806, lr=9.99615e-05, gnorm=2.249, loss_scale=8, train_wall=10, gb_free=2.8, wall=65780
2021-06-19 12:55:16 | INFO | train_inner | epoch 002: 2842 / 3002 loss=2.638, ppl=6.22, wps=5899.4, ups=0.09, wpb=64929, bsz=128, num_updates=5807, lr=9.99615e-05, gnorm=2.397, loss_scale=8, train_wall=11, gb_free=2.8, wall=65791
2021-06-19 12:55:27 | INFO | train_inner | epoch 002: 2843 / 3002 loss=2.712, ppl=6.55, wps=5841.6, ups=0.09, wpb=64805, bsz=128, num_updates=5808, lr=9.99615e-05, gnorm=2.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=65802
2021-06-19 12:55:38 | INFO | train_inner | epoch 002: 2844 / 3002 loss=2.626, ppl=6.17, wps=5883, ups=0.09, wpb=64917, bsz=128, num_updates=5809, lr=9.99615e-05, gnorm=2.173, loss_scale=8, train_wall=11, gb_free=2.8, wall=65813
2021-06-19 12:55:50 | INFO | train_inner | epoch 002: 2845 / 3002 loss=2.929, ppl=7.62, wps=5805.5, ups=0.09, wpb=64721, bsz=128, num_updates=5810, lr=9.99615e-05, gnorm=2.238, loss_scale=8, train_wall=11, gb_free=2.8, wall=65824
2021-06-19 12:56:01 | INFO | train_inner | epoch 002: 2846 / 3002 loss=2.61, ppl=6.11, wps=5753.9, ups=0.09, wpb=64827, bsz=128, num_updates=5811, lr=9.99615e-05, gnorm=2.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=65835
2021-06-19 12:56:12 | INFO | train_inner | epoch 002: 2847 / 3002 loss=2.824, ppl=7.08, wps=5874.4, ups=0.09, wpb=64831, bsz=128, num_updates=5812, lr=9.99615e-05, gnorm=2.229, loss_scale=8, train_wall=11, gb_free=2.8, wall=65846
2021-06-19 12:56:23 | INFO | train_inner | epoch 002: 2848 / 3002 loss=2.762, ppl=6.78, wps=5823.3, ups=0.09, wpb=64812, bsz=128, num_updates=5813, lr=9.99615e-05, gnorm=2.214, loss_scale=8, train_wall=11, gb_free=2.8, wall=65857
2021-06-19 12:56:34 | INFO | train_inner | epoch 002: 2849 / 3002 loss=2.577, ppl=5.96, wps=5805.9, ups=0.09, wpb=64885, bsz=128, num_updates=5814, lr=9.99615e-05, gnorm=2.336, loss_scale=8, train_wall=11, gb_free=2.8, wall=65869
2021-06-19 12:56:45 | INFO | train_inner | epoch 002: 2850 / 3002 loss=2.764, ppl=6.79, wps=5904.9, ups=0.09, wpb=64858, bsz=128, num_updates=5815, lr=9.99615e-05, gnorm=2.226, loss_scale=8, train_wall=11, gb_free=2.8, wall=65880
2021-06-19 12:56:56 | INFO | train_inner | epoch 002: 2851 / 3002 loss=2.846, ppl=7.19, wps=5764.9, ups=0.09, wpb=64729, bsz=128, num_updates=5816, lr=9.99615e-05, gnorm=2.559, loss_scale=8, train_wall=11, gb_free=2.8, wall=65891
2021-06-19 12:57:08 | INFO | train_inner | epoch 002: 2852 / 3002 loss=2.89, ppl=7.41, wps=5843.7, ups=0.09, wpb=64948, bsz=128, num_updates=5817, lr=9.99615e-05, gnorm=2.177, loss_scale=8, train_wall=11, gb_free=2.8, wall=65902
2021-06-19 12:57:18 | INFO | train_inner | epoch 002: 2853 / 3002 loss=2.882, ppl=7.37, wps=5932.4, ups=0.09, wpb=64834, bsz=128, num_updates=5818, lr=9.99615e-05, gnorm=2.126, loss_scale=8, train_wall=10, gb_free=2.8, wall=65913
2021-06-19 12:57:30 | INFO | train_inner | epoch 002: 2854 / 3002 loss=2.783, ppl=6.88, wps=5824.8, ups=0.09, wpb=64890, bsz=128, num_updates=5819, lr=9.99614e-05, gnorm=2.134, loss_scale=8, train_wall=11, gb_free=2.8, wall=65924
2021-06-19 12:57:41 | INFO | train_inner | epoch 002: 2855 / 3002 loss=2.723, ppl=6.6, wps=5907.5, ups=0.09, wpb=64803, bsz=128, num_updates=5820, lr=9.99614e-05, gnorm=2.187, loss_scale=8, train_wall=10, gb_free=2.8, wall=65935
2021-06-19 12:57:51 | INFO | train_inner | epoch 002: 2856 / 3002 loss=2.727, ppl=6.62, wps=5966.1, ups=0.09, wpb=64831, bsz=128, num_updates=5821, lr=9.99614e-05, gnorm=2.134, loss_scale=8, train_wall=10, gb_free=2.8, wall=65946
2021-06-19 12:58:02 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-19 12:58:14 | INFO | train_inner | epoch 002: 2858 / 3002 loss=2.746, ppl=6.71, wps=2929.3, ups=0.05, wpb=64786, bsz=128, num_updates=5822, lr=9.99614e-05, gnorm=2.631, loss_scale=4, train_wall=21, gb_free=2.8, wall=65968
2021-06-19 12:58:25 | INFO | train_inner | epoch 002: 2859 / 3002 loss=2.578, ppl=5.97, wps=5889.7, ups=0.09, wpb=64878, bsz=128, num_updates=5823, lr=9.99614e-05, gnorm=2.17, loss_scale=4, train_wall=11, gb_free=2.8, wall=65979
2021-06-19 12:58:35 | INFO | train_inner | epoch 002: 2860 / 3002 loss=2.517, ppl=5.72, wps=5933.1, ups=0.09, wpb=64833, bsz=128, num_updates=5824, lr=9.99614e-05, gnorm=2.169, loss_scale=4, train_wall=10, gb_free=2.8, wall=65990
2021-06-19 12:58:47 | INFO | train_inner | epoch 002: 2861 / 3002 loss=2.646, ppl=6.26, wps=5750.5, ups=0.09, wpb=64696, bsz=128, num_updates=5825, lr=9.99614e-05, gnorm=2.465, loss_scale=4, train_wall=11, gb_free=2.8, wall=66001
2021-06-19 12:58:58 | INFO | train_inner | epoch 002: 2862 / 3002 loss=2.815, ppl=7.04, wps=5810, ups=0.09, wpb=64871, bsz=128, num_updates=5826, lr=9.99614e-05, gnorm=2.787, loss_scale=4, train_wall=11, gb_free=2.8, wall=66012
2021-06-19 12:59:09 | INFO | train_inner | epoch 002: 2863 / 3002 loss=2.626, ppl=6.17, wps=5765.5, ups=0.09, wpb=64727, bsz=128, num_updates=5827, lr=9.99614e-05, gnorm=2.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=66023
2021-06-19 12:59:20 | INFO | train_inner | epoch 002: 2864 / 3002 loss=2.619, ppl=6.14, wps=5865.3, ups=0.09, wpb=64866, bsz=128, num_updates=5828, lr=9.99614e-05, gnorm=2.111, loss_scale=4, train_wall=11, gb_free=2.8, wall=66035
2021-06-19 12:59:31 | INFO | train_inner | epoch 002: 2865 / 3002 loss=2.755, ppl=6.75, wps=5792.4, ups=0.09, wpb=64815, bsz=128, num_updates=5829, lr=9.99614e-05, gnorm=3.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=66046
2021-06-19 12:59:43 | INFO | train_inner | epoch 002: 2866 / 3002 loss=2.527, ppl=5.77, wps=5748.8, ups=0.09, wpb=64877, bsz=128, num_updates=5830, lr=9.99614e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=66057
2021-06-19 12:59:54 | INFO | train_inner | epoch 002: 2867 / 3002 loss=2.729, ppl=6.63, wps=5890.2, ups=0.09, wpb=64874, bsz=128, num_updates=5831, lr=9.99613e-05, gnorm=2.503, loss_scale=4, train_wall=11, gb_free=2.8, wall=66068
2021-06-19 13:00:05 | INFO | train_inner | epoch 002: 2868 / 3002 loss=2.723, ppl=6.6, wps=5764.8, ups=0.09, wpb=64817, bsz=128, num_updates=5832, lr=9.99613e-05, gnorm=2.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=66079
2021-06-19 13:00:16 | INFO | train_inner | epoch 002: 2869 / 3002 loss=2.807, ppl=7, wps=5863.3, ups=0.09, wpb=64824, bsz=128, num_updates=5833, lr=9.99613e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=66090
2021-06-19 13:00:27 | INFO | train_inner | epoch 002: 2870 / 3002 loss=2.675, ppl=6.38, wps=5827.7, ups=0.09, wpb=64873, bsz=128, num_updates=5834, lr=9.99613e-05, gnorm=2.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=66101
2021-06-19 13:00:38 | INFO | train_inner | epoch 002: 2871 / 3002 loss=2.721, ppl=6.59, wps=5815.9, ups=0.09, wpb=64813, bsz=128, num_updates=5835, lr=9.99613e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=66113
2021-06-19 13:00:49 | INFO | train_inner | epoch 002: 2872 / 3002 loss=2.993, ppl=7.96, wps=5830.3, ups=0.09, wpb=64734, bsz=128, num_updates=5836, lr=9.99613e-05, gnorm=2.401, loss_scale=4, train_wall=11, gb_free=2.8, wall=66124
2021-06-19 13:01:00 | INFO | train_inner | epoch 002: 2873 / 3002 loss=2.678, ppl=6.4, wps=5872.6, ups=0.09, wpb=64844, bsz=128, num_updates=5837, lr=9.99613e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=66135
2021-06-19 13:01:12 | INFO | train_inner | epoch 002: 2874 / 3002 loss=2.769, ppl=6.82, wps=5833.4, ups=0.09, wpb=64819, bsz=128, num_updates=5838, lr=9.99613e-05, gnorm=2.146, loss_scale=4, train_wall=11, gb_free=2.8, wall=66146
2021-06-19 13:01:23 | INFO | train_inner | epoch 002: 2875 / 3002 loss=2.684, ppl=6.43, wps=5801.3, ups=0.09, wpb=64860, bsz=128, num_updates=5839, lr=9.99613e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=66157
2021-06-19 13:01:34 | INFO | train_inner | epoch 002: 2876 / 3002 loss=2.851, ppl=7.21, wps=5765.4, ups=0.09, wpb=64764, bsz=128, num_updates=5840, lr=9.99613e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=66168
2021-06-19 13:01:45 | INFO | train_inner | epoch 002: 2877 / 3002 loss=2.799, ppl=6.96, wps=5853.9, ups=0.09, wpb=64828, bsz=128, num_updates=5841, lr=9.99613e-05, gnorm=2.218, loss_scale=4, train_wall=11, gb_free=2.8, wall=66179
2021-06-19 13:01:56 | INFO | train_inner | epoch 002: 2878 / 3002 loss=2.877, ppl=7.35, wps=5720, ups=0.09, wpb=64863, bsz=128, num_updates=5842, lr=9.99613e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=66191
2021-06-19 13:02:07 | INFO | train_inner | epoch 002: 2879 / 3002 loss=2.707, ppl=6.53, wps=5834.8, ups=0.09, wpb=64776, bsz=128, num_updates=5843, lr=9.99613e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=66202
2021-06-19 13:02:18 | INFO | train_inner | epoch 002: 2880 / 3002 loss=2.622, ppl=6.16, wps=5900.2, ups=0.09, wpb=64862, bsz=128, num_updates=5844, lr=9.99612e-05, gnorm=4.497, loss_scale=4, train_wall=10, gb_free=2.8, wall=66213
2021-06-19 13:02:30 | INFO | train_inner | epoch 002: 2881 / 3002 loss=2.812, ppl=7.02, wps=5740.3, ups=0.09, wpb=64797, bsz=128, num_updates=5845, lr=9.99612e-05, gnorm=5.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=66224
2021-06-19 13:02:41 | INFO | train_inner | epoch 002: 2882 / 3002 loss=2.648, ppl=6.27, wps=5782.9, ups=0.09, wpb=64760, bsz=128, num_updates=5846, lr=9.99612e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=66235
2021-06-19 13:02:52 | INFO | train_inner | epoch 002: 2883 / 3002 loss=2.61, ppl=6.11, wps=5770.5, ups=0.09, wpb=64851, bsz=128, num_updates=5847, lr=9.99612e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=66246
2021-06-19 13:03:03 | INFO | train_inner | epoch 002: 2884 / 3002 loss=2.798, ppl=6.96, wps=6006.6, ups=0.09, wpb=64866, bsz=128, num_updates=5848, lr=9.99612e-05, gnorm=2.296, loss_scale=4, train_wall=10, gb_free=2.8, wall=66257
2021-06-19 13:03:14 | INFO | train_inner | epoch 002: 2885 / 3002 loss=2.577, ppl=5.97, wps=5841.4, ups=0.09, wpb=64825, bsz=128, num_updates=5849, lr=9.99612e-05, gnorm=2.173, loss_scale=4, train_wall=11, gb_free=2.8, wall=66268
2021-06-19 13:03:25 | INFO | train_inner | epoch 002: 2886 / 3002 loss=2.649, ppl=6.27, wps=5848.2, ups=0.09, wpb=64877, bsz=128, num_updates=5850, lr=9.99612e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=66279
2021-06-19 13:03:36 | INFO | train_inner | epoch 002: 2887 / 3002 loss=2.635, ppl=6.21, wps=5736.3, ups=0.09, wpb=64788, bsz=128, num_updates=5851, lr=9.99612e-05, gnorm=6.702, loss_scale=4, train_wall=11, gb_free=2.8, wall=66291
2021-06-19 13:03:48 | INFO | train_inner | epoch 002: 2888 / 3002 loss=2.762, ppl=6.78, wps=5841.9, ups=0.09, wpb=64841, bsz=128, num_updates=5852, lr=9.99612e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=66302
2021-06-19 13:03:58 | INFO | train_inner | epoch 002: 2889 / 3002 loss=2.837, ppl=7.14, wps=5925.5, ups=0.09, wpb=64863, bsz=128, num_updates=5853, lr=9.99612e-05, gnorm=5.563, loss_scale=4, train_wall=10, gb_free=2.8, wall=66313
2021-06-19 13:04:10 | INFO | train_inner | epoch 002: 2890 / 3002 loss=2.722, ppl=6.6, wps=5804, ups=0.09, wpb=64833, bsz=128, num_updates=5854, lr=9.99612e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=66324
2021-06-19 13:04:21 | INFO | train_inner | epoch 002: 2891 / 3002 loss=2.672, ppl=6.37, wps=5837.5, ups=0.09, wpb=64834, bsz=128, num_updates=5855, lr=9.99612e-05, gnorm=2.17, loss_scale=4, train_wall=11, gb_free=2.8, wall=66335
2021-06-19 13:04:32 | INFO | train_inner | epoch 002: 2892 / 3002 loss=2.775, ppl=6.84, wps=5838.8, ups=0.09, wpb=64893, bsz=128, num_updates=5856, lr=9.99611e-05, gnorm=2.155, loss_scale=4, train_wall=11, gb_free=2.8, wall=66346
2021-06-19 13:04:43 | INFO | train_inner | epoch 002: 2893 / 3002 loss=2.562, ppl=5.91, wps=6004.6, ups=0.09, wpb=64811, bsz=128, num_updates=5857, lr=9.99611e-05, gnorm=2.14, loss_scale=4, train_wall=10, gb_free=2.8, wall=66357
2021-06-19 13:04:54 | INFO | train_inner | epoch 002: 2894 / 3002 loss=2.801, ppl=6.97, wps=5890.8, ups=0.09, wpb=64757, bsz=128, num_updates=5858, lr=9.99611e-05, gnorm=2.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=66368
2021-06-19 13:05:05 | INFO | train_inner | epoch 002: 2895 / 3002 loss=2.666, ppl=6.35, wps=5685.8, ups=0.09, wpb=64825, bsz=128, num_updates=5859, lr=9.99611e-05, gnorm=2.217, loss_scale=4, train_wall=11, gb_free=2.8, wall=66379
2021-06-19 13:05:16 | INFO | train_inner | epoch 002: 2896 / 3002 loss=2.882, ppl=7.37, wps=5927.9, ups=0.09, wpb=64839, bsz=128, num_updates=5860, lr=9.99611e-05, gnorm=2.249, loss_scale=4, train_wall=10, gb_free=2.8, wall=66390
2021-06-19 13:05:27 | INFO | train_inner | epoch 002: 2897 / 3002 loss=2.792, ppl=6.92, wps=5858.2, ups=0.09, wpb=64906, bsz=128, num_updates=5861, lr=9.99611e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=66401
2021-06-19 13:05:38 | INFO | train_inner | epoch 002: 2898 / 3002 loss=2.665, ppl=6.34, wps=5874.7, ups=0.09, wpb=64855, bsz=128, num_updates=5862, lr=9.99611e-05, gnorm=6.676, loss_scale=4, train_wall=11, gb_free=2.8, wall=66412
2021-06-19 13:05:49 | INFO | train_inner | epoch 002: 2899 / 3002 loss=2.611, ppl=6.11, wps=5887.6, ups=0.09, wpb=64807, bsz=128, num_updates=5863, lr=9.99611e-05, gnorm=2.246, loss_scale=4, train_wall=11, gb_free=2.8, wall=66423
2021-06-19 13:06:00 | INFO | train_inner | epoch 002: 2900 / 3002 loss=2.794, ppl=6.93, wps=5776.5, ups=0.09, wpb=64835, bsz=128, num_updates=5864, lr=9.99611e-05, gnorm=2.687, loss_scale=4, train_wall=11, gb_free=2.8, wall=66435
2021-06-19 13:06:11 | INFO | train_inner | epoch 002: 2901 / 3002 loss=2.812, ppl=7.02, wps=5890.1, ups=0.09, wpb=64776, bsz=128, num_updates=5865, lr=9.99611e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=66446
2021-06-19 13:06:22 | INFO | train_inner | epoch 002: 2902 / 3002 loss=2.75, ppl=6.73, wps=5940.2, ups=0.09, wpb=64930, bsz=128, num_updates=5866, lr=9.99611e-05, gnorm=2.283, loss_scale=4, train_wall=10, gb_free=2.8, wall=66457
2021-06-19 13:06:33 | INFO | train_inner | epoch 002: 2903 / 3002 loss=2.714, ppl=6.56, wps=5882.9, ups=0.09, wpb=64784, bsz=128, num_updates=5867, lr=9.99611e-05, gnorm=2.388, loss_scale=4, train_wall=11, gb_free=2.8, wall=66468
2021-06-19 13:06:44 | INFO | train_inner | epoch 002: 2904 / 3002 loss=2.647, ppl=6.26, wps=5919.7, ups=0.09, wpb=64769, bsz=128, num_updates=5868, lr=9.99611e-05, gnorm=2.305, loss_scale=4, train_wall=11, gb_free=2.8, wall=66479
2021-06-19 13:06:55 | INFO | train_inner | epoch 002: 2905 / 3002 loss=2.638, ppl=6.23, wps=5817.9, ups=0.09, wpb=64801, bsz=128, num_updates=5869, lr=9.9961e-05, gnorm=2.406, loss_scale=4, train_wall=11, gb_free=2.8, wall=66490
2021-06-19 13:07:06 | INFO | train_inner | epoch 002: 2906 / 3002 loss=2.738, ppl=6.67, wps=5921.8, ups=0.09, wpb=64834, bsz=128, num_updates=5870, lr=9.9961e-05, gnorm=2.424, loss_scale=4, train_wall=11, gb_free=2.8, wall=66501
2021-06-19 13:07:17 | INFO | train_inner | epoch 002: 2907 / 3002 loss=2.692, ppl=6.46, wps=5850.8, ups=0.09, wpb=64825, bsz=128, num_updates=5871, lr=9.9961e-05, gnorm=2.319, loss_scale=4, train_wall=11, gb_free=2.8, wall=66512
2021-06-19 13:07:29 | INFO | train_inner | epoch 002: 2908 / 3002 loss=2.567, ppl=5.92, wps=5829.6, ups=0.09, wpb=64807, bsz=128, num_updates=5872, lr=9.9961e-05, gnorm=2.258, loss_scale=4, train_wall=11, gb_free=2.8, wall=66523
2021-06-19 13:07:40 | INFO | train_inner | epoch 002: 2909 / 3002 loss=2.616, ppl=6.13, wps=5802.4, ups=0.09, wpb=64832, bsz=128, num_updates=5873, lr=9.9961e-05, gnorm=2.282, loss_scale=4, train_wall=11, gb_free=2.8, wall=66534
2021-06-19 13:07:51 | INFO | train_inner | epoch 002: 2910 / 3002 loss=2.727, ppl=6.62, wps=5864.2, ups=0.09, wpb=64872, bsz=128, num_updates=5874, lr=9.9961e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=66545
2021-06-19 13:08:02 | INFO | train_inner | epoch 002: 2911 / 3002 loss=2.538, ppl=5.81, wps=5909, ups=0.09, wpb=64825, bsz=128, num_updates=5875, lr=9.9961e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=66556
2021-06-19 13:08:13 | INFO | train_inner | epoch 002: 2912 / 3002 loss=2.759, ppl=6.77, wps=5829.3, ups=0.09, wpb=64798, bsz=128, num_updates=5876, lr=9.9961e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=66567
2021-06-19 13:08:24 | INFO | train_inner | epoch 002: 2913 / 3002 loss=2.687, ppl=6.44, wps=5873.2, ups=0.09, wpb=64892, bsz=128, num_updates=5877, lr=9.9961e-05, gnorm=2.286, loss_scale=4, train_wall=11, gb_free=2.8, wall=66578
2021-06-19 13:08:35 | INFO | train_inner | epoch 002: 2914 / 3002 loss=2.693, ppl=6.47, wps=5806.1, ups=0.09, wpb=64772, bsz=128, num_updates=5878, lr=9.9961e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=66589
2021-06-19 13:08:46 | INFO | train_inner | epoch 002: 2915 / 3002 loss=2.734, ppl=6.65, wps=5856.5, ups=0.09, wpb=64851, bsz=128, num_updates=5879, lr=9.9961e-05, gnorm=3.304, loss_scale=4, train_wall=11, gb_free=2.8, wall=66600
2021-06-19 13:08:57 | INFO | train_inner | epoch 002: 2916 / 3002 loss=2.696, ppl=6.48, wps=5887.9, ups=0.09, wpb=64879, bsz=128, num_updates=5880, lr=9.9961e-05, gnorm=2.198, loss_scale=4, train_wall=11, gb_free=2.8, wall=66611
2021-06-19 13:09:08 | INFO | train_inner | epoch 002: 2917 / 3002 loss=2.685, ppl=6.43, wps=5835.6, ups=0.09, wpb=64843, bsz=128, num_updates=5881, lr=9.99609e-05, gnorm=2.217, loss_scale=4, train_wall=11, gb_free=2.8, wall=66623
2021-06-19 13:09:19 | INFO | train_inner | epoch 002: 2918 / 3002 loss=2.8, ppl=6.97, wps=5950.7, ups=0.09, wpb=64875, bsz=128, num_updates=5882, lr=9.99609e-05, gnorm=2.549, loss_scale=4, train_wall=10, gb_free=2.8, wall=66634
2021-06-19 13:09:30 | INFO | train_inner | epoch 002: 2919 / 3002 loss=2.716, ppl=6.57, wps=5825, ups=0.09, wpb=64829, bsz=128, num_updates=5883, lr=9.99609e-05, gnorm=2.202, loss_scale=4, train_wall=11, gb_free=2.8, wall=66645
2021-06-19 13:09:41 | INFO | train_inner | epoch 002: 2920 / 3002 loss=2.631, ppl=6.19, wps=5977.4, ups=0.09, wpb=64908, bsz=128, num_updates=5884, lr=9.99609e-05, gnorm=8.935, loss_scale=4, train_wall=10, gb_free=2.8, wall=66655
2021-06-19 13:09:52 | INFO | train_inner | epoch 002: 2921 / 3002 loss=2.562, ppl=5.9, wps=5844.7, ups=0.09, wpb=64780, bsz=128, num_updates=5885, lr=9.99609e-05, gnorm=2.104, loss_scale=4, train_wall=11, gb_free=2.8, wall=66667
2021-06-19 13:10:03 | INFO | train_inner | epoch 002: 2922 / 3002 loss=2.658, ppl=6.31, wps=5925.4, ups=0.09, wpb=64774, bsz=128, num_updates=5886, lr=9.99609e-05, gnorm=2.715, loss_scale=4, train_wall=10, gb_free=2.8, wall=66678
2021-06-19 13:10:14 | INFO | train_inner | epoch 002: 2923 / 3002 loss=2.697, ppl=6.48, wps=5797.5, ups=0.09, wpb=64834, bsz=128, num_updates=5887, lr=9.99609e-05, gnorm=2.233, loss_scale=4, train_wall=11, gb_free=2.8, wall=66689
2021-06-19 13:10:25 | INFO | train_inner | epoch 002: 2924 / 3002 loss=2.772, ppl=6.83, wps=5831.7, ups=0.09, wpb=64834, bsz=128, num_updates=5888, lr=9.99609e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=66700
2021-06-19 13:10:37 | INFO | train_inner | epoch 002: 2925 / 3002 loss=2.636, ppl=6.21, wps=5835.4, ups=0.09, wpb=64779, bsz=128, num_updates=5889, lr=9.99609e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=66711
2021-06-19 13:10:48 | INFO | train_inner | epoch 002: 2926 / 3002 loss=2.772, ppl=6.83, wps=5853.1, ups=0.09, wpb=64827, bsz=128, num_updates=5890, lr=9.99609e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=66722
2021-06-19 13:10:59 | INFO | train_inner | epoch 002: 2927 / 3002 loss=2.686, ppl=6.44, wps=5769.2, ups=0.09, wpb=64796, bsz=128, num_updates=5891, lr=9.99609e-05, gnorm=2.2, loss_scale=4, train_wall=11, gb_free=2.8, wall=66733
2021-06-19 13:11:10 | INFO | train_inner | epoch 002: 2928 / 3002 loss=2.735, ppl=6.66, wps=5779.1, ups=0.09, wpb=64884, bsz=128, num_updates=5892, lr=9.99609e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=66744
2021-06-19 13:11:21 | INFO | train_inner | epoch 002: 2929 / 3002 loss=2.686, ppl=6.43, wps=5898.6, ups=0.09, wpb=64876, bsz=128, num_updates=5893, lr=9.99609e-05, gnorm=2.175, loss_scale=4, train_wall=11, gb_free=2.8, wall=66755
2021-06-19 13:11:32 | INFO | train_inner | epoch 002: 2930 / 3002 loss=2.609, ppl=6.1, wps=5925, ups=0.09, wpb=64926, bsz=128, num_updates=5894, lr=9.99608e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=66766
2021-06-19 13:11:43 | INFO | train_inner | epoch 002: 2931 / 3002 loss=2.826, ppl=7.09, wps=5840.4, ups=0.09, wpb=64859, bsz=128, num_updates=5895, lr=9.99608e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=66778
2021-06-19 13:11:54 | INFO | train_inner | epoch 002: 2932 / 3002 loss=2.729, ppl=6.63, wps=5856.8, ups=0.09, wpb=64858, bsz=128, num_updates=5896, lr=9.99608e-05, gnorm=2.508, loss_scale=4, train_wall=11, gb_free=2.8, wall=66789
2021-06-19 13:12:05 | INFO | train_inner | epoch 002: 2933 / 3002 loss=2.799, ppl=6.96, wps=5944.6, ups=0.09, wpb=64945, bsz=128, num_updates=5897, lr=9.99608e-05, gnorm=2.194, loss_scale=4, train_wall=10, gb_free=2.8, wall=66800
2021-06-19 13:12:16 | INFO | train_inner | epoch 002: 2934 / 3002 loss=2.729, ppl=6.63, wps=5819.3, ups=0.09, wpb=64872, bsz=128, num_updates=5898, lr=9.99608e-05, gnorm=2.18, loss_scale=4, train_wall=11, gb_free=2.8, wall=66811
2021-06-19 13:12:27 | INFO | train_inner | epoch 002: 2935 / 3002 loss=2.67, ppl=6.36, wps=5827.2, ups=0.09, wpb=64858, bsz=128, num_updates=5899, lr=9.99608e-05, gnorm=2.264, loss_scale=4, train_wall=11, gb_free=2.8, wall=66822
2021-06-19 13:12:38 | INFO | train_inner | epoch 002: 2936 / 3002 loss=2.64, ppl=6.23, wps=5947.8, ups=0.09, wpb=64811, bsz=128, num_updates=5900, lr=9.99608e-05, gnorm=2.249, loss_scale=4, train_wall=10, gb_free=2.8, wall=66833
2021-06-19 13:12:49 | INFO | train_inner | epoch 002: 2937 / 3002 loss=2.804, ppl=6.98, wps=5971.9, ups=0.09, wpb=64832, bsz=128, num_updates=5901, lr=9.99608e-05, gnorm=13.585, loss_scale=4, train_wall=10, gb_free=2.8, wall=66844
2021-06-19 13:13:00 | INFO | train_inner | epoch 002: 2938 / 3002 loss=2.699, ppl=6.49, wps=5840.6, ups=0.09, wpb=64805, bsz=128, num_updates=5902, lr=9.99608e-05, gnorm=2.17, loss_scale=4, train_wall=11, gb_free=2.8, wall=66855
2021-06-19 13:13:11 | INFO | train_inner | epoch 002: 2939 / 3002 loss=2.681, ppl=6.41, wps=5834.2, ups=0.09, wpb=64774, bsz=128, num_updates=5903, lr=9.99608e-05, gnorm=2.164, loss_scale=4, train_wall=11, gb_free=2.8, wall=66866
2021-06-19 13:13:22 | INFO | train_inner | epoch 002: 2940 / 3002 loss=2.61, ppl=6.1, wps=5948.5, ups=0.09, wpb=64917, bsz=128, num_updates=5904, lr=9.99608e-05, gnorm=2.16, loss_scale=4, train_wall=10, gb_free=2.8, wall=66877
2021-06-19 13:13:33 | INFO | train_inner | epoch 002: 2941 / 3002 loss=2.634, ppl=6.21, wps=5919, ups=0.09, wpb=64909, bsz=128, num_updates=5905, lr=9.99608e-05, gnorm=2.16, loss_scale=4, train_wall=11, gb_free=2.8, wall=66888
2021-06-19 13:13:44 | INFO | train_inner | epoch 002: 2942 / 3002 loss=2.514, ppl=5.71, wps=5869, ups=0.09, wpb=64918, bsz=128, num_updates=5906, lr=9.99607e-05, gnorm=2.227, loss_scale=4, train_wall=11, gb_free=2.8, wall=66899
2021-06-19 13:13:55 | INFO | train_inner | epoch 002: 2943 / 3002 loss=2.609, ppl=6.1, wps=5832.8, ups=0.09, wpb=64828, bsz=128, num_updates=5907, lr=9.99607e-05, gnorm=3.703, loss_scale=4, train_wall=11, gb_free=2.8, wall=66910
2021-06-19 13:14:06 | INFO | train_inner | epoch 002: 2944 / 3002 loss=2.673, ppl=6.38, wps=5875.2, ups=0.09, wpb=64823, bsz=128, num_updates=5908, lr=9.99607e-05, gnorm=2.215, loss_scale=4, train_wall=11, gb_free=2.8, wall=66921
2021-06-19 13:14:18 | INFO | train_inner | epoch 002: 2945 / 3002 loss=2.821, ppl=7.07, wps=5864.7, ups=0.09, wpb=64842, bsz=128, num_updates=5909, lr=9.99607e-05, gnorm=3.601, loss_scale=4, train_wall=11, gb_free=2.8, wall=66932
2021-06-19 13:14:29 | INFO | train_inner | epoch 002: 2946 / 3002 loss=2.64, ppl=6.23, wps=5741.9, ups=0.09, wpb=64757, bsz=128, num_updates=5910, lr=9.99607e-05, gnorm=2.734, loss_scale=4, train_wall=11, gb_free=2.8, wall=66943
2021-06-19 13:14:40 | INFO | train_inner | epoch 002: 2947 / 3002 loss=2.603, ppl=6.07, wps=5902, ups=0.09, wpb=64882, bsz=128, num_updates=5911, lr=9.99607e-05, gnorm=2.174, loss_scale=4, train_wall=11, gb_free=2.8, wall=66954
2021-06-19 13:14:51 | INFO | train_inner | epoch 002: 2948 / 3002 loss=2.866, ppl=7.29, wps=5927.2, ups=0.09, wpb=64817, bsz=128, num_updates=5912, lr=9.99607e-05, gnorm=2.35, loss_scale=4, train_wall=10, gb_free=2.8, wall=66965
2021-06-19 13:15:02 | INFO | train_inner | epoch 002: 2949 / 3002 loss=2.928, ppl=7.61, wps=5869.2, ups=0.09, wpb=64837, bsz=128, num_updates=5913, lr=9.99607e-05, gnorm=2.098, loss_scale=4, train_wall=11, gb_free=2.8, wall=66976
2021-06-19 13:15:13 | INFO | train_inner | epoch 002: 2950 / 3002 loss=2.755, ppl=6.75, wps=5711.2, ups=0.09, wpb=64769, bsz=128, num_updates=5914, lr=9.99607e-05, gnorm=2.146, loss_scale=4, train_wall=11, gb_free=2.8, wall=66987
2021-06-19 13:15:24 | INFO | train_inner | epoch 002: 2951 / 3002 loss=2.858, ppl=7.25, wps=5857.8, ups=0.09, wpb=64811, bsz=128, num_updates=5915, lr=9.99607e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=66999
2021-06-19 13:15:36 | INFO | train_inner | epoch 002: 2952 / 3002 loss=2.507, ppl=5.69, wps=5725.2, ups=0.09, wpb=64798, bsz=128, num_updates=5916, lr=9.99607e-05, gnorm=2.127, loss_scale=4, train_wall=11, gb_free=2.8, wall=67010
2021-06-19 13:15:47 | INFO | train_inner | epoch 002: 2953 / 3002 loss=2.783, ppl=6.88, wps=5836.7, ups=0.09, wpb=64862, bsz=128, num_updates=5917, lr=9.99607e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=67021
2021-06-19 13:15:58 | INFO | train_inner | epoch 002: 2954 / 3002 loss=2.716, ppl=6.57, wps=5835.4, ups=0.09, wpb=64759, bsz=128, num_updates=5918, lr=9.99607e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=67032
2021-06-19 13:16:09 | INFO | train_inner | epoch 002: 2955 / 3002 loss=2.654, ppl=6.3, wps=5790, ups=0.09, wpb=64846, bsz=128, num_updates=5919, lr=9.99606e-05, gnorm=2.353, loss_scale=4, train_wall=11, gb_free=2.8, wall=67043
2021-06-19 13:16:20 | INFO | train_inner | epoch 002: 2956 / 3002 loss=2.693, ppl=6.47, wps=5894.8, ups=0.09, wpb=64845, bsz=128, num_updates=5920, lr=9.99606e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=67054
2021-06-19 13:16:31 | INFO | train_inner | epoch 002: 2957 / 3002 loss=2.784, ppl=6.89, wps=5814.9, ups=0.09, wpb=64900, bsz=128, num_updates=5921, lr=9.99606e-05, gnorm=3.358, loss_scale=4, train_wall=11, gb_free=2.8, wall=67065
2021-06-19 13:16:42 | INFO | train_inner | epoch 002: 2958 / 3002 loss=2.583, ppl=5.99, wps=5724.8, ups=0.09, wpb=64829, bsz=128, num_updates=5922, lr=9.99606e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=67077
2021-06-19 13:16:53 | INFO | train_inner | epoch 002: 2959 / 3002 loss=2.675, ppl=6.38, wps=5927.8, ups=0.09, wpb=64932, bsz=128, num_updates=5923, lr=9.99606e-05, gnorm=2.31, loss_scale=4, train_wall=10, gb_free=2.8, wall=67088
2021-06-19 13:17:04 | INFO | train_inner | epoch 002: 2960 / 3002 loss=2.61, ppl=6.11, wps=5892.2, ups=0.09, wpb=64767, bsz=128, num_updates=5924, lr=9.99606e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=67099
2021-06-19 13:17:15 | INFO | train_inner | epoch 002: 2961 / 3002 loss=2.692, ppl=6.46, wps=5980.2, ups=0.09, wpb=64920, bsz=128, num_updates=5925, lr=9.99606e-05, gnorm=2.174, loss_scale=4, train_wall=10, gb_free=2.8, wall=67110
2021-06-19 13:17:26 | INFO | train_inner | epoch 002: 2962 / 3002 loss=2.683, ppl=6.42, wps=5823.2, ups=0.09, wpb=64887, bsz=128, num_updates=5926, lr=9.99606e-05, gnorm=2.212, loss_scale=4, train_wall=11, gb_free=2.8, wall=67121
2021-06-19 13:17:37 | INFO | train_inner | epoch 002: 2963 / 3002 loss=2.776, ppl=6.85, wps=5952.7, ups=0.09, wpb=64866, bsz=128, num_updates=5927, lr=9.99606e-05, gnorm=2.161, loss_scale=4, train_wall=10, gb_free=2.8, wall=67132
2021-06-19 13:17:48 | INFO | train_inner | epoch 002: 2964 / 3002 loss=2.878, ppl=7.35, wps=5834.3, ups=0.09, wpb=64801, bsz=128, num_updates=5928, lr=9.99606e-05, gnorm=2.274, loss_scale=4, train_wall=11, gb_free=2.8, wall=67143
2021-06-19 13:17:59 | INFO | train_inner | epoch 002: 2965 / 3002 loss=2.557, ppl=5.88, wps=5902, ups=0.09, wpb=64800, bsz=128, num_updates=5929, lr=9.99606e-05, gnorm=2.25, loss_scale=4, train_wall=11, gb_free=2.8, wall=67154
2021-06-19 13:18:10 | INFO | train_inner | epoch 002: 2966 / 3002 loss=2.705, ppl=6.52, wps=5816.2, ups=0.09, wpb=64763, bsz=128, num_updates=5930, lr=9.99606e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=67165
2021-06-19 13:18:22 | INFO | train_inner | epoch 002: 2967 / 3002 loss=2.655, ppl=6.3, wps=5771.6, ups=0.09, wpb=64812, bsz=128, num_updates=5931, lr=9.99605e-05, gnorm=2.217, loss_scale=4, train_wall=11, gb_free=2.8, wall=67176
2021-06-19 13:18:33 | INFO | train_inner | epoch 002: 2968 / 3002 loss=2.769, ppl=6.82, wps=5807.1, ups=0.09, wpb=64882, bsz=128, num_updates=5932, lr=9.99605e-05, gnorm=2.337, loss_scale=4, train_wall=11, gb_free=2.8, wall=67187
2021-06-19 13:18:44 | INFO | train_inner | epoch 002: 2969 / 3002 loss=2.558, ppl=5.89, wps=5874.2, ups=0.09, wpb=64781, bsz=128, num_updates=5933, lr=9.99605e-05, gnorm=2.346, loss_scale=4, train_wall=11, gb_free=2.8, wall=67198
2021-06-19 13:18:55 | INFO | train_inner | epoch 002: 2970 / 3002 loss=2.586, ppl=6, wps=5880.7, ups=0.09, wpb=64830, bsz=128, num_updates=5934, lr=9.99605e-05, gnorm=2.404, loss_scale=4, train_wall=11, gb_free=2.8, wall=67209
2021-06-19 13:19:06 | INFO | train_inner | epoch 002: 2971 / 3002 loss=2.639, ppl=6.23, wps=5903.4, ups=0.09, wpb=64802, bsz=128, num_updates=5935, lr=9.99605e-05, gnorm=4.808, loss_scale=4, train_wall=11, gb_free=2.8, wall=67220
2021-06-19 13:19:17 | INFO | train_inner | epoch 002: 2972 / 3002 loss=2.909, ppl=7.51, wps=5861.3, ups=0.09, wpb=64817, bsz=128, num_updates=5936, lr=9.99605e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=67231
2021-06-19 13:19:28 | INFO | train_inner | epoch 002: 2973 / 3002 loss=2.787, ppl=6.9, wps=5809.3, ups=0.09, wpb=64760, bsz=128, num_updates=5937, lr=9.99605e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=67242
2021-06-19 13:19:39 | INFO | train_inner | epoch 002: 2974 / 3002 loss=2.601, ppl=6.07, wps=5892.1, ups=0.09, wpb=64829, bsz=128, num_updates=5938, lr=9.99605e-05, gnorm=2.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=67253
2021-06-19 13:19:50 | INFO | train_inner | epoch 002: 2975 / 3002 loss=2.678, ppl=6.4, wps=5947.2, ups=0.09, wpb=64793, bsz=128, num_updates=5939, lr=9.99605e-05, gnorm=2.887, loss_scale=4, train_wall=10, gb_free=2.8, wall=67264
2021-06-19 13:20:01 | INFO | train_inner | epoch 002: 2976 / 3002 loss=2.765, ppl=6.8, wps=5884.9, ups=0.09, wpb=64780, bsz=128, num_updates=5940, lr=9.99605e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=67275
2021-06-19 13:20:12 | INFO | train_inner | epoch 002: 2977 / 3002 loss=2.724, ppl=6.61, wps=5811.2, ups=0.09, wpb=64799, bsz=128, num_updates=5941, lr=9.99605e-05, gnorm=3.584, loss_scale=4, train_wall=11, gb_free=2.8, wall=67287
2021-06-19 13:20:23 | INFO | train_inner | epoch 002: 2978 / 3002 loss=2.698, ppl=6.49, wps=5899.4, ups=0.09, wpb=64815, bsz=128, num_updates=5942, lr=9.99605e-05, gnorm=2.141, loss_scale=4, train_wall=11, gb_free=2.8, wall=67298
2021-06-19 13:20:34 | INFO | train_inner | epoch 002: 2979 / 3002 loss=2.654, ppl=6.29, wps=5830.3, ups=0.09, wpb=64911, bsz=128, num_updates=5943, lr=9.99605e-05, gnorm=2.456, loss_scale=4, train_wall=11, gb_free=2.8, wall=67309
2021-06-19 13:20:45 | INFO | train_inner | epoch 002: 2980 / 3002 loss=2.659, ppl=6.32, wps=5815.2, ups=0.09, wpb=64850, bsz=128, num_updates=5944, lr=9.99604e-05, gnorm=2.612, loss_scale=4, train_wall=11, gb_free=2.8, wall=67320
2021-06-19 13:20:57 | INFO | train_inner | epoch 002: 2981 / 3002 loss=2.785, ppl=6.89, wps=5835.5, ups=0.09, wpb=64906, bsz=128, num_updates=5945, lr=9.99604e-05, gnorm=2.212, loss_scale=4, train_wall=11, gb_free=2.8, wall=67331
2021-06-19 13:21:08 | INFO | train_inner | epoch 002: 2982 / 3002 loss=2.632, ppl=6.2, wps=5839.7, ups=0.09, wpb=64800, bsz=128, num_updates=5946, lr=9.99604e-05, gnorm=2.213, loss_scale=4, train_wall=11, gb_free=2.8, wall=67342
2021-06-19 13:21:19 | INFO | train_inner | epoch 002: 2983 / 3002 loss=2.7, ppl=6.5, wps=5954.6, ups=0.09, wpb=64870, bsz=128, num_updates=5947, lr=9.99604e-05, gnorm=2.224, loss_scale=4, train_wall=10, gb_free=2.8, wall=67353
2021-06-19 13:21:29 | INFO | train_inner | epoch 002: 2984 / 3002 loss=2.618, ppl=6.14, wps=5995.2, ups=0.09, wpb=64755, bsz=128, num_updates=5948, lr=9.99604e-05, gnorm=2.605, loss_scale=4, train_wall=10, gb_free=2.8, wall=67364
2021-06-19 13:21:41 | INFO | train_inner | epoch 002: 2985 / 3002 loss=2.595, ppl=6.04, wps=5811.5, ups=0.09, wpb=64858, bsz=128, num_updates=5949, lr=9.99604e-05, gnorm=2.16, loss_scale=8, train_wall=11, gb_free=2.8, wall=67375
2021-06-19 13:21:52 | INFO | train_inner | epoch 002: 2986 / 3002 loss=2.61, ppl=6.1, wps=5900.5, ups=0.09, wpb=64790, bsz=128, num_updates=5950, lr=9.99604e-05, gnorm=2.345, loss_scale=8, train_wall=11, gb_free=2.8, wall=67386
2021-06-19 13:22:03 | INFO | train_inner | epoch 002: 2987 / 3002 loss=2.626, ppl=6.17, wps=5855.4, ups=0.09, wpb=64866, bsz=128, num_updates=5951, lr=9.99604e-05, gnorm=2.251, loss_scale=8, train_wall=11, gb_free=2.8, wall=67397
2021-06-19 13:22:14 | INFO | train_inner | epoch 002: 2988 / 3002 loss=2.875, ppl=7.34, wps=5915.7, ups=0.09, wpb=64800, bsz=128, num_updates=5952, lr=9.99604e-05, gnorm=2.187, loss_scale=8, train_wall=11, gb_free=2.8, wall=67408
2021-06-19 13:22:24 | INFO | train_inner | epoch 002: 2989 / 3002 loss=2.735, ppl=6.66, wps=5959.1, ups=0.09, wpb=64887, bsz=128, num_updates=5953, lr=9.99604e-05, gnorm=2.296, loss_scale=8, train_wall=10, gb_free=2.8, wall=67419
2021-06-19 13:22:36 | INFO | train_inner | epoch 002: 2990 / 3002 loss=2.618, ppl=6.14, wps=5811.4, ups=0.09, wpb=64832, bsz=128, num_updates=5954, lr=9.99604e-05, gnorm=2.236, loss_scale=8, train_wall=11, gb_free=2.8, wall=67430
2021-06-19 13:22:47 | INFO | train_inner | epoch 002: 2991 / 3002 loss=2.675, ppl=6.38, wps=5878, ups=0.09, wpb=64820, bsz=128, num_updates=5955, lr=9.99604e-05, gnorm=2.171, loss_scale=8, train_wall=11, gb_free=2.8, wall=67441
2021-06-19 13:22:58 | INFO | train_inner | epoch 002: 2992 / 3002 loss=2.705, ppl=6.52, wps=5952.5, ups=0.09, wpb=64846, bsz=128, num_updates=5956, lr=9.99603e-05, gnorm=2.13, loss_scale=8, train_wall=10, gb_free=2.8, wall=67452
2021-06-19 13:23:09 | INFO | train_inner | epoch 002: 2993 / 3002 loss=2.583, ppl=5.99, wps=5846.8, ups=0.09, wpb=64811, bsz=128, num_updates=5957, lr=9.99603e-05, gnorm=2.182, loss_scale=8, train_wall=11, gb_free=2.8, wall=67463
2021-06-19 13:23:20 | INFO | train_inner | epoch 002: 2994 / 3002 loss=2.709, ppl=6.54, wps=5910.6, ups=0.09, wpb=64823, bsz=128, num_updates=5958, lr=9.99603e-05, gnorm=2.275, loss_scale=8, train_wall=11, gb_free=2.8, wall=67474
2021-06-19 13:23:31 | INFO | train_inner | epoch 002: 2995 / 3002 loss=2.765, ppl=6.8, wps=5883.7, ups=0.09, wpb=64821, bsz=128, num_updates=5959, lr=9.99603e-05, gnorm=2.247, loss_scale=8, train_wall=11, gb_free=2.8, wall=67485
2021-06-19 13:23:41 | INFO | train_inner | epoch 002: 2996 / 3002 loss=2.553, ppl=5.87, wps=6022.7, ups=0.09, wpb=64881, bsz=128, num_updates=5960, lr=9.99603e-05, gnorm=2.224, loss_scale=8, train_wall=10, gb_free=2.8, wall=67496
2021-06-19 13:23:53 | INFO | train_inner | epoch 002: 2997 / 3002 loss=2.708, ppl=6.53, wps=5758.2, ups=0.09, wpb=64752, bsz=128, num_updates=5961, lr=9.99603e-05, gnorm=2.302, loss_scale=8, train_wall=11, gb_free=2.8, wall=67507
2021-06-19 13:24:04 | INFO | train_inner | epoch 002: 2998 / 3002 loss=2.783, ppl=6.88, wps=5826.9, ups=0.09, wpb=64842, bsz=128, num_updates=5962, lr=9.99603e-05, gnorm=2.32, loss_scale=8, train_wall=11, gb_free=2.8, wall=67518
2021-06-19 13:24:15 | INFO | train_inner | epoch 002: 2999 / 3002 loss=2.658, ppl=6.31, wps=5749.1, ups=0.09, wpb=64876, bsz=128, num_updates=5963, lr=9.99603e-05, gnorm=2.241, loss_scale=8, train_wall=11, gb_free=2.8, wall=67529
2021-06-19 13:24:26 | INFO | train_inner | epoch 002: 3000 / 3002 loss=2.571, ppl=5.94, wps=5747.1, ups=0.09, wpb=64805, bsz=128, num_updates=5964, lr=9.99603e-05, gnorm=2.153, loss_scale=8, train_wall=11, gb_free=2.8, wall=67541
2021-06-19 13:24:37 | INFO | train_inner | epoch 002: 3001 / 3002 loss=2.687, ppl=6.44, wps=5954.8, ups=0.09, wpb=64797, bsz=128, num_updates=5965, lr=9.99603e-05, gnorm=2.218, loss_scale=8, train_wall=10, gb_free=2.8, wall=67552
2021-06-19 13:24:43 | INFO | train_inner | epoch 002: 3002 / 3002 loss=2.725, ppl=6.61, wps=5831.9, ups=0.16, wpb=36452, bsz=72, num_updates=5966, lr=9.99603e-05, gnorm=2.896, loss_scale=8, train_wall=6, gb_free=2.8, wall=67558
2021-06-19 13:24:43 | INFO | fairseq_cli.train | begin validation on "valid" subset
2021-06-19 13:39:39 | INFO | valid | epoch 002 | valid on 'valid' subset | loss 2.535 | ppl 5.79 | wps 19710.6 | wpb 506.5 | bsz 1 | num_updates 5966 | best_loss 2.535
2021-06-19 13:39:39 | INFO | fairseq.checkpoint_utils | Preparing to save checkpoint for epoch 2 @ 5966 updates
2021-06-19 13:39:39 | INFO | fairseq.trainer | Saving checkpoint to checkpoints/checkpoint2.pt
2021-06-19 13:39:54 | INFO | fairseq.trainer | Finished saving checkpoint to checkpoints/checkpoint2.pt
2021-06-19 13:46:21 | INFO | fairseq.checkpoint_utils | Saved checkpoint checkpoints/checkpoint2.pt (epoch 2 @ 5966 updates, score 2.535) (writing took 402.46520497099846 seconds)
2021-06-19 13:46:21 | INFO | fairseq_cli.train | end of epoch 2 (average epoch stats below)
2021-06-19 13:46:21 | INFO | train | epoch 002 | loss 2.768 | ppl 6.81 | wps 5602.8 | ups 0.09 | wpb 64819.4 | bsz 128 | num_updates 5966 | lr 9.99603e-05 | gnorm 2.571 | loss_scale 8 | train_wall 31877 | gb_free 2.8 | wall 68856
2021-06-19 13:46:22 | INFO | fairseq.trainer | begin training epoch 3
2021-06-19 13:46:22 | INFO | fairseq_cli.train | Start iterating over samples
2021-06-19 13:46:32 | INFO | train_inner | epoch 003: 1 / 3002 loss=2.854, ppl=7.23, wps=49.6, ups=0, wpb=64861, bsz=128, num_updates=5967, lr=9.99603e-05, gnorm=2.125, loss_scale=8, train_wall=10, gb_free=2.8, wall=68866
2021-06-19 13:46:42 | INFO | train_inner | epoch 003: 2 / 3002 loss=2.708, ppl=6.53, wps=6305.1, ups=0.1, wpb=64843, bsz=128, num_updates=5968, lr=9.99603e-05, gnorm=2.231, loss_scale=8, train_wall=10, gb_free=2.8, wall=68877
2021-06-19 13:46:53 | INFO | train_inner | epoch 003: 3 / 3002 loss=2.592, ppl=6.03, wps=6232.8, ups=0.1, wpb=64801, bsz=128, num_updates=5969, lr=9.99602e-05, gnorm=2.101, loss_scale=8, train_wall=10, gb_free=2.8, wall=68887
2021-06-19 13:47:03 | INFO | train_inner | epoch 003: 4 / 3002 loss=2.667, ppl=6.35, wps=6134.2, ups=0.09, wpb=64800, bsz=128, num_updates=5970, lr=9.99602e-05, gnorm=2.166, loss_scale=8, train_wall=10, gb_free=2.8, wall=68898
2021-06-19 13:47:14 | INFO | train_inner | epoch 003: 5 / 3002 loss=2.685, ppl=6.43, wps=5998.3, ups=0.09, wpb=64840, bsz=128, num_updates=5971, lr=9.99602e-05, gnorm=2.265, loss_scale=8, train_wall=10, gb_free=2.8, wall=68909
2021-06-19 13:47:25 | INFO | train_inner | epoch 003: 6 / 3002 loss=2.803, ppl=6.98, wps=6144.4, ups=0.09, wpb=64837, bsz=128, num_updates=5972, lr=9.99602e-05, gnorm=3.475, loss_scale=8, train_wall=10, gb_free=2.8, wall=68919
2021-06-19 13:47:35 | INFO | train_inner | epoch 003: 7 / 3002 loss=2.606, ppl=6.09, wps=6059.5, ups=0.09, wpb=64829, bsz=128, num_updates=5973, lr=9.99602e-05, gnorm=2.227, loss_scale=8, train_wall=10, gb_free=2.8, wall=68930
2021-06-19 13:47:46 | INFO | train_inner | epoch 003: 8 / 3002 loss=2.633, ppl=6.2, wps=6043.3, ups=0.09, wpb=64784, bsz=128, num_updates=5974, lr=9.99602e-05, gnorm=2.139, loss_scale=8, train_wall=10, gb_free=2.8, wall=68941
2021-06-19 13:47:57 | INFO | train_inner | epoch 003: 9 / 3002 loss=2.654, ppl=6.29, wps=6006.5, ups=0.09, wpb=64807, bsz=128, num_updates=5975, lr=9.99602e-05, gnorm=2.161, loss_scale=8, train_wall=10, gb_free=2.8, wall=68951
2021-06-19 13:48:08 | INFO | train_inner | epoch 003: 10 / 3002 loss=2.823, ppl=7.07, wps=6054.7, ups=0.09, wpb=64825, bsz=128, num_updates=5976, lr=9.99602e-05, gnorm=2.447, loss_scale=8, train_wall=10, gb_free=2.8, wall=68962
2021-06-19 13:48:19 | INFO | train_inner | epoch 003: 11 / 3002 loss=2.632, ppl=6.2, wps=5949.8, ups=0.09, wpb=64836, bsz=128, num_updates=5977, lr=9.99602e-05, gnorm=2.158, loss_scale=8, train_wall=10, gb_free=2.8, wall=68973
2021-06-19 13:48:30 | INFO | train_inner | epoch 003: 12 / 3002 loss=2.636, ppl=6.21, wps=5889.1, ups=0.09, wpb=64880, bsz=128, num_updates=5978, lr=9.99602e-05, gnorm=2.181, loss_scale=8, train_wall=11, gb_free=2.8, wall=68984
2021-06-19 13:48:40 | INFO | train_inner | epoch 003: 13 / 3002 loss=2.704, ppl=6.51, wps=5960.2, ups=0.09, wpb=64840, bsz=128, num_updates=5979, lr=9.99602e-05, gnorm=2.189, loss_scale=8, train_wall=10, gb_free=2.8, wall=68995
2021-06-19 13:48:51 | INFO | train_inner | epoch 003: 14 / 3002 loss=2.649, ppl=6.27, wps=5908, ups=0.09, wpb=64830, bsz=128, num_updates=5980, lr=9.99602e-05, gnorm=2.475, loss_scale=8, train_wall=11, gb_free=2.8, wall=69006
2021-06-19 13:49:02 | INFO | train_inner | epoch 003: 15 / 3002 loss=2.594, ppl=6.04, wps=6005.3, ups=0.09, wpb=64845, bsz=128, num_updates=5981, lr=9.99601e-05, gnorm=3.109, loss_scale=8, train_wall=10, gb_free=2.8, wall=69017
2021-06-19 13:49:13 | INFO | train_inner | epoch 003: 16 / 3002 loss=2.7, ppl=6.5, wps=6027.3, ups=0.09, wpb=64936, bsz=128, num_updates=5982, lr=9.99601e-05, gnorm=2.163, loss_scale=8, train_wall=10, gb_free=2.8, wall=69027
2021-06-19 13:49:24 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-19 13:49:35 | INFO | train_inner | epoch 003: 18 / 3002 loss=2.536, ppl=5.8, wps=2970.6, ups=0.05, wpb=64932, bsz=128, num_updates=5983, lr=9.99601e-05, gnorm=2.118, loss_scale=4, train_wall=21, gb_free=2.8, wall=69049
2021-06-19 13:49:46 | INFO | train_inner | epoch 003: 19 / 3002 loss=2.571, ppl=5.94, wps=5848.3, ups=0.09, wpb=64799, bsz=128, num_updates=5984, lr=9.99601e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=69060
2021-06-19 13:49:57 | INFO | train_inner | epoch 003: 20 / 3002 loss=2.591, ppl=6.03, wps=5862.8, ups=0.09, wpb=64781, bsz=128, num_updates=5985, lr=9.99601e-05, gnorm=2.184, loss_scale=4, train_wall=11, gb_free=2.8, wall=69071
2021-06-19 13:50:08 | INFO | train_inner | epoch 003: 21 / 3002 loss=2.518, ppl=5.73, wps=5944.1, ups=0.09, wpb=64759, bsz=128, num_updates=5986, lr=9.99601e-05, gnorm=2.249, loss_scale=4, train_wall=10, gb_free=2.8, wall=69082
2021-06-19 13:50:19 | INFO | train_inner | epoch 003: 22 / 3002 loss=2.712, ppl=6.55, wps=5996.9, ups=0.09, wpb=64846, bsz=128, num_updates=5987, lr=9.99601e-05, gnorm=2.152, loss_scale=4, train_wall=10, gb_free=2.8, wall=69093
2021-06-19 13:50:30 | INFO | train_inner | epoch 003: 23 / 3002 loss=2.71, ppl=6.54, wps=5872.7, ups=0.09, wpb=64855, bsz=128, num_updates=5988, lr=9.99601e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=69104
2021-06-19 13:50:41 | INFO | train_inner | epoch 003: 24 / 3002 loss=2.596, ppl=6.05, wps=5795.8, ups=0.09, wpb=64873, bsz=128, num_updates=5989, lr=9.99601e-05, gnorm=2.183, loss_scale=4, train_wall=11, gb_free=2.8, wall=69115
2021-06-19 13:50:52 | INFO | train_inner | epoch 003: 25 / 3002 loss=2.766, ppl=6.8, wps=5850.8, ups=0.09, wpb=64784, bsz=128, num_updates=5990, lr=9.99601e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=69126
2021-06-19 13:51:03 | INFO | train_inner | epoch 003: 26 / 3002 loss=2.512, ppl=5.71, wps=5771.9, ups=0.09, wpb=64732, bsz=128, num_updates=5991, lr=9.99601e-05, gnorm=2.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=69138
2021-06-19 13:51:15 | INFO | train_inner | epoch 003: 27 / 3002 loss=2.774, ppl=6.84, wps=5691.6, ups=0.09, wpb=64785, bsz=128, num_updates=5992, lr=9.99601e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=69149
2021-06-19 13:51:26 | INFO | train_inner | epoch 003: 28 / 3002 loss=2.657, ppl=6.31, wps=5743.9, ups=0.09, wpb=64853, bsz=128, num_updates=5993, lr=9.99601e-05, gnorm=2.118, loss_scale=4, train_wall=11, gb_free=2.8, wall=69160
2021-06-19 13:51:37 | INFO | train_inner | epoch 003: 29 / 3002 loss=2.597, ppl=6.05, wps=5798.6, ups=0.09, wpb=64825, bsz=128, num_updates=5994, lr=9.996e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=69171
2021-06-19 13:51:48 | INFO | train_inner | epoch 003: 30 / 3002 loss=2.609, ppl=6.1, wps=5765.7, ups=0.09, wpb=64824, bsz=128, num_updates=5995, lr=9.996e-05, gnorm=2.988, loss_scale=4, train_wall=11, gb_free=2.8, wall=69183
2021-06-19 13:51:59 | INFO | train_inner | epoch 003: 31 / 3002 loss=2.712, ppl=6.55, wps=5805.4, ups=0.09, wpb=64769, bsz=128, num_updates=5996, lr=9.996e-05, gnorm=2.14, loss_scale=4, train_wall=11, gb_free=2.8, wall=69194
2021-06-19 13:52:11 | INFO | train_inner | epoch 003: 32 / 3002 loss=2.99, ppl=7.95, wps=5880.6, ups=0.09, wpb=64796, bsz=128, num_updates=5997, lr=9.996e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=69205
2021-06-19 13:52:22 | INFO | train_inner | epoch 003: 33 / 3002 loss=2.666, ppl=6.35, wps=5881.3, ups=0.09, wpb=64882, bsz=128, num_updates=5998, lr=9.996e-05, gnorm=2.172, loss_scale=4, train_wall=11, gb_free=2.8, wall=69216
2021-06-19 13:52:33 | INFO | train_inner | epoch 003: 34 / 3002 loss=2.553, ppl=5.87, wps=5886.8, ups=0.09, wpb=64861, bsz=128, num_updates=5999, lr=9.996e-05, gnorm=2.233, loss_scale=4, train_wall=11, gb_free=2.8, wall=69227
2021-06-19 13:52:44 | INFO | train_inner | epoch 003: 35 / 3002 loss=2.541, ppl=5.82, wps=5909.3, ups=0.09, wpb=64908, bsz=128, num_updates=6000, lr=9.996e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=69238
2021-06-19 13:52:54 | INFO | train_inner | epoch 003: 36 / 3002 loss=2.64, ppl=6.23, wps=5949.6, ups=0.09, wpb=64889, bsz=128, num_updates=6001, lr=9.996e-05, gnorm=2.207, loss_scale=4, train_wall=10, gb_free=2.8, wall=69249
2021-06-19 13:53:06 | INFO | train_inner | epoch 003: 37 / 3002 loss=2.578, ppl=5.97, wps=5865.9, ups=0.09, wpb=64894, bsz=128, num_updates=6002, lr=9.996e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=69260
2021-06-19 13:53:17 | INFO | train_inner | epoch 003: 38 / 3002 loss=2.717, ppl=6.58, wps=5789, ups=0.09, wpb=64859, bsz=128, num_updates=6003, lr=9.996e-05, gnorm=2.129, loss_scale=4, train_wall=11, gb_free=2.8, wall=69271
2021-06-19 13:53:28 | INFO | train_inner | epoch 003: 39 / 3002 loss=2.565, ppl=5.92, wps=5870.5, ups=0.09, wpb=64852, bsz=128, num_updates=6004, lr=9.996e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=69282
2021-06-19 13:53:39 | INFO | train_inner | epoch 003: 40 / 3002 loss=2.645, ppl=6.26, wps=5869.7, ups=0.09, wpb=64719, bsz=128, num_updates=6005, lr=9.996e-05, gnorm=2.043, loss_scale=4, train_wall=11, gb_free=2.8, wall=69293
2021-06-19 13:53:50 | INFO | train_inner | epoch 003: 41 / 3002 loss=2.672, ppl=6.37, wps=5837.6, ups=0.09, wpb=64864, bsz=128, num_updates=6006, lr=9.99599e-05, gnorm=2.227, loss_scale=4, train_wall=11, gb_free=2.8, wall=69304
2021-06-19 13:54:01 | INFO | train_inner | epoch 003: 42 / 3002 loss=2.733, ppl=6.65, wps=5864.8, ups=0.09, wpb=64761, bsz=128, num_updates=6007, lr=9.99599e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=69315
2021-06-19 13:54:12 | INFO | train_inner | epoch 003: 43 / 3002 loss=2.544, ppl=5.83, wps=5825.8, ups=0.09, wpb=64816, bsz=128, num_updates=6008, lr=9.99599e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=69326
2021-06-19 13:54:23 | INFO | train_inner | epoch 003: 44 / 3002 loss=2.793, ppl=6.93, wps=5774.4, ups=0.09, wpb=64708, bsz=128, num_updates=6009, lr=9.99599e-05, gnorm=2.113, loss_scale=4, train_wall=11, gb_free=2.8, wall=69338
2021-06-19 13:54:35 | INFO | train_inner | epoch 003: 45 / 3002 loss=2.69, ppl=6.45, wps=5750.4, ups=0.09, wpb=64849, bsz=128, num_updates=6010, lr=9.99599e-05, gnorm=2.177, loss_scale=4, train_wall=11, gb_free=2.8, wall=69349
2021-06-19 13:54:46 | INFO | train_inner | epoch 003: 46 / 3002 loss=2.636, ppl=6.22, wps=5805.5, ups=0.09, wpb=64899, bsz=128, num_updates=6011, lr=9.99599e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=69360
2021-06-19 13:54:57 | INFO | train_inner | epoch 003: 47 / 3002 loss=2.511, ppl=5.7, wps=5810.4, ups=0.09, wpb=64914, bsz=128, num_updates=6012, lr=9.99599e-05, gnorm=2.218, loss_scale=4, train_wall=11, gb_free=2.8, wall=69371
2021-06-19 13:55:08 | INFO | train_inner | epoch 003: 48 / 3002 loss=2.645, ppl=6.25, wps=5846.3, ups=0.09, wpb=64855, bsz=128, num_updates=6013, lr=9.99599e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=69382
2021-06-19 13:55:19 | INFO | train_inner | epoch 003: 49 / 3002 loss=2.672, ppl=6.37, wps=5861.8, ups=0.09, wpb=64789, bsz=128, num_updates=6014, lr=9.99599e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=69393
2021-06-19 13:55:30 | INFO | train_inner | epoch 003: 50 / 3002 loss=2.582, ppl=5.99, wps=5846.4, ups=0.09, wpb=64763, bsz=128, num_updates=6015, lr=9.99599e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=69404
2021-06-19 13:55:41 | INFO | train_inner | epoch 003: 51 / 3002 loss=2.459, ppl=5.5, wps=5940.3, ups=0.09, wpb=64879, bsz=128, num_updates=6016, lr=9.99599e-05, gnorm=2.197, loss_scale=4, train_wall=10, gb_free=2.8, wall=69415
2021-06-19 13:55:52 | INFO | train_inner | epoch 003: 52 / 3002 loss=2.713, ppl=6.56, wps=5848.5, ups=0.09, wpb=64778, bsz=128, num_updates=6017, lr=9.99599e-05, gnorm=2.283, loss_scale=4, train_wall=11, gb_free=2.8, wall=69426
2021-06-19 13:56:03 | INFO | train_inner | epoch 003: 53 / 3002 loss=2.856, ppl=7.24, wps=5833.2, ups=0.09, wpb=64770, bsz=128, num_updates=6018, lr=9.99599e-05, gnorm=2.181, loss_scale=4, train_wall=11, gb_free=2.8, wall=69438
2021-06-19 13:56:14 | INFO | train_inner | epoch 003: 54 / 3002 loss=2.62, ppl=6.15, wps=5891.6, ups=0.09, wpb=64752, bsz=128, num_updates=6019, lr=9.99598e-05, gnorm=2.159, loss_scale=4, train_wall=11, gb_free=2.8, wall=69449
2021-06-19 13:56:25 | INFO | train_inner | epoch 003: 55 / 3002 loss=2.698, ppl=6.49, wps=5768.5, ups=0.09, wpb=64809, bsz=128, num_updates=6020, lr=9.99598e-05, gnorm=2.245, loss_scale=4, train_wall=11, gb_free=2.8, wall=69460
2021-06-19 13:56:37 | INFO | train_inner | epoch 003: 56 / 3002 loss=2.64, ppl=6.23, wps=5788.2, ups=0.09, wpb=64861, bsz=128, num_updates=6021, lr=9.99598e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=69471
2021-06-19 13:56:48 | INFO | train_inner | epoch 003: 57 / 3002 loss=2.602, ppl=6.07, wps=5861.9, ups=0.09, wpb=64847, bsz=128, num_updates=6022, lr=9.99598e-05, gnorm=2.16, loss_scale=4, train_wall=11, gb_free=2.8, wall=69482
2021-06-19 13:56:59 | INFO | train_inner | epoch 003: 58 / 3002 loss=2.673, ppl=6.38, wps=5857, ups=0.09, wpb=64807, bsz=128, num_updates=6023, lr=9.99598e-05, gnorm=2.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=69493
2021-06-19 13:57:10 | INFO | train_inner | epoch 003: 59 / 3002 loss=2.635, ppl=6.21, wps=5898.3, ups=0.09, wpb=64889, bsz=128, num_updates=6024, lr=9.99598e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=69504
2021-06-19 13:57:21 | INFO | train_inner | epoch 003: 60 / 3002 loss=2.802, ppl=6.98, wps=5875, ups=0.09, wpb=64889, bsz=128, num_updates=6025, lr=9.99598e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=69515
2021-06-19 13:57:32 | INFO | train_inner | epoch 003: 61 / 3002 loss=2.546, ppl=5.84, wps=5932.4, ups=0.09, wpb=64845, bsz=128, num_updates=6026, lr=9.99598e-05, gnorm=2.169, loss_scale=4, train_wall=10, gb_free=2.8, wall=69526
2021-06-19 13:57:43 | INFO | train_inner | epoch 003: 62 / 3002 loss=2.517, ppl=5.73, wps=5839.3, ups=0.09, wpb=64796, bsz=128, num_updates=6027, lr=9.99598e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=69537
2021-06-19 13:57:54 | INFO | train_inner | epoch 003: 63 / 3002 loss=2.702, ppl=6.51, wps=5958.8, ups=0.09, wpb=64856, bsz=128, num_updates=6028, lr=9.99598e-05, gnorm=2.192, loss_scale=4, train_wall=10, gb_free=2.8, wall=69548
2021-06-19 13:58:05 | INFO | train_inner | epoch 003: 64 / 3002 loss=2.653, ppl=6.29, wps=5941.8, ups=0.09, wpb=64831, bsz=128, num_updates=6029, lr=9.99598e-05, gnorm=2.143, loss_scale=4, train_wall=10, gb_free=2.8, wall=69559
2021-06-19 13:58:16 | INFO | train_inner | epoch 003: 65 / 3002 loss=2.793, ppl=6.93, wps=5956.6, ups=0.09, wpb=64906, bsz=128, num_updates=6030, lr=9.99598e-05, gnorm=2.219, loss_scale=4, train_wall=10, gb_free=2.8, wall=69570
2021-06-19 13:58:26 | INFO | train_inner | epoch 003: 66 / 3002 loss=2.663, ppl=6.33, wps=5934.9, ups=0.09, wpb=64862, bsz=128, num_updates=6031, lr=9.99597e-05, gnorm=2.177, loss_scale=4, train_wall=10, gb_free=2.8, wall=69581
2021-06-19 13:58:38 | INFO | train_inner | epoch 003: 67 / 3002 loss=2.537, ppl=5.8, wps=5772.2, ups=0.09, wpb=64796, bsz=128, num_updates=6032, lr=9.99597e-05, gnorm=2.211, loss_scale=4, train_wall=11, gb_free=2.8, wall=69592
2021-06-19 13:58:49 | INFO | train_inner | epoch 003: 68 / 3002 loss=2.574, ppl=5.96, wps=5857.4, ups=0.09, wpb=64912, bsz=128, num_updates=6033, lr=9.99597e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=69603
2021-06-19 13:59:00 | INFO | train_inner | epoch 003: 69 / 3002 loss=2.669, ppl=6.36, wps=5900.3, ups=0.09, wpb=64803, bsz=128, num_updates=6034, lr=9.99597e-05, gnorm=2.122, loss_scale=4, train_wall=11, gb_free=2.8, wall=69614
2021-06-19 13:59:11 | INFO | train_inner | epoch 003: 70 / 3002 loss=2.79, ppl=6.92, wps=5883.1, ups=0.09, wpb=64868, bsz=128, num_updates=6035, lr=9.99597e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=69625
2021-06-19 13:59:22 | INFO | train_inner | epoch 003: 71 / 3002 loss=2.625, ppl=6.17, wps=5931.7, ups=0.09, wpb=64859, bsz=128, num_updates=6036, lr=9.99597e-05, gnorm=2.179, loss_scale=4, train_wall=10, gb_free=2.8, wall=69636
2021-06-19 13:59:33 | INFO | train_inner | epoch 003: 72 / 3002 loss=2.691, ppl=6.46, wps=5849.6, ups=0.09, wpb=64766, bsz=128, num_updates=6037, lr=9.99597e-05, gnorm=4.769, loss_scale=4, train_wall=11, gb_free=2.8, wall=69647
2021-06-19 13:59:44 | INFO | train_inner | epoch 003: 73 / 3002 loss=2.66, ppl=6.32, wps=5811.2, ups=0.09, wpb=64814, bsz=128, num_updates=6038, lr=9.99597e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=69658
2021-06-19 13:59:55 | INFO | train_inner | epoch 003: 74 / 3002 loss=2.77, ppl=6.82, wps=5849.6, ups=0.09, wpb=64831, bsz=128, num_updates=6039, lr=9.99597e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=69669
2021-06-19 14:00:06 | INFO | train_inner | epoch 003: 75 / 3002 loss=2.699, ppl=6.49, wps=5911, ups=0.09, wpb=64849, bsz=128, num_updates=6040, lr=9.99597e-05, gnorm=2.212, loss_scale=4, train_wall=11, gb_free=2.8, wall=69680
2021-06-19 14:00:17 | INFO | train_inner | epoch 003: 76 / 3002 loss=2.709, ppl=6.54, wps=5879.4, ups=0.09, wpb=64837, bsz=128, num_updates=6041, lr=9.99597e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=69691
2021-06-19 14:00:28 | INFO | train_inner | epoch 003: 77 / 3002 loss=2.681, ppl=6.41, wps=5897.6, ups=0.09, wpb=64895, bsz=128, num_updates=6042, lr=9.99597e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=69702
2021-06-19 14:00:39 | INFO | train_inner | epoch 003: 78 / 3002 loss=2.663, ppl=6.34, wps=5903.4, ups=0.09, wpb=64909, bsz=128, num_updates=6043, lr=9.99597e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=69713
2021-06-19 14:00:50 | INFO | train_inner | epoch 003: 79 / 3002 loss=2.665, ppl=6.34, wps=5893.3, ups=0.09, wpb=64769, bsz=128, num_updates=6044, lr=9.99596e-05, gnorm=2.266, loss_scale=4, train_wall=11, gb_free=2.8, wall=69724
2021-06-19 14:01:01 | INFO | train_inner | epoch 003: 80 / 3002 loss=2.603, ppl=6.07, wps=5792.4, ups=0.09, wpb=64829, bsz=128, num_updates=6045, lr=9.99596e-05, gnorm=2.099, loss_scale=4, train_wall=11, gb_free=2.8, wall=69736
2021-06-19 14:01:12 | INFO | train_inner | epoch 003: 81 / 3002 loss=2.706, ppl=6.53, wps=5939.9, ups=0.09, wpb=64881, bsz=128, num_updates=6046, lr=9.99596e-05, gnorm=2.306, loss_scale=4, train_wall=10, gb_free=2.8, wall=69746
2021-06-19 14:01:23 | INFO | train_inner | epoch 003: 82 / 3002 loss=2.77, ppl=6.82, wps=5978.8, ups=0.09, wpb=64841, bsz=128, num_updates=6047, lr=9.99596e-05, gnorm=2.227, loss_scale=4, train_wall=10, gb_free=2.8, wall=69757
2021-06-19 14:01:34 | INFO | train_inner | epoch 003: 83 / 3002 loss=2.7, ppl=6.5, wps=5971.8, ups=0.09, wpb=64915, bsz=128, num_updates=6048, lr=9.99596e-05, gnorm=3.025, loss_scale=4, train_wall=10, gb_free=2.8, wall=69768
2021-06-19 14:01:45 | INFO | train_inner | epoch 003: 84 / 3002 loss=2.629, ppl=6.19, wps=5824, ups=0.09, wpb=64821, bsz=128, num_updates=6049, lr=9.99596e-05, gnorm=4.118, loss_scale=4, train_wall=11, gb_free=2.8, wall=69779
2021-06-19 14:01:56 | INFO | train_inner | epoch 003: 85 / 3002 loss=2.683, ppl=6.42, wps=5917.9, ups=0.09, wpb=64809, bsz=128, num_updates=6050, lr=9.99596e-05, gnorm=2.265, loss_scale=4, train_wall=10, gb_free=2.8, wall=69790
2021-06-19 14:02:07 | INFO | train_inner | epoch 003: 86 / 3002 loss=2.84, ppl=7.16, wps=5967.1, ups=0.09, wpb=64775, bsz=128, num_updates=6051, lr=9.99596e-05, gnorm=2.299, loss_scale=4, train_wall=10, gb_free=2.8, wall=69801
2021-06-19 14:02:18 | INFO | train_inner | epoch 003: 87 / 3002 loss=2.672, ppl=6.37, wps=5776.6, ups=0.09, wpb=64728, bsz=128, num_updates=6052, lr=9.99596e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=69812
2021-06-19 14:02:29 | INFO | train_inner | epoch 003: 88 / 3002 loss=2.581, ppl=5.99, wps=5957.8, ups=0.09, wpb=64888, bsz=128, num_updates=6053, lr=9.99596e-05, gnorm=2.237, loss_scale=4, train_wall=10, gb_free=2.8, wall=69823
2021-06-19 14:02:40 | INFO | train_inner | epoch 003: 89 / 3002 loss=2.642, ppl=6.24, wps=5812.7, ups=0.09, wpb=64737, bsz=128, num_updates=6054, lr=9.99596e-05, gnorm=2.422, loss_scale=4, train_wall=11, gb_free=2.8, wall=69834
2021-06-19 14:02:51 | INFO | train_inner | epoch 003: 90 / 3002 loss=2.548, ppl=5.85, wps=5903.8, ups=0.09, wpb=64847, bsz=128, num_updates=6055, lr=9.99596e-05, gnorm=2.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=69845
2021-06-19 14:03:02 | INFO | train_inner | epoch 003: 91 / 3002 loss=2.589, ppl=6.02, wps=5790.4, ups=0.09, wpb=64772, bsz=128, num_updates=6056, lr=9.99595e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=69857
2021-06-19 14:03:13 | INFO | train_inner | epoch 003: 92 / 3002 loss=2.671, ppl=6.37, wps=5812.5, ups=0.09, wpb=64790, bsz=128, num_updates=6057, lr=9.99595e-05, gnorm=2.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=69868
2021-06-19 14:03:25 | INFO | train_inner | epoch 003: 93 / 3002 loss=2.667, ppl=6.35, wps=5778.2, ups=0.09, wpb=64738, bsz=128, num_updates=6058, lr=9.99595e-05, gnorm=2.177, loss_scale=4, train_wall=11, gb_free=2.8, wall=69879
2021-06-19 14:03:36 | INFO | train_inner | epoch 003: 94 / 3002 loss=2.599, ppl=6.06, wps=5816.1, ups=0.09, wpb=64859, bsz=128, num_updates=6059, lr=9.99595e-05, gnorm=2.128, loss_scale=4, train_wall=11, gb_free=2.8, wall=69890
2021-06-19 14:03:47 | INFO | train_inner | epoch 003: 95 / 3002 loss=2.623, ppl=6.16, wps=5966.6, ups=0.09, wpb=64908, bsz=128, num_updates=6060, lr=9.99595e-05, gnorm=2.198, loss_scale=4, train_wall=10, gb_free=2.8, wall=69901
2021-06-19 14:03:58 | INFO | train_inner | epoch 003: 96 / 3002 loss=2.605, ppl=6.08, wps=5836.1, ups=0.09, wpb=64886, bsz=128, num_updates=6061, lr=9.99595e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=69912
2021-06-19 14:04:09 | INFO | train_inner | epoch 003: 97 / 3002 loss=2.795, ppl=6.94, wps=5771.2, ups=0.09, wpb=64798, bsz=128, num_updates=6062, lr=9.99595e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=69923
2021-06-19 14:04:20 | INFO | train_inner | epoch 003: 98 / 3002 loss=2.654, ppl=6.29, wps=5759.7, ups=0.09, wpb=64749, bsz=128, num_updates=6063, lr=9.99595e-05, gnorm=2.106, loss_scale=4, train_wall=11, gb_free=2.8, wall=69935
2021-06-19 14:04:31 | INFO | train_inner | epoch 003: 99 / 3002 loss=2.675, ppl=6.39, wps=5800.2, ups=0.09, wpb=64854, bsz=128, num_updates=6064, lr=9.99595e-05, gnorm=2.219, loss_scale=4, train_wall=11, gb_free=2.8, wall=69946
2021-06-19 14:04:43 | INFO | train_inner | epoch 003: 100 / 3002 loss=2.772, ppl=6.83, wps=5747.7, ups=0.09, wpb=64847, bsz=128, num_updates=6065, lr=9.99595e-05, gnorm=2.426, loss_scale=4, train_wall=11, gb_free=2.8, wall=69957
2021-06-19 14:04:54 | INFO | train_inner | epoch 003: 101 / 3002 loss=2.677, ppl=6.39, wps=5871.9, ups=0.09, wpb=64873, bsz=128, num_updates=6066, lr=9.99595e-05, gnorm=4.012, loss_scale=4, train_wall=11, gb_free=2.8, wall=69968
2021-06-19 14:05:05 | INFO | train_inner | epoch 003: 102 / 3002 loss=2.636, ppl=6.22, wps=5823.4, ups=0.09, wpb=64880, bsz=128, num_updates=6067, lr=9.99595e-05, gnorm=2.089, loss_scale=4, train_wall=11, gb_free=2.8, wall=69979
2021-06-19 14:05:16 | INFO | train_inner | epoch 003: 103 / 3002 loss=2.622, ppl=6.16, wps=5830, ups=0.09, wpb=64836, bsz=128, num_updates=6068, lr=9.99595e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=69990
2021-06-19 14:05:27 | INFO | train_inner | epoch 003: 104 / 3002 loss=2.617, ppl=6.13, wps=5882.9, ups=0.09, wpb=64816, bsz=128, num_updates=6069, lr=9.99594e-05, gnorm=2.107, loss_scale=4, train_wall=11, gb_free=2.8, wall=70001
2021-06-19 14:05:38 | INFO | train_inner | epoch 003: 105 / 3002 loss=2.569, ppl=5.93, wps=5835.3, ups=0.09, wpb=64923, bsz=128, num_updates=6070, lr=9.99594e-05, gnorm=2.829, loss_scale=4, train_wall=11, gb_free=2.8, wall=70012
2021-06-19 14:05:49 | INFO | train_inner | epoch 003: 106 / 3002 loss=2.861, ppl=7.27, wps=5827.7, ups=0.09, wpb=64862, bsz=128, num_updates=6071, lr=9.99594e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=70024
2021-06-19 14:06:00 | INFO | train_inner | epoch 003: 107 / 3002 loss=2.655, ppl=6.3, wps=5832, ups=0.09, wpb=64764, bsz=128, num_updates=6072, lr=9.99594e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=70035
2021-06-19 14:06:11 | INFO | train_inner | epoch 003: 108 / 3002 loss=2.79, ppl=6.91, wps=5881.3, ups=0.09, wpb=64778, bsz=128, num_updates=6073, lr=9.99594e-05, gnorm=2.822, loss_scale=4, train_wall=11, gb_free=2.8, wall=70046
2021-06-19 14:06:23 | INFO | train_inner | epoch 003: 109 / 3002 loss=2.617, ppl=6.14, wps=5700, ups=0.09, wpb=64795, bsz=128, num_updates=6074, lr=9.99594e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=70057
2021-06-19 14:06:34 | INFO | train_inner | epoch 003: 110 / 3002 loss=2.799, ppl=6.96, wps=5862.2, ups=0.09, wpb=64836, bsz=128, num_updates=6075, lr=9.99594e-05, gnorm=2.287, loss_scale=4, train_wall=11, gb_free=2.8, wall=70068
2021-06-19 14:06:45 | INFO | train_inner | epoch 003: 111 / 3002 loss=2.634, ppl=6.21, wps=5654.6, ups=0.09, wpb=64766, bsz=128, num_updates=6076, lr=9.99594e-05, gnorm=3.58, loss_scale=4, train_wall=11, gb_free=2.8, wall=70080
2021-06-19 14:06:56 | INFO | train_inner | epoch 003: 112 / 3002 loss=2.627, ppl=6.18, wps=5795.1, ups=0.09, wpb=64839, bsz=128, num_updates=6077, lr=9.99594e-05, gnorm=2.107, loss_scale=4, train_wall=11, gb_free=2.8, wall=70091
2021-06-19 14:07:08 | INFO | train_inner | epoch 003: 113 / 3002 loss=2.662, ppl=6.33, wps=5850.9, ups=0.09, wpb=64813, bsz=128, num_updates=6078, lr=9.99594e-05, gnorm=2.25, loss_scale=4, train_wall=11, gb_free=2.8, wall=70102
2021-06-19 14:07:19 | INFO | train_inner | epoch 003: 114 / 3002 loss=2.792, ppl=6.93, wps=5836.1, ups=0.09, wpb=64863, bsz=128, num_updates=6079, lr=9.99594e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=70113
2021-06-19 14:07:30 | INFO | train_inner | epoch 003: 115 / 3002 loss=2.594, ppl=6.04, wps=5848.7, ups=0.09, wpb=64796, bsz=128, num_updates=6080, lr=9.99594e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=70124
2021-06-19 14:07:41 | INFO | train_inner | epoch 003: 116 / 3002 loss=2.703, ppl=6.51, wps=5893.6, ups=0.09, wpb=64744, bsz=128, num_updates=6081, lr=9.99593e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=70135
2021-06-19 14:07:52 | INFO | train_inner | epoch 003: 117 / 3002 loss=2.626, ppl=6.17, wps=5956.6, ups=0.09, wpb=64826, bsz=128, num_updates=6082, lr=9.99593e-05, gnorm=2.179, loss_scale=4, train_wall=10, gb_free=2.8, wall=70146
2021-06-19 14:08:03 | INFO | train_inner | epoch 003: 118 / 3002 loss=2.77, ppl=6.82, wps=5831.2, ups=0.09, wpb=64860, bsz=128, num_updates=6083, lr=9.99593e-05, gnorm=2.251, loss_scale=4, train_wall=11, gb_free=2.8, wall=70157
2021-06-19 14:08:14 | INFO | train_inner | epoch 003: 119 / 3002 loss=2.814, ppl=7.03, wps=5947.2, ups=0.09, wpb=64746, bsz=128, num_updates=6084, lr=9.99593e-05, gnorm=2.415, loss_scale=4, train_wall=10, gb_free=2.8, wall=70168
2021-06-19 14:08:25 | INFO | train_inner | epoch 003: 120 / 3002 loss=2.639, ppl=6.23, wps=5819.4, ups=0.09, wpb=64793, bsz=128, num_updates=6085, lr=9.99593e-05, gnorm=2.791, loss_scale=4, train_wall=11, gb_free=2.8, wall=70179
2021-06-19 14:08:36 | INFO | train_inner | epoch 003: 121 / 3002 loss=2.769, ppl=6.81, wps=5714.2, ups=0.09, wpb=64852, bsz=128, num_updates=6086, lr=9.99593e-05, gnorm=2.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=70190
2021-06-19 14:08:47 | INFO | train_inner | epoch 003: 122 / 3002 loss=2.69, ppl=6.45, wps=5881.9, ups=0.09, wpb=64790, bsz=128, num_updates=6087, lr=9.99593e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=70201
2021-06-19 14:08:58 | INFO | train_inner | epoch 003: 123 / 3002 loss=2.633, ppl=6.2, wps=5797.5, ups=0.09, wpb=64805, bsz=128, num_updates=6088, lr=9.99593e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=70213
2021-06-19 14:09:09 | INFO | train_inner | epoch 003: 124 / 3002 loss=2.549, ppl=5.85, wps=5989.2, ups=0.09, wpb=64802, bsz=128, num_updates=6089, lr=9.99593e-05, gnorm=2.243, loss_scale=4, train_wall=10, gb_free=2.8, wall=70223
2021-06-19 14:09:20 | INFO | train_inner | epoch 003: 125 / 3002 loss=2.699, ppl=6.49, wps=5761.3, ups=0.09, wpb=64800, bsz=128, num_updates=6090, lr=9.99593e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=70235
2021-06-19 14:09:32 | INFO | train_inner | epoch 003: 126 / 3002 loss=2.771, ppl=6.83, wps=5774.7, ups=0.09, wpb=64692, bsz=128, num_updates=6091, lr=9.99593e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=70246
2021-06-19 14:09:43 | INFO | train_inner | epoch 003: 127 / 3002 loss=2.857, ppl=7.24, wps=5846.5, ups=0.09, wpb=64827, bsz=128, num_updates=6092, lr=9.99593e-05, gnorm=2.199, loss_scale=4, train_wall=11, gb_free=2.8, wall=70257
2021-06-19 14:09:54 | INFO | train_inner | epoch 003: 128 / 3002 loss=2.717, ppl=6.58, wps=5808.6, ups=0.09, wpb=64793, bsz=128, num_updates=6093, lr=9.99593e-05, gnorm=2.267, loss_scale=4, train_wall=11, gb_free=2.8, wall=70268
2021-06-19 14:10:05 | INFO | train_inner | epoch 003: 129 / 3002 loss=2.78, ppl=6.87, wps=5772.9, ups=0.09, wpb=64783, bsz=128, num_updates=6094, lr=9.99592e-05, gnorm=2.202, loss_scale=4, train_wall=11, gb_free=2.8, wall=70279
2021-06-19 14:10:16 | INFO | train_inner | epoch 003: 130 / 3002 loss=2.759, ppl=6.77, wps=5811.8, ups=0.09, wpb=64872, bsz=128, num_updates=6095, lr=9.99592e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=70290
2021-06-19 14:10:27 | INFO | train_inner | epoch 003: 131 / 3002 loss=2.673, ppl=6.38, wps=5886.5, ups=0.09, wpb=64852, bsz=128, num_updates=6096, lr=9.99592e-05, gnorm=3.962, loss_scale=4, train_wall=11, gb_free=2.8, wall=70302
2021-06-19 14:10:38 | INFO | train_inner | epoch 003: 132 / 3002 loss=2.562, ppl=5.91, wps=5889.1, ups=0.09, wpb=64809, bsz=128, num_updates=6097, lr=9.99592e-05, gnorm=2.217, loss_scale=4, train_wall=11, gb_free=2.8, wall=70313
2021-06-19 14:10:49 | INFO | train_inner | epoch 003: 133 / 3002 loss=2.65, ppl=6.28, wps=5908.8, ups=0.09, wpb=64879, bsz=128, num_updates=6098, lr=9.99592e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=70323
2021-06-19 14:11:00 | INFO | train_inner | epoch 003: 134 / 3002 loss=2.71, ppl=6.55, wps=5881.9, ups=0.09, wpb=64897, bsz=128, num_updates=6099, lr=9.99592e-05, gnorm=2.202, loss_scale=4, train_wall=11, gb_free=2.8, wall=70335
2021-06-19 14:11:11 | INFO | train_inner | epoch 003: 135 / 3002 loss=2.669, ppl=6.36, wps=5827.7, ups=0.09, wpb=64813, bsz=128, num_updates=6100, lr=9.99592e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=70346
2021-06-19 14:11:22 | INFO | train_inner | epoch 003: 136 / 3002 loss=2.797, ppl=6.95, wps=5902.7, ups=0.09, wpb=64832, bsz=128, num_updates=6101, lr=9.99592e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=70357
2021-06-19 14:11:34 | INFO | train_inner | epoch 003: 137 / 3002 loss=2.675, ppl=6.39, wps=5762.8, ups=0.09, wpb=64808, bsz=128, num_updates=6102, lr=9.99592e-05, gnorm=2.159, loss_scale=4, train_wall=11, gb_free=2.8, wall=70368
2021-06-19 14:11:45 | INFO | train_inner | epoch 003: 138 / 3002 loss=2.721, ppl=6.59, wps=5766.1, ups=0.09, wpb=64784, bsz=128, num_updates=6103, lr=9.99592e-05, gnorm=3.719, loss_scale=4, train_wall=11, gb_free=2.8, wall=70379
2021-06-19 14:11:56 | INFO | train_inner | epoch 003: 139 / 3002 loss=2.568, ppl=5.93, wps=5702.3, ups=0.09, wpb=64875, bsz=128, num_updates=6104, lr=9.99592e-05, gnorm=2.117, loss_scale=4, train_wall=11, gb_free=2.8, wall=70390
2021-06-19 14:12:07 | INFO | train_inner | epoch 003: 140 / 3002 loss=2.908, ppl=7.51, wps=5827.9, ups=0.09, wpb=64778, bsz=128, num_updates=6105, lr=9.99592e-05, gnorm=2.297, loss_scale=4, train_wall=11, gb_free=2.8, wall=70402
2021-06-19 14:12:19 | INFO | train_inner | epoch 003: 141 / 3002 loss=2.684, ppl=6.43, wps=5756.1, ups=0.09, wpb=64896, bsz=128, num_updates=6106, lr=9.99591e-05, gnorm=2.464, loss_scale=4, train_wall=11, gb_free=2.8, wall=70413
2021-06-19 14:12:30 | INFO | train_inner | epoch 003: 142 / 3002 loss=2.67, ppl=6.36, wps=5821.1, ups=0.09, wpb=64861, bsz=128, num_updates=6107, lr=9.99591e-05, gnorm=2.358, loss_scale=4, train_wall=11, gb_free=2.8, wall=70424
2021-06-19 14:12:41 | INFO | train_inner | epoch 003: 143 / 3002 loss=2.876, ppl=7.34, wps=5925.3, ups=0.09, wpb=64726, bsz=128, num_updates=6108, lr=9.99591e-05, gnorm=2.423, loss_scale=4, train_wall=10, gb_free=2.8, wall=70435
2021-06-19 14:12:52 | INFO | train_inner | epoch 003: 144 / 3002 loss=2.689, ppl=6.45, wps=5753.4, ups=0.09, wpb=64847, bsz=128, num_updates=6109, lr=9.99591e-05, gnorm=2.391, loss_scale=4, train_wall=11, gb_free=2.8, wall=70446
2021-06-19 14:13:03 | INFO | train_inner | epoch 003: 145 / 3002 loss=2.686, ppl=6.44, wps=5922.6, ups=0.09, wpb=64810, bsz=128, num_updates=6110, lr=9.99591e-05, gnorm=2.418, loss_scale=8, train_wall=10, gb_free=2.8, wall=70457
2021-06-19 14:13:14 | INFO | train_inner | epoch 003: 146 / 3002 loss=2.674, ppl=6.38, wps=5827.1, ups=0.09, wpb=64850, bsz=128, num_updates=6111, lr=9.99591e-05, gnorm=2.122, loss_scale=8, train_wall=11, gb_free=2.8, wall=70468
2021-06-19 14:13:25 | INFO | train_inner | epoch 003: 147 / 3002 loss=2.968, ppl=7.82, wps=5856.6, ups=0.09, wpb=64758, bsz=128, num_updates=6112, lr=9.99591e-05, gnorm=2.66, loss_scale=8, train_wall=11, gb_free=2.8, wall=70479
2021-06-19 14:13:36 | INFO | train_inner | epoch 003: 148 / 3002 loss=2.813, ppl=7.03, wps=5854.6, ups=0.09, wpb=64780, bsz=128, num_updates=6113, lr=9.99591e-05, gnorm=2.37, loss_scale=8, train_wall=11, gb_free=2.8, wall=70490
2021-06-19 14:13:47 | INFO | train_inner | epoch 003: 149 / 3002 loss=2.76, ppl=6.78, wps=5754.4, ups=0.09, wpb=64794, bsz=128, num_updates=6114, lr=9.99591e-05, gnorm=2.287, loss_scale=8, train_wall=11, gb_free=2.8, wall=70502
2021-06-19 14:13:59 | INFO | train_inner | epoch 003: 150 / 3002 loss=2.689, ppl=6.45, wps=5793.8, ups=0.09, wpb=64916, bsz=128, num_updates=6115, lr=9.99591e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=70513
2021-06-19 14:14:09 | INFO | train_inner | epoch 003: 151 / 3002 loss=2.757, ppl=6.76, wps=5929.2, ups=0.09, wpb=64670, bsz=128, num_updates=6116, lr=9.99591e-05, gnorm=2.359, loss_scale=8, train_wall=10, gb_free=2.8, wall=70524
2021-06-19 14:14:21 | INFO | train_inner | epoch 003: 152 / 3002 loss=2.574, ppl=5.96, wps=5802.9, ups=0.09, wpb=64841, bsz=128, num_updates=6117, lr=9.99591e-05, gnorm=2.226, loss_scale=8, train_wall=11, gb_free=2.8, wall=70535
2021-06-19 14:14:32 | INFO | train_inner | epoch 003: 153 / 3002 loss=2.713, ppl=6.56, wps=5937.9, ups=0.09, wpb=64821, bsz=128, num_updates=6118, lr=9.99591e-05, gnorm=2.451, loss_scale=8, train_wall=10, gb_free=2.8, wall=70546
2021-06-19 14:14:43 | INFO | train_inner | epoch 003: 154 / 3002 loss=2.611, ppl=6.11, wps=5803.4, ups=0.09, wpb=64813, bsz=128, num_updates=6119, lr=9.9959e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=70557
2021-06-19 14:14:54 | INFO | train_inner | epoch 003: 155 / 3002 loss=2.654, ppl=6.29, wps=5781.3, ups=0.09, wpb=64858, bsz=128, num_updates=6120, lr=9.9959e-05, gnorm=2.151, loss_scale=8, train_wall=11, gb_free=2.8, wall=70568
2021-06-19 14:15:05 | INFO | train_inner | epoch 003: 156 / 3002 loss=2.607, ppl=6.09, wps=5861, ups=0.09, wpb=64900, bsz=128, num_updates=6121, lr=9.9959e-05, gnorm=2.207, loss_scale=8, train_wall=11, gb_free=2.8, wall=70579
2021-06-19 14:15:16 | INFO | train_inner | epoch 003: 157 / 3002 loss=2.673, ppl=6.38, wps=5852.6, ups=0.09, wpb=64850, bsz=128, num_updates=6122, lr=9.9959e-05, gnorm=3.556, loss_scale=8, train_wall=11, gb_free=2.8, wall=70590
2021-06-19 14:15:27 | INFO | train_inner | epoch 003: 158 / 3002 loss=2.718, ppl=6.58, wps=5946.2, ups=0.09, wpb=64883, bsz=128, num_updates=6123, lr=9.9959e-05, gnorm=2.203, loss_scale=8, train_wall=10, gb_free=2.8, wall=70601
2021-06-19 14:15:38 | INFO | train_inner | epoch 003: 159 / 3002 loss=2.636, ppl=6.21, wps=5841.8, ups=0.09, wpb=64844, bsz=128, num_updates=6124, lr=9.9959e-05, gnorm=2.316, loss_scale=8, train_wall=11, gb_free=2.8, wall=70612
2021-06-19 14:15:49 | INFO | train_inner | epoch 003: 160 / 3002 loss=2.656, ppl=6.3, wps=5905.6, ups=0.09, wpb=64847, bsz=128, num_updates=6125, lr=9.9959e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=70623
2021-06-19 14:16:00 | INFO | train_inner | epoch 003: 161 / 3002 loss=2.766, ppl=6.8, wps=5919.8, ups=0.09, wpb=64801, bsz=128, num_updates=6126, lr=9.9959e-05, gnorm=2.32, loss_scale=8, train_wall=11, gb_free=2.8, wall=70634
2021-06-19 14:16:11 | INFO | train_inner | epoch 003: 162 / 3002 loss=2.647, ppl=6.26, wps=5820.7, ups=0.09, wpb=64778, bsz=128, num_updates=6127, lr=9.9959e-05, gnorm=2.33, loss_scale=8, train_wall=11, gb_free=2.8, wall=70645
2021-06-19 14:16:22 | INFO | train_inner | epoch 003: 163 / 3002 loss=2.842, ppl=7.17, wps=5727.5, ups=0.09, wpb=64716, bsz=128, num_updates=6128, lr=9.9959e-05, gnorm=2.56, loss_scale=8, train_wall=11, gb_free=2.8, wall=70657
2021-06-19 14:16:34 | INFO | train_inner | epoch 003: 164 / 3002 loss=2.583, ppl=5.99, wps=5805.1, ups=0.09, wpb=64820, bsz=128, num_updates=6129, lr=9.9959e-05, gnorm=2.273, loss_scale=8, train_wall=11, gb_free=2.8, wall=70668
2021-06-19 14:16:44 | INFO | train_inner | epoch 003: 165 / 3002 loss=2.645, ppl=6.26, wps=5949.9, ups=0.09, wpb=64555, bsz=128, num_updates=6130, lr=9.9959e-05, gnorm=2.48, loss_scale=8, train_wall=10, gb_free=2.8, wall=70679
2021-06-19 14:16:56 | INFO | train_inner | epoch 003: 166 / 3002 loss=2.625, ppl=6.17, wps=5848.7, ups=0.09, wpb=64855, bsz=128, num_updates=6131, lr=9.99589e-05, gnorm=2.175, loss_scale=8, train_wall=11, gb_free=2.8, wall=70690
2021-06-19 14:17:07 | INFO | train_inner | epoch 003: 167 / 3002 loss=2.654, ppl=6.3, wps=5878.3, ups=0.09, wpb=64842, bsz=128, num_updates=6132, lr=9.99589e-05, gnorm=2.307, loss_scale=8, train_wall=11, gb_free=2.8, wall=70701
2021-06-19 14:17:18 | INFO | train_inner | epoch 003: 168 / 3002 loss=2.702, ppl=6.51, wps=5741.7, ups=0.09, wpb=64711, bsz=128, num_updates=6133, lr=9.99589e-05, gnorm=2.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=70712
2021-06-19 14:17:29 | INFO | train_inner | epoch 003: 169 / 3002 loss=2.712, ppl=6.55, wps=5746.8, ups=0.09, wpb=64723, bsz=128, num_updates=6134, lr=9.99589e-05, gnorm=2.789, loss_scale=8, train_wall=11, gb_free=2.8, wall=70723
2021-06-19 14:17:40 | INFO | train_inner | epoch 003: 170 / 3002 loss=2.67, ppl=6.36, wps=5848.3, ups=0.09, wpb=64879, bsz=128, num_updates=6135, lr=9.99589e-05, gnorm=2.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=70735
2021-06-19 14:17:51 | INFO | train_inner | epoch 003: 171 / 3002 loss=2.628, ppl=6.18, wps=5940.3, ups=0.09, wpb=64837, bsz=128, num_updates=6136, lr=9.99589e-05, gnorm=2.268, loss_scale=8, train_wall=10, gb_free=2.8, wall=70745
2021-06-19 14:18:02 | INFO | train_inner | epoch 003: 172 / 3002 loss=2.556, ppl=5.88, wps=5770.1, ups=0.09, wpb=64734, bsz=128, num_updates=6137, lr=9.99589e-05, gnorm=2.329, loss_scale=8, train_wall=11, gb_free=2.8, wall=70757
2021-06-19 14:18:13 | INFO | train_inner | epoch 003: 173 / 3002 loss=2.759, ppl=6.77, wps=5866.9, ups=0.09, wpb=64863, bsz=128, num_updates=6138, lr=9.99589e-05, gnorm=2.265, loss_scale=8, train_wall=11, gb_free=2.8, wall=70768
2021-06-19 14:18:25 | INFO | train_inner | epoch 003: 174 / 3002 loss=2.727, ppl=6.62, wps=5826, ups=0.09, wpb=64787, bsz=128, num_updates=6139, lr=9.99589e-05, gnorm=2.284, loss_scale=8, train_wall=11, gb_free=2.8, wall=70779
2021-06-19 14:18:35 | INFO | train_inner | epoch 003: 175 / 3002 loss=2.693, ppl=6.47, wps=5971.5, ups=0.09, wpb=64790, bsz=128, num_updates=6140, lr=9.99589e-05, gnorm=2.176, loss_scale=8, train_wall=10, gb_free=2.8, wall=70790
2021-06-19 14:18:46 | INFO | train_inner | epoch 003: 176 / 3002 loss=2.771, ppl=6.83, wps=5842.3, ups=0.09, wpb=64840, bsz=128, num_updates=6141, lr=9.99589e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=70801
2021-06-19 14:18:58 | INFO | train_inner | epoch 003: 177 / 3002 loss=2.772, ppl=6.83, wps=5823.2, ups=0.09, wpb=64814, bsz=128, num_updates=6142, lr=9.99589e-05, gnorm=2.296, loss_scale=8, train_wall=11, gb_free=2.8, wall=70812
2021-06-19 14:19:09 | INFO | train_inner | epoch 003: 178 / 3002 loss=2.687, ppl=6.44, wps=5852.4, ups=0.09, wpb=64807, bsz=128, num_updates=6143, lr=9.99589e-05, gnorm=2.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=70823
2021-06-19 14:19:20 | INFO | train_inner | epoch 003: 179 / 3002 loss=2.623, ppl=6.16, wps=5877.9, ups=0.09, wpb=64823, bsz=128, num_updates=6144, lr=9.99588e-05, gnorm=2.224, loss_scale=8, train_wall=11, gb_free=2.8, wall=70834
2021-06-19 14:19:31 | INFO | train_inner | epoch 003: 180 / 3002 loss=2.627, ppl=6.18, wps=5922.1, ups=0.09, wpb=64810, bsz=128, num_updates=6145, lr=9.99588e-05, gnorm=2.209, loss_scale=8, train_wall=10, gb_free=2.8, wall=70845
2021-06-19 14:19:42 | INFO | train_inner | epoch 003: 181 / 3002 loss=2.875, ppl=7.34, wps=5869, ups=0.09, wpb=64755, bsz=128, num_updates=6146, lr=9.99588e-05, gnorm=2.25, loss_scale=8, train_wall=11, gb_free=2.8, wall=70856
2021-06-19 14:19:53 | INFO | train_inner | epoch 003: 182 / 3002 loss=2.638, ppl=6.22, wps=5910, ups=0.09, wpb=64864, bsz=128, num_updates=6147, lr=9.99588e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=70867
2021-06-19 14:20:04 | INFO | train_inner | epoch 003: 183 / 3002 loss=2.807, ppl=7, wps=5841.8, ups=0.09, wpb=64775, bsz=128, num_updates=6148, lr=9.99588e-05, gnorm=2.261, loss_scale=8, train_wall=11, gb_free=2.8, wall=70878
2021-06-19 14:20:15 | INFO | train_inner | epoch 003: 184 / 3002 loss=2.604, ppl=6.08, wps=5861.3, ups=0.09, wpb=64893, bsz=128, num_updates=6149, lr=9.99588e-05, gnorm=2.133, loss_scale=8, train_wall=11, gb_free=2.8, wall=70889
2021-06-19 14:20:26 | INFO | train_inner | epoch 003: 185 / 3002 loss=2.591, ppl=6.02, wps=5822.4, ups=0.09, wpb=64906, bsz=128, num_updates=6150, lr=9.99588e-05, gnorm=2.326, loss_scale=8, train_wall=11, gb_free=2.8, wall=70900
2021-06-19 14:20:37 | INFO | train_inner | epoch 003: 186 / 3002 loss=2.688, ppl=6.45, wps=5925.7, ups=0.09, wpb=64709, bsz=128, num_updates=6151, lr=9.99588e-05, gnorm=2.238, loss_scale=8, train_wall=10, gb_free=2.8, wall=70911
2021-06-19 14:20:48 | INFO | train_inner | epoch 003: 187 / 3002 loss=2.669, ppl=6.36, wps=5909.3, ups=0.09, wpb=64889, bsz=128, num_updates=6152, lr=9.99588e-05, gnorm=2.251, loss_scale=8, train_wall=10, gb_free=2.8, wall=70922
2021-06-19 14:20:59 | INFO | train_inner | epoch 003: 188 / 3002 loss=2.709, ppl=6.54, wps=5843.9, ups=0.09, wpb=64842, bsz=128, num_updates=6153, lr=9.99588e-05, gnorm=2.22, loss_scale=8, train_wall=11, gb_free=2.8, wall=70933
2021-06-19 14:21:10 | INFO | train_inner | epoch 003: 189 / 3002 loss=2.425, ppl=5.37, wps=5970.8, ups=0.09, wpb=64899, bsz=128, num_updates=6154, lr=9.99588e-05, gnorm=2.206, loss_scale=8, train_wall=10, gb_free=2.8, wall=70944
2021-06-19 14:21:21 | INFO | train_inner | epoch 003: 190 / 3002 loss=2.863, ppl=7.28, wps=5836, ups=0.09, wpb=64810, bsz=128, num_updates=6155, lr=9.99588e-05, gnorm=2.216, loss_scale=8, train_wall=11, gb_free=2.8, wall=70955
2021-06-19 14:21:32 | INFO | train_inner | epoch 003: 191 / 3002 loss=2.631, ppl=6.19, wps=5741.7, ups=0.09, wpb=64850, bsz=128, num_updates=6156, lr=9.99587e-05, gnorm=2.272, loss_scale=8, train_wall=11, gb_free=2.8, wall=70967
2021-06-19 14:21:43 | INFO | train_inner | epoch 003: 192 / 3002 loss=2.613, ppl=6.12, wps=5869.4, ups=0.09, wpb=64799, bsz=128, num_updates=6157, lr=9.99587e-05, gnorm=2.309, loss_scale=8, train_wall=11, gb_free=2.8, wall=70978
2021-06-19 14:21:54 | INFO | train_inner | epoch 003: 193 / 3002 loss=2.672, ppl=6.37, wps=5793, ups=0.09, wpb=64850, bsz=128, num_updates=6158, lr=9.99587e-05, gnorm=2.166, loss_scale=8, train_wall=11, gb_free=2.8, wall=70989
2021-06-19 14:22:05 | INFO | train_inner | epoch 003: 194 / 3002 loss=2.713, ppl=6.56, wps=5954.2, ups=0.09, wpb=64799, bsz=128, num_updates=6159, lr=9.99587e-05, gnorm=2.222, loss_scale=8, train_wall=10, gb_free=2.8, wall=71000
2021-06-19 14:22:16 | INFO | train_inner | epoch 003: 195 / 3002 loss=2.748, ppl=6.72, wps=5880, ups=0.09, wpb=64740, bsz=128, num_updates=6160, lr=9.99587e-05, gnorm=2.15, loss_scale=8, train_wall=11, gb_free=2.8, wall=71011
2021-06-19 14:22:28 | INFO | train_inner | epoch 003: 196 / 3002 loss=2.68, ppl=6.41, wps=5806.3, ups=0.09, wpb=64822, bsz=128, num_updates=6161, lr=9.99587e-05, gnorm=2.069, loss_scale=8, train_wall=11, gb_free=2.8, wall=71022
2021-06-19 14:22:39 | INFO | train_inner | epoch 003: 197 / 3002 loss=2.638, ppl=6.22, wps=5913.1, ups=0.09, wpb=64899, bsz=128, num_updates=6162, lr=9.99587e-05, gnorm=2.241, loss_scale=8, train_wall=10, gb_free=2.8, wall=71033
2021-06-19 14:22:50 | INFO | train_inner | epoch 003: 198 / 3002 loss=2.671, ppl=6.37, wps=5868, ups=0.09, wpb=64824, bsz=128, num_updates=6163, lr=9.99587e-05, gnorm=2.182, loss_scale=8, train_wall=11, gb_free=2.8, wall=71044
2021-06-19 14:23:01 | INFO | train_inner | epoch 003: 199 / 3002 loss=2.71, ppl=6.54, wps=5896.2, ups=0.09, wpb=64910, bsz=128, num_updates=6164, lr=9.99587e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=71055
2021-06-19 14:23:12 | INFO | train_inner | epoch 003: 200 / 3002 loss=2.648, ppl=6.27, wps=5778.3, ups=0.09, wpb=64846, bsz=128, num_updates=6165, lr=9.99587e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=71066
2021-06-19 14:23:23 | INFO | train_inner | epoch 003: 201 / 3002 loss=2.868, ppl=7.3, wps=5869.5, ups=0.09, wpb=64831, bsz=128, num_updates=6166, lr=9.99587e-05, gnorm=2.133, loss_scale=8, train_wall=11, gb_free=2.8, wall=71077
2021-06-19 14:23:34 | INFO | train_inner | epoch 003: 202 / 3002 loss=2.597, ppl=6.05, wps=5862, ups=0.09, wpb=64858, bsz=128, num_updates=6167, lr=9.99587e-05, gnorm=2.22, loss_scale=8, train_wall=11, gb_free=2.8, wall=71088
2021-06-19 14:23:45 | INFO | train_inner | epoch 003: 203 / 3002 loss=2.561, ppl=5.9, wps=5855.2, ups=0.09, wpb=64873, bsz=128, num_updates=6168, lr=9.99587e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=71099
2021-06-19 14:23:56 | INFO | train_inner | epoch 003: 204 / 3002 loss=2.55, ppl=5.86, wps=5707.2, ups=0.09, wpb=64814, bsz=128, num_updates=6169, lr=9.99586e-05, gnorm=2.134, loss_scale=8, train_wall=11, gb_free=2.8, wall=71111
2021-06-19 14:24:07 | INFO | train_inner | epoch 003: 205 / 3002 loss=2.777, ppl=6.85, wps=5964.2, ups=0.09, wpb=64881, bsz=128, num_updates=6170, lr=9.99586e-05, gnorm=2.334, loss_scale=8, train_wall=10, gb_free=2.8, wall=71122
2021-06-19 14:24:18 | INFO | train_inner | epoch 003: 206 / 3002 loss=2.748, ppl=6.72, wps=5816, ups=0.09, wpb=64789, bsz=128, num_updates=6171, lr=9.99586e-05, gnorm=2.327, loss_scale=8, train_wall=11, gb_free=2.8, wall=71133
2021-06-19 14:24:29 | INFO | train_inner | epoch 003: 207 / 3002 loss=2.45, ppl=5.47, wps=5898.3, ups=0.09, wpb=64853, bsz=128, num_updates=6172, lr=9.99586e-05, gnorm=2.198, loss_scale=8, train_wall=11, gb_free=2.8, wall=71144
2021-06-19 14:24:40 | INFO | train_inner | epoch 003: 208 / 3002 loss=2.61, ppl=6.1, wps=5857.9, ups=0.09, wpb=64825, bsz=128, num_updates=6173, lr=9.99586e-05, gnorm=13, loss_scale=8, train_wall=11, gb_free=2.8, wall=71155
2021-06-19 14:24:51 | INFO | train_inner | epoch 003: 209 / 3002 loss=2.672, ppl=6.37, wps=5909.4, ups=0.09, wpb=64847, bsz=128, num_updates=6174, lr=9.99586e-05, gnorm=2.197, loss_scale=8, train_wall=10, gb_free=2.8, wall=71166
2021-06-19 14:25:03 | INFO | train_inner | epoch 003: 210 / 3002 loss=2.751, ppl=6.73, wps=5779.1, ups=0.09, wpb=64857, bsz=128, num_updates=6175, lr=9.99586e-05, gnorm=2.238, loss_scale=8, train_wall=11, gb_free=2.8, wall=71177
2021-06-19 14:25:14 | INFO | train_inner | epoch 003: 211 / 3002 loss=2.77, ppl=6.82, wps=5820.4, ups=0.09, wpb=64859, bsz=128, num_updates=6176, lr=9.99586e-05, gnorm=2.38, loss_scale=8, train_wall=11, gb_free=2.8, wall=71188
2021-06-19 14:25:25 | INFO | train_inner | epoch 003: 212 / 3002 loss=2.672, ppl=6.37, wps=5790.9, ups=0.09, wpb=64772, bsz=128, num_updates=6177, lr=9.99586e-05, gnorm=2.255, loss_scale=8, train_wall=11, gb_free=2.8, wall=71199
2021-06-19 14:25:36 | INFO | train_inner | epoch 003: 213 / 3002 loss=2.534, ppl=5.79, wps=5906.6, ups=0.09, wpb=64875, bsz=128, num_updates=6178, lr=9.99586e-05, gnorm=2.334, loss_scale=8, train_wall=11, gb_free=2.8, wall=71210
2021-06-19 14:25:47 | INFO | train_inner | epoch 003: 214 / 3002 loss=2.747, ppl=6.71, wps=5873.7, ups=0.09, wpb=64885, bsz=128, num_updates=6179, lr=9.99586e-05, gnorm=2.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=71221
2021-06-19 14:25:58 | INFO | train_inner | epoch 003: 215 / 3002 loss=2.495, ppl=5.64, wps=5769, ups=0.09, wpb=64775, bsz=128, num_updates=6180, lr=9.99586e-05, gnorm=2.464, loss_scale=8, train_wall=11, gb_free=2.8, wall=71233
2021-06-19 14:26:09 | INFO | train_inner | epoch 003: 216 / 3002 loss=2.716, ppl=6.57, wps=5850.4, ups=0.09, wpb=64821, bsz=128, num_updates=6181, lr=9.99585e-05, gnorm=3.465, loss_scale=8, train_wall=11, gb_free=2.8, wall=71244
2021-06-19 14:26:20 | INFO | train_inner | epoch 003: 217 / 3002 loss=2.652, ppl=6.29, wps=5845.3, ups=0.09, wpb=64874, bsz=128, num_updates=6182, lr=9.99585e-05, gnorm=2.269, loss_scale=8, train_wall=11, gb_free=2.8, wall=71255
2021-06-19 14:26:32 | INFO | train_inner | epoch 003: 218 / 3002 loss=2.691, ppl=6.46, wps=5789.3, ups=0.09, wpb=64839, bsz=128, num_updates=6183, lr=9.99585e-05, gnorm=2.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=71266
2021-06-19 14:26:42 | INFO | train_inner | epoch 003: 219 / 3002 loss=2.808, ppl=7, wps=5975.5, ups=0.09, wpb=64790, bsz=128, num_updates=6184, lr=9.99585e-05, gnorm=2.121, loss_scale=8, train_wall=10, gb_free=2.8, wall=71277
2021-06-19 14:26:53 | INFO | train_inner | epoch 003: 220 / 3002 loss=2.548, ppl=5.85, wps=5866.4, ups=0.09, wpb=64817, bsz=128, num_updates=6185, lr=9.99585e-05, gnorm=2.207, loss_scale=8, train_wall=11, gb_free=2.8, wall=71288
2021-06-19 14:27:05 | INFO | train_inner | epoch 003: 221 / 3002 loss=2.627, ppl=6.18, wps=5863.4, ups=0.09, wpb=64783, bsz=128, num_updates=6186, lr=9.99585e-05, gnorm=2.137, loss_scale=8, train_wall=11, gb_free=2.8, wall=71299
2021-06-19 14:27:16 | INFO | train_inner | epoch 003: 222 / 3002 loss=2.682, ppl=6.42, wps=5720, ups=0.09, wpb=64859, bsz=128, num_updates=6187, lr=9.99585e-05, gnorm=2.22, loss_scale=8, train_wall=11, gb_free=2.8, wall=71310
2021-06-19 14:27:27 | INFO | train_inner | epoch 003: 223 / 3002 loss=2.895, ppl=7.44, wps=5913.7, ups=0.09, wpb=64736, bsz=128, num_updates=6188, lr=9.99585e-05, gnorm=2.427, loss_scale=8, train_wall=10, gb_free=2.8, wall=71321
2021-06-19 14:27:38 | INFO | train_inner | epoch 003: 224 / 3002 loss=2.645, ppl=6.26, wps=5936, ups=0.09, wpb=64817, bsz=128, num_updates=6189, lr=9.99585e-05, gnorm=2.355, loss_scale=8, train_wall=10, gb_free=2.8, wall=71332
2021-06-19 14:27:49 | INFO | train_inner | epoch 003: 225 / 3002 loss=2.694, ppl=6.47, wps=5795.3, ups=0.09, wpb=64809, bsz=128, num_updates=6190, lr=9.99585e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=71343
2021-06-19 14:28:00 | INFO | train_inner | epoch 003: 226 / 3002 loss=2.554, ppl=5.87, wps=5904.9, ups=0.09, wpb=64802, bsz=128, num_updates=6191, lr=9.99585e-05, gnorm=2.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=71354
2021-06-19 14:28:11 | INFO | train_inner | epoch 003: 227 / 3002 loss=2.673, ppl=6.38, wps=5836.7, ups=0.09, wpb=64698, bsz=128, num_updates=6192, lr=9.99585e-05, gnorm=2.157, loss_scale=8, train_wall=11, gb_free=2.8, wall=71365
2021-06-19 14:28:22 | INFO | train_inner | epoch 003: 228 / 3002 loss=2.616, ppl=6.13, wps=5856.4, ups=0.09, wpb=64760, bsz=128, num_updates=6193, lr=9.99585e-05, gnorm=2.136, loss_scale=8, train_wall=11, gb_free=2.8, wall=71376
2021-06-19 14:28:33 | INFO | train_inner | epoch 003: 229 / 3002 loss=2.687, ppl=6.44, wps=5913.3, ups=0.09, wpb=64813, bsz=128, num_updates=6194, lr=9.99584e-05, gnorm=5.726, loss_scale=8, train_wall=11, gb_free=2.8, wall=71387
2021-06-19 14:28:44 | INFO | train_inner | epoch 003: 230 / 3002 loss=2.584, ppl=6, wps=5844, ups=0.09, wpb=64930, bsz=128, num_updates=6195, lr=9.99584e-05, gnorm=2.239, loss_scale=8, train_wall=11, gb_free=2.8, wall=71398
2021-06-19 14:28:55 | INFO | train_inner | epoch 003: 231 / 3002 loss=2.702, ppl=6.51, wps=5858.5, ups=0.09, wpb=64891, bsz=128, num_updates=6196, lr=9.99584e-05, gnorm=2.245, loss_scale=8, train_wall=11, gb_free=2.8, wall=71410
2021-06-19 14:29:06 | INFO | train_inner | epoch 003: 232 / 3002 loss=2.718, ppl=6.58, wps=5806.1, ups=0.09, wpb=64785, bsz=128, num_updates=6197, lr=9.99584e-05, gnorm=2.17, loss_scale=8, train_wall=11, gb_free=2.8, wall=71421
2021-06-19 14:29:17 | INFO | train_inner | epoch 003: 233 / 3002 loss=2.693, ppl=6.47, wps=5892.9, ups=0.09, wpb=64770, bsz=128, num_updates=6198, lr=9.99584e-05, gnorm=2.174, loss_scale=8, train_wall=11, gb_free=2.8, wall=71432
2021-06-19 14:29:28 | INFO | train_inner | epoch 003: 234 / 3002 loss=2.608, ppl=6.1, wps=5873.6, ups=0.09, wpb=64847, bsz=128, num_updates=6199, lr=9.99584e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=71443
2021-06-19 14:29:39 | INFO | train_inner | epoch 003: 235 / 3002 loss=2.645, ppl=6.26, wps=5926.6, ups=0.09, wpb=64805, bsz=128, num_updates=6200, lr=9.99584e-05, gnorm=2.105, loss_scale=8, train_wall=10, gb_free=2.8, wall=71454
2021-06-19 14:29:50 | INFO | train_inner | epoch 003: 236 / 3002 loss=2.756, ppl=6.75, wps=5872.7, ups=0.09, wpb=64791, bsz=128, num_updates=6201, lr=9.99584e-05, gnorm=3.095, loss_scale=8, train_wall=11, gb_free=2.8, wall=71465
2021-06-19 14:30:01 | INFO | train_inner | epoch 003: 237 / 3002 loss=2.791, ppl=6.92, wps=5835.6, ups=0.09, wpb=64769, bsz=128, num_updates=6202, lr=9.99584e-05, gnorm=2.391, loss_scale=8, train_wall=11, gb_free=2.8, wall=71476
2021-06-19 14:30:12 | INFO | train_inner | epoch 003: 238 / 3002 loss=2.444, ppl=5.44, wps=5912.8, ups=0.09, wpb=64837, bsz=128, num_updates=6203, lr=9.99584e-05, gnorm=2.285, loss_scale=8, train_wall=11, gb_free=2.8, wall=71487
2021-06-19 14:30:24 | INFO | train_inner | epoch 003: 239 / 3002 loss=2.656, ppl=6.3, wps=5758.3, ups=0.09, wpb=64823, bsz=128, num_updates=6204, lr=9.99584e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=71498
2021-06-19 14:30:35 | INFO | train_inner | epoch 003: 240 / 3002 loss=2.653, ppl=6.29, wps=5802, ups=0.09, wpb=64791, bsz=128, num_updates=6205, lr=9.99584e-05, gnorm=2.223, loss_scale=8, train_wall=11, gb_free=2.8, wall=71509
2021-06-19 14:30:46 | INFO | train_inner | epoch 003: 241 / 3002 loss=2.771, ppl=6.83, wps=5849.3, ups=0.09, wpb=64808, bsz=128, num_updates=6206, lr=9.99583e-05, gnorm=2.209, loss_scale=8, train_wall=11, gb_free=2.8, wall=71520
2021-06-19 14:30:57 | INFO | train_inner | epoch 003: 242 / 3002 loss=2.97, ppl=7.84, wps=5814.6, ups=0.09, wpb=64725, bsz=128, num_updates=6207, lr=9.99583e-05, gnorm=2.35, loss_scale=8, train_wall=11, gb_free=2.8, wall=71531
2021-06-19 14:31:08 | INFO | train_inner | epoch 003: 243 / 3002 loss=2.803, ppl=6.98, wps=5894.6, ups=0.09, wpb=64768, bsz=128, num_updates=6208, lr=9.99583e-05, gnorm=2.263, loss_scale=8, train_wall=11, gb_free=2.8, wall=71542
2021-06-19 14:31:19 | INFO | train_inner | epoch 003: 244 / 3002 loss=2.676, ppl=6.39, wps=5737.7, ups=0.09, wpb=64770, bsz=128, num_updates=6209, lr=9.99583e-05, gnorm=2.109, loss_scale=8, train_wall=11, gb_free=2.8, wall=71554
2021-06-19 14:31:30 | INFO | train_inner | epoch 003: 245 / 3002 loss=2.7, ppl=6.5, wps=5984.2, ups=0.09, wpb=64861, bsz=128, num_updates=6210, lr=9.99583e-05, gnorm=2.2, loss_scale=8, train_wall=10, gb_free=2.8, wall=71564
2021-06-19 14:31:41 | INFO | train_inner | epoch 003: 246 / 3002 loss=2.888, ppl=7.4, wps=5798.6, ups=0.09, wpb=64854, bsz=128, num_updates=6211, lr=9.99583e-05, gnorm=2.292, loss_scale=8, train_wall=11, gb_free=2.8, wall=71576
2021-06-19 14:31:52 | INFO | train_inner | epoch 003: 247 / 3002 loss=2.676, ppl=6.39, wps=5862.3, ups=0.09, wpb=64793, bsz=128, num_updates=6212, lr=9.99583e-05, gnorm=4.334, loss_scale=8, train_wall=11, gb_free=2.8, wall=71587
2021-06-19 14:32:04 | INFO | train_inner | epoch 003: 248 / 3002 loss=2.609, ppl=6.1, wps=5793.6, ups=0.09, wpb=64872, bsz=128, num_updates=6213, lr=9.99583e-05, gnorm=2.109, loss_scale=8, train_wall=11, gb_free=2.8, wall=71598
2021-06-19 14:32:15 | INFO | train_inner | epoch 003: 249 / 3002 loss=2.576, ppl=5.96, wps=5829.8, ups=0.09, wpb=64872, bsz=128, num_updates=6214, lr=9.99583e-05, gnorm=2.175, loss_scale=8, train_wall=11, gb_free=2.8, wall=71609
2021-06-19 14:32:26 | INFO | train_inner | epoch 003: 250 / 3002 loss=2.791, ppl=6.92, wps=5886.7, ups=0.09, wpb=64923, bsz=128, num_updates=6215, lr=9.99583e-05, gnorm=2.349, loss_scale=8, train_wall=11, gb_free=2.8, wall=71620
2021-06-19 14:32:37 | INFO | train_inner | epoch 003: 251 / 3002 loss=2.679, ppl=6.4, wps=5850.7, ups=0.09, wpb=64829, bsz=128, num_updates=6216, lr=9.99583e-05, gnorm=2.42, loss_scale=8, train_wall=11, gb_free=2.8, wall=71631
2021-06-19 14:32:48 | INFO | train_inner | epoch 003: 252 / 3002 loss=2.573, ppl=5.95, wps=5854.4, ups=0.09, wpb=64865, bsz=128, num_updates=6217, lr=9.99583e-05, gnorm=2.354, loss_scale=8, train_wall=11, gb_free=2.8, wall=71642
2021-06-19 14:32:59 | INFO | train_inner | epoch 003: 253 / 3002 loss=2.634, ppl=6.21, wps=5763.9, ups=0.09, wpb=64816, bsz=128, num_updates=6218, lr=9.99583e-05, gnorm=2.109, loss_scale=8, train_wall=11, gb_free=2.8, wall=71653
2021-06-19 14:33:10 | INFO | train_inner | epoch 003: 254 / 3002 loss=2.765, ppl=6.8, wps=5840, ups=0.09, wpb=64809, bsz=128, num_updates=6219, lr=9.99582e-05, gnorm=3.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=71665
2021-06-19 14:33:21 | INFO | train_inner | epoch 003: 255 / 3002 loss=2.822, ppl=7.07, wps=5799.4, ups=0.09, wpb=64801, bsz=128, num_updates=6220, lr=9.99582e-05, gnorm=2.12, loss_scale=8, train_wall=11, gb_free=2.8, wall=71676
2021-06-19 14:33:32 | INFO | train_inner | epoch 003: 256 / 3002 loss=2.701, ppl=6.5, wps=5884.1, ups=0.09, wpb=64804, bsz=128, num_updates=6221, lr=9.99582e-05, gnorm=2.136, loss_scale=8, train_wall=11, gb_free=2.8, wall=71687
2021-06-19 14:33:43 | INFO | train_inner | epoch 003: 257 / 3002 loss=2.662, ppl=6.33, wps=5874.5, ups=0.09, wpb=64842, bsz=128, num_updates=6222, lr=9.99582e-05, gnorm=2.198, loss_scale=8, train_wall=11, gb_free=2.8, wall=71698
2021-06-19 14:33:55 | INFO | train_inner | epoch 003: 258 / 3002 loss=2.574, ppl=5.96, wps=5877.6, ups=0.09, wpb=64874, bsz=128, num_updates=6223, lr=9.99582e-05, gnorm=2.22, loss_scale=8, train_wall=11, gb_free=2.8, wall=71709
2021-06-19 14:34:05 | INFO | train_inner | epoch 003: 259 / 3002 loss=2.594, ppl=6.04, wps=5933, ups=0.09, wpb=64768, bsz=128, num_updates=6224, lr=9.99582e-05, gnorm=2.084, loss_scale=8, train_wall=10, gb_free=2.8, wall=71720
2021-06-19 14:34:17 | INFO | train_inner | epoch 003: 260 / 3002 loss=2.675, ppl=6.39, wps=5838, ups=0.09, wpb=64834, bsz=128, num_updates=6225, lr=9.99582e-05, gnorm=2.264, loss_scale=8, train_wall=11, gb_free=2.8, wall=71731
2021-06-19 14:34:27 | INFO | train_inner | epoch 003: 261 / 3002 loss=2.633, ppl=6.2, wps=5934.5, ups=0.09, wpb=64907, bsz=128, num_updates=6226, lr=9.99582e-05, gnorm=2.183, loss_scale=8, train_wall=10, gb_free=2.8, wall=71742
2021-06-19 14:34:38 | INFO | train_inner | epoch 003: 262 / 3002 loss=2.641, ppl=6.24, wps=5894.6, ups=0.09, wpb=64836, bsz=128, num_updates=6227, lr=9.99582e-05, gnorm=2.169, loss_scale=8, train_wall=11, gb_free=2.8, wall=71753
2021-06-19 14:34:50 | INFO | train_inner | epoch 003: 263 / 3002 loss=2.561, ppl=5.9, wps=5822.7, ups=0.09, wpb=64863, bsz=128, num_updates=6228, lr=9.99582e-05, gnorm=2.186, loss_scale=8, train_wall=11, gb_free=2.8, wall=71764
2021-06-19 14:35:01 | INFO | train_inner | epoch 003: 264 / 3002 loss=2.613, ppl=6.12, wps=5738.8, ups=0.09, wpb=64798, bsz=128, num_updates=6229, lr=9.99582e-05, gnorm=2.165, loss_scale=8, train_wall=11, gb_free=2.8, wall=71775
2021-06-19 14:35:12 | INFO | train_inner | epoch 003: 265 / 3002 loss=2.851, ppl=7.21, wps=5768.3, ups=0.09, wpb=64851, bsz=128, num_updates=6230, lr=9.99582e-05, gnorm=2.313, loss_scale=8, train_wall=11, gb_free=2.8, wall=71786
2021-06-19 14:35:23 | INFO | train_inner | epoch 003: 266 / 3002 loss=2.631, ppl=6.19, wps=5828.1, ups=0.09, wpb=64851, bsz=128, num_updates=6231, lr=9.99581e-05, gnorm=2.278, loss_scale=8, train_wall=11, gb_free=2.8, wall=71798
2021-06-19 14:35:34 | INFO | train_inner | epoch 003: 267 / 3002 loss=2.734, ppl=6.65, wps=5818, ups=0.09, wpb=64830, bsz=128, num_updates=6232, lr=9.99581e-05, gnorm=2.254, loss_scale=8, train_wall=11, gb_free=2.8, wall=71809
2021-06-19 14:35:45 | INFO | train_inner | epoch 003: 268 / 3002 loss=2.574, ppl=5.95, wps=5884.5, ups=0.09, wpb=64889, bsz=128, num_updates=6233, lr=9.99581e-05, gnorm=2.15, loss_scale=8, train_wall=11, gb_free=2.8, wall=71820
2021-06-19 14:35:56 | INFO | train_inner | epoch 003: 269 / 3002 loss=2.553, ppl=5.87, wps=5950.7, ups=0.09, wpb=64908, bsz=128, num_updates=6234, lr=9.99581e-05, gnorm=2.243, loss_scale=8, train_wall=10, gb_free=2.8, wall=71831
2021-06-19 14:36:08 | INFO | train_inner | epoch 003: 270 / 3002 loss=2.606, ppl=6.09, wps=5807.6, ups=0.09, wpb=64841, bsz=128, num_updates=6235, lr=9.99581e-05, gnorm=2.362, loss_scale=8, train_wall=11, gb_free=2.8, wall=71842
2021-06-19 14:36:19 | INFO | train_inner | epoch 003: 271 / 3002 loss=2.906, ppl=7.49, wps=5740.9, ups=0.09, wpb=64805, bsz=128, num_updates=6236, lr=9.99581e-05, gnorm=2.222, loss_scale=8, train_wall=11, gb_free=2.8, wall=71853
2021-06-19 14:36:30 | INFO | train_inner | epoch 003: 272 / 3002 loss=2.769, ppl=6.81, wps=5736.4, ups=0.09, wpb=64782, bsz=128, num_updates=6237, lr=9.99581e-05, gnorm=2.104, loss_scale=8, train_wall=11, gb_free=2.8, wall=71864
2021-06-19 14:36:41 | INFO | train_inner | epoch 003: 273 / 3002 loss=2.731, ppl=6.64, wps=5752.1, ups=0.09, wpb=64847, bsz=128, num_updates=6238, lr=9.99581e-05, gnorm=2.184, loss_scale=16, train_wall=11, gb_free=2.8, wall=71876
2021-06-19 14:36:52 | INFO | train_inner | epoch 003: 274 / 3002 loss=2.741, ppl=6.68, wps=5851, ups=0.09, wpb=64853, bsz=128, num_updates=6239, lr=9.99581e-05, gnorm=2.189, loss_scale=16, train_wall=11, gb_free=2.8, wall=71887
2021-06-19 14:37:03 | INFO | train_inner | epoch 003: 275 / 3002 loss=2.556, ppl=5.88, wps=5975.7, ups=0.09, wpb=64799, bsz=128, num_updates=6240, lr=9.99581e-05, gnorm=2.167, loss_scale=16, train_wall=10, gb_free=2.8, wall=71898
2021-06-19 14:37:14 | INFO | train_inner | epoch 003: 276 / 3002 loss=2.705, ppl=6.52, wps=5780.5, ups=0.09, wpb=64723, bsz=128, num_updates=6241, lr=9.99581e-05, gnorm=2.172, loss_scale=16, train_wall=11, gb_free=2.8, wall=71909
2021-06-19 14:37:26 | INFO | train_inner | epoch 003: 277 / 3002 loss=2.525, ppl=5.76, wps=5685.1, ups=0.09, wpb=64867, bsz=128, num_updates=6242, lr=9.99581e-05, gnorm=2.112, loss_scale=16, train_wall=11, gb_free=2.8, wall=71920
2021-06-19 14:37:37 | INFO | train_inner | epoch 003: 278 / 3002 loss=2.689, ppl=6.45, wps=5973, ups=0.09, wpb=64885, bsz=128, num_updates=6243, lr=9.99581e-05, gnorm=2.208, loss_scale=16, train_wall=10, gb_free=2.8, wall=71931
2021-06-19 14:37:48 | INFO | train_inner | epoch 003: 279 / 3002 loss=2.871, ppl=7.32, wps=5756.2, ups=0.09, wpb=64820, bsz=128, num_updates=6244, lr=9.9958e-05, gnorm=2.626, loss_scale=16, train_wall=11, gb_free=2.8, wall=71942
2021-06-19 14:37:59 | INFO | train_inner | epoch 003: 280 / 3002 loss=2.563, ppl=5.91, wps=5921.4, ups=0.09, wpb=64883, bsz=128, num_updates=6245, lr=9.9958e-05, gnorm=2.428, loss_scale=16, train_wall=10, gb_free=2.8, wall=71953
2021-06-19 14:38:10 | INFO | train_inner | epoch 003: 281 / 3002 loss=2.641, ppl=6.24, wps=5784.4, ups=0.09, wpb=64869, bsz=128, num_updates=6246, lr=9.9958e-05, gnorm=2.761, loss_scale=16, train_wall=11, gb_free=2.8, wall=71965
2021-06-19 14:38:21 | INFO | train_inner | epoch 003: 282 / 3002 loss=2.813, ppl=7.03, wps=5933, ups=0.09, wpb=64779, bsz=128, num_updates=6247, lr=9.9958e-05, gnorm=2.241, loss_scale=16, train_wall=10, gb_free=2.8, wall=71975
2021-06-19 14:38:32 | INFO | train_inner | epoch 003: 283 / 3002 loss=2.717, ppl=6.58, wps=5888, ups=0.09, wpb=64884, bsz=128, num_updates=6248, lr=9.9958e-05, gnorm=2.202, loss_scale=16, train_wall=11, gb_free=2.8, wall=71986
2021-06-19 14:38:43 | INFO | train_inner | epoch 003: 284 / 3002 loss=2.751, ppl=6.73, wps=5810, ups=0.09, wpb=64834, bsz=128, num_updates=6249, lr=9.9958e-05, gnorm=2.246, loss_scale=16, train_wall=11, gb_free=2.8, wall=71998
2021-06-19 14:38:54 | INFO | train_inner | epoch 003: 285 / 3002 loss=2.471, ppl=5.54, wps=5919.2, ups=0.09, wpb=64900, bsz=128, num_updates=6250, lr=9.9958e-05, gnorm=2.11, loss_scale=16, train_wall=10, gb_free=2.8, wall=72009
2021-06-19 14:39:05 | INFO | train_inner | epoch 003: 286 / 3002 loss=2.723, ppl=6.6, wps=5876.9, ups=0.09, wpb=64733, bsz=128, num_updates=6251, lr=9.9958e-05, gnorm=2.142, loss_scale=16, train_wall=11, gb_free=2.8, wall=72020
2021-06-19 14:39:16 | INFO | train_inner | epoch 003: 287 / 3002 loss=2.764, ppl=6.79, wps=5847.7, ups=0.09, wpb=64859, bsz=128, num_updates=6252, lr=9.9958e-05, gnorm=2.327, loss_scale=16, train_wall=11, gb_free=2.8, wall=72031
2021-06-19 14:39:27 | INFO | train_inner | epoch 003: 288 / 3002 loss=2.625, ppl=6.17, wps=5832.1, ups=0.09, wpb=64753, bsz=128, num_updates=6253, lr=9.9958e-05, gnorm=2.354, loss_scale=16, train_wall=11, gb_free=2.8, wall=72042
2021-06-19 14:39:39 | INFO | train_inner | epoch 003: 289 / 3002 loss=2.786, ppl=6.9, wps=5852.7, ups=0.09, wpb=64862, bsz=128, num_updates=6254, lr=9.9958e-05, gnorm=2.251, loss_scale=16, train_wall=11, gb_free=2.8, wall=72053
2021-06-19 14:39:50 | INFO | train_inner | epoch 003: 290 / 3002 loss=2.787, ppl=6.9, wps=5828.1, ups=0.09, wpb=64767, bsz=128, num_updates=6255, lr=9.9958e-05, gnorm=2.242, loss_scale=16, train_wall=11, gb_free=2.8, wall=72064
2021-06-19 14:40:01 | INFO | train_inner | epoch 003: 291 / 3002 loss=2.67, ppl=6.37, wps=5916, ups=0.09, wpb=64914, bsz=128, num_updates=6256, lr=9.99579e-05, gnorm=2.139, loss_scale=16, train_wall=10, gb_free=2.8, wall=72075
2021-06-19 14:40:12 | INFO | train_inner | epoch 003: 292 / 3002 loss=2.687, ppl=6.44, wps=5776.2, ups=0.09, wpb=64754, bsz=128, num_updates=6257, lr=9.99579e-05, gnorm=2.102, loss_scale=16, train_wall=11, gb_free=2.8, wall=72086
2021-06-19 14:40:23 | INFO | train_inner | epoch 003: 293 / 3002 loss=2.575, ppl=5.96, wps=5805.9, ups=0.09, wpb=64812, bsz=128, num_updates=6258, lr=9.99579e-05, gnorm=2.246, loss_scale=16, train_wall=11, gb_free=2.8, wall=72097
2021-06-19 14:40:34 | INFO | train_inner | epoch 003: 294 / 3002 loss=2.687, ppl=6.44, wps=5813.3, ups=0.09, wpb=64860, bsz=128, num_updates=6259, lr=9.99579e-05, gnorm=2.269, loss_scale=16, train_wall=11, gb_free=2.8, wall=72109
2021-06-19 14:40:45 | INFO | train_inner | epoch 003: 295 / 3002 loss=2.693, ppl=6.46, wps=5760.7, ups=0.09, wpb=64840, bsz=128, num_updates=6260, lr=9.99579e-05, gnorm=2.81, loss_scale=16, train_wall=11, gb_free=2.8, wall=72120
2021-06-19 14:40:56 | INFO | train_inner | epoch 003: 296 / 3002 loss=2.657, ppl=6.31, wps=5946.4, ups=0.09, wpb=64845, bsz=128, num_updates=6261, lr=9.99579e-05, gnorm=2.234, loss_scale=16, train_wall=10, gb_free=2.8, wall=72131
2021-06-19 14:41:07 | INFO | train_inner | epoch 003: 297 / 3002 loss=2.701, ppl=6.5, wps=5831.5, ups=0.09, wpb=64897, bsz=128, num_updates=6262, lr=9.99579e-05, gnorm=2.364, loss_scale=16, train_wall=11, gb_free=2.8, wall=72142
2021-06-19 14:41:19 | INFO | train_inner | epoch 003: 298 / 3002 loss=2.608, ppl=6.1, wps=5821.3, ups=0.09, wpb=64789, bsz=128, num_updates=6263, lr=9.99579e-05, gnorm=2.128, loss_scale=16, train_wall=11, gb_free=2.8, wall=72153
2021-06-19 14:41:30 | INFO | train_inner | epoch 003: 299 / 3002 loss=2.577, ppl=5.97, wps=5808.5, ups=0.09, wpb=64867, bsz=128, num_updates=6264, lr=9.99579e-05, gnorm=3.39, loss_scale=16, train_wall=11, gb_free=2.8, wall=72164
2021-06-19 14:41:41 | INFO | train_inner | epoch 003: 300 / 3002 loss=2.779, ppl=6.86, wps=5884.7, ups=0.09, wpb=64845, bsz=128, num_updates=6265, lr=9.99579e-05, gnorm=3.805, loss_scale=16, train_wall=11, gb_free=2.8, wall=72175
2021-06-19 14:41:52 | INFO | train_inner | epoch 003: 301 / 3002 loss=2.715, ppl=6.56, wps=5737.8, ups=0.09, wpb=64846, bsz=128, num_updates=6266, lr=9.99579e-05, gnorm=2.178, loss_scale=16, train_wall=11, gb_free=2.8, wall=72186
2021-06-19 14:42:03 | INFO | train_inner | epoch 003: 302 / 3002 loss=2.641, ppl=6.24, wps=5951, ups=0.09, wpb=64840, bsz=128, num_updates=6267, lr=9.99579e-05, gnorm=2.124, loss_scale=16, train_wall=10, gb_free=2.8, wall=72197
2021-06-19 14:42:14 | INFO | train_inner | epoch 003: 303 / 3002 loss=2.81, ppl=7.01, wps=5779, ups=0.09, wpb=64847, bsz=128, num_updates=6268, lr=9.99579e-05, gnorm=2.281, loss_scale=16, train_wall=11, gb_free=2.8, wall=72209
2021-06-19 14:42:25 | INFO | train_inner | epoch 003: 304 / 3002 loss=2.63, ppl=6.19, wps=5806.4, ups=0.09, wpb=64771, bsz=128, num_updates=6269, lr=9.99578e-05, gnorm=2.101, loss_scale=16, train_wall=11, gb_free=2.8, wall=72220
2021-06-19 14:42:37 | INFO | train_inner | epoch 003: 305 / 3002 loss=2.574, ppl=5.96, wps=5800.7, ups=0.09, wpb=64786, bsz=128, num_updates=6270, lr=9.99578e-05, gnorm=2.19, loss_scale=16, train_wall=11, gb_free=2.8, wall=72231
2021-06-19 14:42:47 | INFO | train_inner | epoch 003: 306 / 3002 loss=2.641, ppl=6.24, wps=5927.1, ups=0.09, wpb=64762, bsz=128, num_updates=6271, lr=9.99578e-05, gnorm=2.225, loss_scale=16, train_wall=10, gb_free=2.8, wall=72242
2021-06-19 14:42:58 | INFO | train_inner | epoch 003: 307 / 3002 loss=2.71, ppl=6.54, wps=5949.7, ups=0.09, wpb=64901, bsz=128, num_updates=6272, lr=9.99578e-05, gnorm=2.234, loss_scale=16, train_wall=10, gb_free=2.8, wall=72253
2021-06-19 14:43:09 | INFO | train_inner | epoch 003: 308 / 3002 loss=2.679, ppl=6.4, wps=5935.6, ups=0.09, wpb=64834, bsz=128, num_updates=6273, lr=9.99578e-05, gnorm=2.869, loss_scale=16, train_wall=10, gb_free=2.8, wall=72264
2021-06-19 14:43:20 | INFO | train_inner | epoch 003: 309 / 3002 loss=2.832, ppl=7.12, wps=5900.6, ups=0.09, wpb=64793, bsz=128, num_updates=6274, lr=9.99578e-05, gnorm=2.224, loss_scale=16, train_wall=11, gb_free=2.8, wall=72275
2021-06-19 14:43:31 | INFO | train_inner | epoch 003: 310 / 3002 loss=2.793, ppl=6.93, wps=5828.9, ups=0.09, wpb=64797, bsz=128, num_updates=6275, lr=9.99578e-05, gnorm=2.179, loss_scale=16, train_wall=11, gb_free=2.8, wall=72286
2021-06-19 14:43:42 | INFO | train_inner | epoch 003: 311 / 3002 loss=2.75, ppl=6.73, wps=5852, ups=0.09, wpb=64774, bsz=128, num_updates=6276, lr=9.99578e-05, gnorm=2.158, loss_scale=16, train_wall=11, gb_free=2.8, wall=72297
2021-06-19 14:43:54 | INFO | train_inner | epoch 003: 312 / 3002 loss=2.599, ppl=6.06, wps=5736.8, ups=0.09, wpb=64855, bsz=128, num_updates=6277, lr=9.99578e-05, gnorm=2.237, loss_scale=16, train_wall=11, gb_free=2.8, wall=72308
2021-06-19 14:44:05 | INFO | train_inner | epoch 003: 313 / 3002 loss=2.684, ppl=6.43, wps=5853.8, ups=0.09, wpb=64803, bsz=128, num_updates=6278, lr=9.99578e-05, gnorm=2.181, loss_scale=16, train_wall=11, gb_free=2.8, wall=72319
2021-06-19 14:44:16 | INFO | train_inner | epoch 003: 314 / 3002 loss=2.788, ppl=6.9, wps=5924, ups=0.09, wpb=64785, bsz=128, num_updates=6279, lr=9.99578e-05, gnorm=2.337, loss_scale=16, train_wall=10, gb_free=2.8, wall=72330
2021-06-19 14:44:27 | INFO | train_inner | epoch 003: 315 / 3002 loss=2.794, ppl=6.94, wps=5758.1, ups=0.09, wpb=64828, bsz=128, num_updates=6280, lr=9.99578e-05, gnorm=2.16, loss_scale=16, train_wall=11, gb_free=2.8, wall=72341
2021-06-19 14:44:38 | INFO | train_inner | epoch 003: 316 / 3002 loss=2.76, ppl=6.77, wps=5887.5, ups=0.09, wpb=64904, bsz=128, num_updates=6281, lr=9.99577e-05, gnorm=2.111, loss_scale=16, train_wall=11, gb_free=2.8, wall=72352
2021-06-19 14:44:49 | INFO | train_inner | epoch 003: 317 / 3002 loss=2.751, ppl=6.73, wps=5800.8, ups=0.09, wpb=64809, bsz=128, num_updates=6282, lr=9.99577e-05, gnorm=2.189, loss_scale=16, train_wall=11, gb_free=2.8, wall=72364
2021-06-19 14:45:00 | INFO | train_inner | epoch 003: 318 / 3002 loss=2.565, ppl=5.92, wps=5802.3, ups=0.09, wpb=64821, bsz=128, num_updates=6283, lr=9.99577e-05, gnorm=2.104, loss_scale=16, train_wall=11, gb_free=2.8, wall=72375
2021-06-19 14:45:11 | INFO | train_inner | epoch 003: 319 / 3002 loss=2.705, ppl=6.52, wps=5891.9, ups=0.09, wpb=64818, bsz=128, num_updates=6284, lr=9.99577e-05, gnorm=2.194, loss_scale=16, train_wall=11, gb_free=2.8, wall=72386
2021-06-19 14:45:23 | INFO | train_inner | epoch 003: 320 / 3002 loss=2.552, ppl=5.86, wps=5705.5, ups=0.09, wpb=64830, bsz=128, num_updates=6285, lr=9.99577e-05, gnorm=2.23, loss_scale=16, train_wall=11, gb_free=2.8, wall=72397
2021-06-19 14:45:34 | INFO | train_inner | epoch 003: 321 / 3002 loss=2.615, ppl=6.12, wps=5843.6, ups=0.09, wpb=64804, bsz=128, num_updates=6286, lr=9.99577e-05, gnorm=2.136, loss_scale=16, train_wall=11, gb_free=2.8, wall=72408
2021-06-19 14:45:45 | INFO | train_inner | epoch 003: 322 / 3002 loss=2.543, ppl=5.83, wps=5984, ups=0.09, wpb=64848, bsz=128, num_updates=6287, lr=9.99577e-05, gnorm=2.239, loss_scale=16, train_wall=10, gb_free=2.8, wall=72419
2021-06-19 14:45:56 | INFO | train_inner | epoch 003: 323 / 3002 loss=2.587, ppl=6.01, wps=5971.5, ups=0.09, wpb=64856, bsz=128, num_updates=6288, lr=9.99577e-05, gnorm=2.13, loss_scale=16, train_wall=10, gb_free=2.8, wall=72430
2021-06-19 14:46:07 | INFO | train_inner | epoch 003: 324 / 3002 loss=2.539, ppl=5.81, wps=5844.9, ups=0.09, wpb=64802, bsz=128, num_updates=6289, lr=9.99577e-05, gnorm=2.245, loss_scale=16, train_wall=11, gb_free=2.8, wall=72441
2021-06-19 14:46:17 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-19 14:46:28 | INFO | train_inner | epoch 003: 326 / 3002 loss=2.671, ppl=6.37, wps=2971.6, ups=0.05, wpb=64812, bsz=128, num_updates=6290, lr=9.99577e-05, gnorm=2.13, loss_scale=8, train_wall=21, gb_free=2.8, wall=72463
2021-06-19 14:46:40 | INFO | train_inner | epoch 003: 327 / 3002 loss=2.713, ppl=6.56, wps=5762, ups=0.09, wpb=64795, bsz=128, num_updates=6291, lr=9.99577e-05, gnorm=2.104, loss_scale=8, train_wall=11, gb_free=2.8, wall=72474
2021-06-19 14:46:51 | INFO | train_inner | epoch 003: 328 / 3002 loss=2.709, ppl=6.54, wps=5882.7, ups=0.09, wpb=64894, bsz=128, num_updates=6292, lr=9.99577e-05, gnorm=2.152, loss_scale=8, train_wall=11, gb_free=2.8, wall=72485
2021-06-19 14:47:02 | INFO | train_inner | epoch 003: 329 / 3002 loss=2.769, ppl=6.82, wps=5880, ups=0.09, wpb=64837, bsz=128, num_updates=6293, lr=9.99577e-05, gnorm=2.332, loss_scale=8, train_wall=11, gb_free=2.8, wall=72496
2021-06-19 14:47:13 | INFO | train_inner | epoch 003: 330 / 3002 loss=2.536, ppl=5.8, wps=5939.6, ups=0.09, wpb=64827, bsz=128, num_updates=6294, lr=9.99576e-05, gnorm=2.08, loss_scale=8, train_wall=10, gb_free=2.8, wall=72507
2021-06-19 14:47:24 | INFO | train_inner | epoch 003: 331 / 3002 loss=2.618, ppl=6.14, wps=5810.1, ups=0.09, wpb=64863, bsz=128, num_updates=6295, lr=9.99576e-05, gnorm=2.125, loss_scale=8, train_wall=11, gb_free=2.8, wall=72518
2021-06-19 14:47:35 | INFO | train_inner | epoch 003: 332 / 3002 loss=2.647, ppl=6.26, wps=5752.2, ups=0.09, wpb=64697, bsz=128, num_updates=6296, lr=9.99576e-05, gnorm=2.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=72529
2021-06-19 14:47:46 | INFO | train_inner | epoch 003: 333 / 3002 loss=2.777, ppl=6.86, wps=5775.1, ups=0.09, wpb=64840, bsz=128, num_updates=6297, lr=9.99576e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=72541
2021-06-19 14:47:58 | INFO | train_inner | epoch 003: 334 / 3002 loss=2.855, ppl=7.24, wps=5740.1, ups=0.09, wpb=64840, bsz=128, num_updates=6298, lr=9.99576e-05, gnorm=2.3, loss_scale=8, train_wall=11, gb_free=2.8, wall=72552
2021-06-19 14:48:09 | INFO | train_inner | epoch 003: 335 / 3002 loss=2.665, ppl=6.34, wps=5764, ups=0.09, wpb=64750, bsz=128, num_updates=6299, lr=9.99576e-05, gnorm=2.169, loss_scale=8, train_wall=11, gb_free=2.8, wall=72563
2021-06-19 14:48:20 | INFO | train_inner | epoch 003: 336 / 3002 loss=2.56, ppl=5.9, wps=5742.2, ups=0.09, wpb=64809, bsz=128, num_updates=6300, lr=9.99576e-05, gnorm=2.147, loss_scale=8, train_wall=11, gb_free=2.8, wall=72574
2021-06-19 14:48:31 | INFO | train_inner | epoch 003: 337 / 3002 loss=2.726, ppl=6.62, wps=5812.6, ups=0.09, wpb=64836, bsz=128, num_updates=6301, lr=9.99576e-05, gnorm=2.239, loss_scale=8, train_wall=11, gb_free=2.8, wall=72586
2021-06-19 14:48:42 | INFO | train_inner | epoch 003: 338 / 3002 loss=2.566, ppl=5.92, wps=5804.9, ups=0.09, wpb=64849, bsz=128, num_updates=6302, lr=9.99576e-05, gnorm=2.19, loss_scale=8, train_wall=11, gb_free=2.8, wall=72597
2021-06-19 14:48:54 | INFO | train_inner | epoch 003: 339 / 3002 loss=2.678, ppl=6.4, wps=5703.8, ups=0.09, wpb=64842, bsz=128, num_updates=6303, lr=9.99576e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=72608
2021-06-19 14:49:05 | INFO | train_inner | epoch 003: 340 / 3002 loss=2.564, ppl=5.91, wps=5896.6, ups=0.09, wpb=64914, bsz=128, num_updates=6304, lr=9.99576e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=72619
2021-06-19 14:49:16 | INFO | train_inner | epoch 003: 341 / 3002 loss=2.683, ppl=6.42, wps=5703.3, ups=0.09, wpb=64861, bsz=128, num_updates=6305, lr=9.99576e-05, gnorm=2.135, loss_scale=8, train_wall=11, gb_free=2.8, wall=72631
2021-06-19 14:49:27 | INFO | train_inner | epoch 003: 342 / 3002 loss=2.719, ppl=6.58, wps=5822.2, ups=0.09, wpb=64847, bsz=128, num_updates=6306, lr=9.99575e-05, gnorm=2.282, loss_scale=8, train_wall=11, gb_free=2.8, wall=72642
2021-06-19 14:49:39 | INFO | train_inner | epoch 003: 343 / 3002 loss=2.632, ppl=6.2, wps=5790.6, ups=0.09, wpb=64877, bsz=128, num_updates=6307, lr=9.99575e-05, gnorm=2.293, loss_scale=8, train_wall=11, gb_free=2.8, wall=72653
2021-06-19 14:49:49 | INFO | train_inner | epoch 003: 344 / 3002 loss=2.678, ppl=6.4, wps=5924.2, ups=0.09, wpb=64839, bsz=128, num_updates=6308, lr=9.99575e-05, gnorm=2.286, loss_scale=8, train_wall=11, gb_free=2.8, wall=72664
2021-06-19 14:50:00 | INFO | train_inner | epoch 003: 345 / 3002 loss=2.643, ppl=6.25, wps=5899.2, ups=0.09, wpb=64890, bsz=128, num_updates=6309, lr=9.99575e-05, gnorm=2.243, loss_scale=8, train_wall=11, gb_free=2.8, wall=72675
2021-06-19 14:50:12 | INFO | train_inner | epoch 003: 346 / 3002 loss=2.569, ppl=5.93, wps=5816.9, ups=0.09, wpb=64938, bsz=128, num_updates=6310, lr=9.99575e-05, gnorm=2.277, loss_scale=8, train_wall=11, gb_free=2.8, wall=72686
2021-06-19 14:50:23 | INFO | train_inner | epoch 003: 347 / 3002 loss=2.689, ppl=6.45, wps=5772.9, ups=0.09, wpb=64853, bsz=128, num_updates=6311, lr=9.99575e-05, gnorm=2.159, loss_scale=8, train_wall=11, gb_free=2.8, wall=72697
2021-06-19 14:50:34 | INFO | train_inner | epoch 003: 348 / 3002 loss=2.57, ppl=5.94, wps=5744.2, ups=0.09, wpb=64836, bsz=128, num_updates=6312, lr=9.99575e-05, gnorm=2.152, loss_scale=8, train_wall=11, gb_free=2.8, wall=72709
2021-06-19 14:50:45 | INFO | train_inner | epoch 003: 349 / 3002 loss=2.544, ppl=5.83, wps=5831.3, ups=0.09, wpb=64846, bsz=128, num_updates=6313, lr=9.99575e-05, gnorm=2.117, loss_scale=8, train_wall=11, gb_free=2.8, wall=72720
2021-06-19 14:50:57 | INFO | train_inner | epoch 003: 350 / 3002 loss=2.481, ppl=5.58, wps=5754.4, ups=0.09, wpb=64796, bsz=128, num_updates=6314, lr=9.99575e-05, gnorm=2.137, loss_scale=8, train_wall=11, gb_free=2.8, wall=72731
2021-06-19 14:51:08 | INFO | train_inner | epoch 003: 351 / 3002 loss=2.635, ppl=6.21, wps=5875.7, ups=0.09, wpb=64736, bsz=128, num_updates=6315, lr=9.99575e-05, gnorm=2.233, loss_scale=8, train_wall=11, gb_free=2.8, wall=72742
2021-06-19 14:51:19 | INFO | train_inner | epoch 003: 352 / 3002 loss=2.806, ppl=6.99, wps=5749, ups=0.09, wpb=64766, bsz=128, num_updates=6316, lr=9.99575e-05, gnorm=2.257, loss_scale=8, train_wall=11, gb_free=2.8, wall=72753
2021-06-19 14:51:30 | INFO | train_inner | epoch 003: 353 / 3002 loss=2.506, ppl=5.68, wps=6013, ups=0.09, wpb=64891, bsz=128, num_updates=6317, lr=9.99575e-05, gnorm=2.238, loss_scale=8, train_wall=10, gb_free=2.8, wall=72764
2021-06-19 14:51:41 | INFO | train_inner | epoch 003: 354 / 3002 loss=2.673, ppl=6.38, wps=5769, ups=0.09, wpb=64862, bsz=128, num_updates=6318, lr=9.99575e-05, gnorm=2.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=72775
2021-06-19 14:51:52 | INFO | train_inner | epoch 003: 355 / 3002 loss=2.59, ppl=6.02, wps=5876.9, ups=0.09, wpb=64752, bsz=128, num_updates=6319, lr=9.99574e-05, gnorm=2.397, loss_scale=8, train_wall=11, gb_free=2.8, wall=72786
2021-06-19 14:52:03 | INFO | train_inner | epoch 003: 356 / 3002 loss=2.716, ppl=6.57, wps=5832.8, ups=0.09, wpb=64821, bsz=128, num_updates=6320, lr=9.99574e-05, gnorm=2.216, loss_scale=8, train_wall=11, gb_free=2.8, wall=72797
2021-06-19 14:52:14 | INFO | train_inner | epoch 003: 357 / 3002 loss=2.644, ppl=6.25, wps=5890, ups=0.09, wpb=64789, bsz=128, num_updates=6321, lr=9.99574e-05, gnorm=2.308, loss_scale=8, train_wall=11, gb_free=2.8, wall=72808
2021-06-19 14:52:25 | INFO | train_inner | epoch 003: 358 / 3002 loss=2.714, ppl=6.56, wps=5905.5, ups=0.09, wpb=64822, bsz=128, num_updates=6322, lr=9.99574e-05, gnorm=2.227, loss_scale=8, train_wall=11, gb_free=2.8, wall=72819
2021-06-19 14:52:36 | INFO | train_inner | epoch 003: 359 / 3002 loss=2.554, ppl=5.87, wps=5736, ups=0.09, wpb=64849, bsz=128, num_updates=6323, lr=9.99574e-05, gnorm=2.311, loss_scale=8, train_wall=11, gb_free=2.8, wall=72831
2021-06-19 14:52:47 | INFO | train_inner | epoch 003: 360 / 3002 loss=2.701, ppl=6.5, wps=5858.2, ups=0.09, wpb=64820, bsz=128, num_updates=6324, lr=9.99574e-05, gnorm=3.136, loss_scale=8, train_wall=11, gb_free=2.8, wall=72842
2021-06-19 14:52:58 | INFO | train_inner | epoch 003: 361 / 3002 loss=2.678, ppl=6.4, wps=5817.4, ups=0.09, wpb=64837, bsz=128, num_updates=6325, lr=9.99574e-05, gnorm=2.151, loss_scale=8, train_wall=11, gb_free=2.8, wall=72853
2021-06-19 14:53:10 | INFO | train_inner | epoch 003: 362 / 3002 loss=2.465, ppl=5.52, wps=5808.4, ups=0.09, wpb=64860, bsz=128, num_updates=6326, lr=9.99574e-05, gnorm=2.29, loss_scale=8, train_wall=11, gb_free=2.8, wall=72864
2021-06-19 14:53:21 | INFO | train_inner | epoch 003: 363 / 3002 loss=2.71, ppl=6.54, wps=5852, ups=0.09, wpb=64811, bsz=128, num_updates=6327, lr=9.99574e-05, gnorm=2.224, loss_scale=8, train_wall=11, gb_free=2.8, wall=72875
2021-06-19 14:53:32 | INFO | train_inner | epoch 003: 364 / 3002 loss=2.564, ppl=5.91, wps=5778.5, ups=0.09, wpb=64871, bsz=128, num_updates=6328, lr=9.99574e-05, gnorm=2.23, loss_scale=8, train_wall=11, gb_free=2.8, wall=72886
2021-06-19 14:53:43 | INFO | train_inner | epoch 003: 365 / 3002 loss=2.655, ppl=6.3, wps=5925.3, ups=0.09, wpb=64875, bsz=128, num_updates=6329, lr=9.99574e-05, gnorm=2.214, loss_scale=8, train_wall=10, gb_free=2.8, wall=72897
2021-06-19 14:53:54 | INFO | train_inner | epoch 003: 366 / 3002 loss=2.621, ppl=6.15, wps=5847, ups=0.09, wpb=64852, bsz=128, num_updates=6330, lr=9.99574e-05, gnorm=2.171, loss_scale=8, train_wall=11, gb_free=2.8, wall=72908
2021-06-19 14:54:05 | INFO | train_inner | epoch 003: 367 / 3002 loss=2.61, ppl=6.11, wps=5897.7, ups=0.09, wpb=64814, bsz=128, num_updates=6331, lr=9.99573e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=72919
2021-06-19 14:54:16 | INFO | train_inner | epoch 003: 368 / 3002 loss=2.729, ppl=6.63, wps=5803.2, ups=0.09, wpb=64861, bsz=128, num_updates=6332, lr=9.99573e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=72931
2021-06-19 14:54:27 | INFO | train_inner | epoch 003: 369 / 3002 loss=2.484, ppl=5.59, wps=5880.9, ups=0.09, wpb=64826, bsz=128, num_updates=6333, lr=9.99573e-05, gnorm=2.095, loss_scale=8, train_wall=11, gb_free=2.8, wall=72942
2021-06-19 14:54:38 | INFO | train_inner | epoch 003: 370 / 3002 loss=2.68, ppl=6.41, wps=5885.8, ups=0.09, wpb=64869, bsz=128, num_updates=6334, lr=9.99573e-05, gnorm=3.481, loss_scale=8, train_wall=11, gb_free=2.8, wall=72953
2021-06-19 14:54:49 | INFO | train_inner | epoch 003: 371 / 3002 loss=2.574, ppl=5.96, wps=5867.4, ups=0.09, wpb=64845, bsz=128, num_updates=6335, lr=9.99573e-05, gnorm=2.394, loss_scale=8, train_wall=11, gb_free=2.8, wall=72964
2021-06-19 14:55:00 | INFO | train_inner | epoch 003: 372 / 3002 loss=2.645, ppl=6.25, wps=5951.2, ups=0.09, wpb=64788, bsz=128, num_updates=6336, lr=9.99573e-05, gnorm=2.235, loss_scale=8, train_wall=10, gb_free=2.8, wall=72974
2021-06-19 14:55:11 | INFO | train_inner | epoch 003: 373 / 3002 loss=2.648, ppl=6.27, wps=5891.4, ups=0.09, wpb=64822, bsz=128, num_updates=6337, lr=9.99573e-05, gnorm=2.178, loss_scale=8, train_wall=11, gb_free=2.8, wall=72985
2021-06-19 14:55:22 | INFO | train_inner | epoch 003: 374 / 3002 loss=2.517, ppl=5.72, wps=5872.1, ups=0.09, wpb=64806, bsz=128, num_updates=6338, lr=9.99573e-05, gnorm=2.275, loss_scale=8, train_wall=11, gb_free=2.8, wall=72997
2021-06-19 14:55:33 | INFO | train_inner | epoch 003: 375 / 3002 loss=2.716, ppl=6.57, wps=5848.1, ups=0.09, wpb=64795, bsz=128, num_updates=6339, lr=9.99573e-05, gnorm=2.229, loss_scale=8, train_wall=11, gb_free=2.8, wall=73008
2021-06-19 14:55:44 | INFO | train_inner | epoch 003: 376 / 3002 loss=2.715, ppl=6.57, wps=5882.3, ups=0.09, wpb=64789, bsz=128, num_updates=6340, lr=9.99573e-05, gnorm=2.169, loss_scale=8, train_wall=11, gb_free=2.8, wall=73019
2021-06-19 14:55:55 | INFO | train_inner | epoch 003: 377 / 3002 loss=2.68, ppl=6.41, wps=5879.4, ups=0.09, wpb=64856, bsz=128, num_updates=6341, lr=9.99573e-05, gnorm=2.24, loss_scale=8, train_wall=11, gb_free=2.8, wall=73030
2021-06-19 14:56:06 | INFO | train_inner | epoch 003: 378 / 3002 loss=2.581, ppl=5.98, wps=5851.6, ups=0.09, wpb=64757, bsz=128, num_updates=6342, lr=9.99573e-05, gnorm=2.14, loss_scale=8, train_wall=11, gb_free=2.8, wall=73041
2021-06-19 14:56:17 | INFO | train_inner | epoch 003: 379 / 3002 loss=2.763, ppl=6.79, wps=5830, ups=0.09, wpb=64733, bsz=128, num_updates=6343, lr=9.99573e-05, gnorm=2.065, loss_scale=8, train_wall=11, gb_free=2.8, wall=73052
2021-06-19 14:56:29 | INFO | train_inner | epoch 003: 380 / 3002 loss=2.667, ppl=6.35, wps=5828.5, ups=0.09, wpb=64837, bsz=128, num_updates=6344, lr=9.99572e-05, gnorm=8.297, loss_scale=8, train_wall=11, gb_free=2.8, wall=73063
2021-06-19 14:56:40 | INFO | train_inner | epoch 003: 381 / 3002 loss=2.699, ppl=6.49, wps=5813, ups=0.09, wpb=64815, bsz=128, num_updates=6345, lr=9.99572e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=73074
2021-06-19 14:56:51 | INFO | train_inner | epoch 003: 382 / 3002 loss=2.53, ppl=5.78, wps=5823.5, ups=0.09, wpb=64861, bsz=128, num_updates=6346, lr=9.99572e-05, gnorm=2.19, loss_scale=8, train_wall=11, gb_free=2.8, wall=73085
2021-06-19 14:57:02 | INFO | train_inner | epoch 003: 383 / 3002 loss=2.692, ppl=6.46, wps=5714.4, ups=0.09, wpb=64831, bsz=128, num_updates=6347, lr=9.99572e-05, gnorm=8.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=73097
2021-06-19 14:57:13 | INFO | train_inner | epoch 003: 384 / 3002 loss=2.697, ppl=6.48, wps=5853.6, ups=0.09, wpb=64836, bsz=128, num_updates=6348, lr=9.99572e-05, gnorm=2.134, loss_scale=8, train_wall=11, gb_free=2.8, wall=73108
2021-06-19 14:57:24 | INFO | train_inner | epoch 003: 385 / 3002 loss=2.819, ppl=7.06, wps=5862.7, ups=0.09, wpb=64844, bsz=128, num_updates=6349, lr=9.99572e-05, gnorm=2.801, loss_scale=8, train_wall=11, gb_free=2.8, wall=73119
2021-06-19 14:57:35 | INFO | train_inner | epoch 003: 386 / 3002 loss=2.697, ppl=6.48, wps=5863, ups=0.09, wpb=64762, bsz=128, num_updates=6350, lr=9.99572e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=73130
2021-06-19 14:57:46 | INFO | train_inner | epoch 003: 387 / 3002 loss=2.764, ppl=6.79, wps=5888.3, ups=0.09, wpb=64823, bsz=128, num_updates=6351, lr=9.99572e-05, gnorm=2.332, loss_scale=8, train_wall=11, gb_free=2.8, wall=73141
2021-06-19 14:57:58 | INFO | train_inner | epoch 003: 388 / 3002 loss=2.712, ppl=6.55, wps=5839.6, ups=0.09, wpb=64880, bsz=128, num_updates=6352, lr=9.99572e-05, gnorm=2.285, loss_scale=8, train_wall=11, gb_free=2.8, wall=73152
2021-06-19 14:58:09 | INFO | train_inner | epoch 003: 389 / 3002 loss=2.853, ppl=7.22, wps=5827.7, ups=0.09, wpb=64757, bsz=128, num_updates=6353, lr=9.99572e-05, gnorm=2.372, loss_scale=8, train_wall=11, gb_free=2.8, wall=73163
2021-06-19 14:58:20 | INFO | train_inner | epoch 003: 390 / 3002 loss=2.635, ppl=6.21, wps=5783.6, ups=0.09, wpb=64863, bsz=128, num_updates=6354, lr=9.99572e-05, gnorm=2.19, loss_scale=8, train_wall=11, gb_free=2.8, wall=73174
2021-06-19 14:58:31 | INFO | train_inner | epoch 003: 391 / 3002 loss=2.637, ppl=6.22, wps=5787.3, ups=0.09, wpb=64792, bsz=128, num_updates=6355, lr=9.99572e-05, gnorm=2.291, loss_scale=8, train_wall=11, gb_free=2.8, wall=73185
2021-06-19 14:58:43 | INFO | train_inner | epoch 003: 392 / 3002 loss=2.601, ppl=6.07, wps=5668.8, ups=0.09, wpb=64865, bsz=128, num_updates=6356, lr=9.99571e-05, gnorm=2.207, loss_scale=8, train_wall=11, gb_free=2.8, wall=73197
2021-06-19 14:58:54 | INFO | train_inner | epoch 003: 393 / 3002 loss=2.702, ppl=6.51, wps=5720.5, ups=0.09, wpb=64777, bsz=128, num_updates=6357, lr=9.99571e-05, gnorm=2.214, loss_scale=8, train_wall=11, gb_free=2.8, wall=73208
2021-06-19 14:59:05 | INFO | train_inner | epoch 003: 394 / 3002 loss=2.69, ppl=6.45, wps=5768.8, ups=0.09, wpb=64827, bsz=128, num_updates=6358, lr=9.99571e-05, gnorm=2.159, loss_scale=8, train_wall=11, gb_free=2.8, wall=73219
2021-06-19 14:59:16 | INFO | train_inner | epoch 003: 395 / 3002 loss=2.553, ppl=5.87, wps=5854.9, ups=0.09, wpb=64816, bsz=128, num_updates=6359, lr=9.99571e-05, gnorm=2.111, loss_scale=8, train_wall=11, gb_free=2.8, wall=73230
2021-06-19 14:59:27 | INFO | train_inner | epoch 003: 396 / 3002 loss=2.616, ppl=6.13, wps=5879, ups=0.09, wpb=64864, bsz=128, num_updates=6360, lr=9.99571e-05, gnorm=2.22, loss_scale=8, train_wall=11, gb_free=2.8, wall=73242
2021-06-19 14:59:38 | INFO | train_inner | epoch 003: 397 / 3002 loss=2.668, ppl=6.36, wps=5932.5, ups=0.09, wpb=64927, bsz=128, num_updates=6361, lr=9.99571e-05, gnorm=2.324, loss_scale=8, train_wall=10, gb_free=2.8, wall=73252
2021-06-19 14:59:49 | INFO | train_inner | epoch 003: 398 / 3002 loss=2.652, ppl=6.28, wps=5767.3, ups=0.09, wpb=64725, bsz=128, num_updates=6362, lr=9.99571e-05, gnorm=2.817, loss_scale=8, train_wall=11, gb_free=2.8, wall=73264
2021-06-19 15:00:01 | INFO | train_inner | epoch 003: 399 / 3002 loss=2.74, ppl=6.68, wps=5769.7, ups=0.09, wpb=64839, bsz=128, num_updates=6363, lr=9.99571e-05, gnorm=2.249, loss_scale=8, train_wall=11, gb_free=2.8, wall=73275
2021-06-19 15:00:12 | INFO | train_inner | epoch 003: 400 / 3002 loss=2.566, ppl=5.92, wps=5831.7, ups=0.09, wpb=64830, bsz=128, num_updates=6364, lr=9.99571e-05, gnorm=2.156, loss_scale=8, train_wall=11, gb_free=2.8, wall=73286
2021-06-19 15:00:23 | INFO | train_inner | epoch 003: 401 / 3002 loss=2.568, ppl=5.93, wps=5880.8, ups=0.09, wpb=64803, bsz=128, num_updates=6365, lr=9.99571e-05, gnorm=2.271, loss_scale=8, train_wall=11, gb_free=2.8, wall=73297
2021-06-19 15:00:34 | INFO | train_inner | epoch 003: 402 / 3002 loss=2.729, ppl=6.63, wps=5819.4, ups=0.09, wpb=64743, bsz=128, num_updates=6366, lr=9.99571e-05, gnorm=2.146, loss_scale=8, train_wall=11, gb_free=2.8, wall=73308
2021-06-19 15:00:45 | INFO | train_inner | epoch 003: 403 / 3002 loss=2.63, ppl=6.19, wps=5883.6, ups=0.09, wpb=64872, bsz=128, num_updates=6367, lr=9.99571e-05, gnorm=2.284, loss_scale=8, train_wall=11, gb_free=2.8, wall=73319
2021-06-19 15:00:56 | INFO | train_inner | epoch 003: 404 / 3002 loss=2.603, ppl=6.07, wps=5824.1, ups=0.09, wpb=64794, bsz=128, num_updates=6368, lr=9.99571e-05, gnorm=2.243, loss_scale=8, train_wall=11, gb_free=2.8, wall=73330
2021-06-19 15:01:07 | INFO | train_inner | epoch 003: 405 / 3002 loss=2.583, ppl=5.99, wps=5874.5, ups=0.09, wpb=64786, bsz=128, num_updates=6369, lr=9.9957e-05, gnorm=2.279, loss_scale=8, train_wall=11, gb_free=2.8, wall=73341
2021-06-19 15:01:18 | INFO | train_inner | epoch 003: 406 / 3002 loss=2.669, ppl=6.36, wps=5738.5, ups=0.09, wpb=64882, bsz=128, num_updates=6370, lr=9.9957e-05, gnorm=2.27, loss_scale=8, train_wall=11, gb_free=2.8, wall=73353
2021-06-19 15:01:29 | INFO | train_inner | epoch 003: 407 / 3002 loss=2.792, ppl=6.92, wps=5851.6, ups=0.09, wpb=64829, bsz=128, num_updates=6371, lr=9.9957e-05, gnorm=2.377, loss_scale=8, train_wall=11, gb_free=2.8, wall=73364
2021-06-19 15:01:41 | INFO | train_inner | epoch 003: 408 / 3002 loss=2.632, ppl=6.2, wps=5825.5, ups=0.09, wpb=64809, bsz=128, num_updates=6372, lr=9.9957e-05, gnorm=2.154, loss_scale=8, train_wall=11, gb_free=2.8, wall=73375
2021-06-19 15:01:52 | INFO | train_inner | epoch 003: 409 / 3002 loss=2.765, ppl=6.8, wps=5852.3, ups=0.09, wpb=64922, bsz=128, num_updates=6373, lr=9.9957e-05, gnorm=2.36, loss_scale=8, train_wall=11, gb_free=2.8, wall=73386
2021-06-19 15:02:03 | INFO | train_inner | epoch 003: 410 / 3002 loss=2.723, ppl=6.6, wps=5828.2, ups=0.09, wpb=64896, bsz=128, num_updates=6374, lr=9.9957e-05, gnorm=2.482, loss_scale=8, train_wall=11, gb_free=2.8, wall=73397
2021-06-19 15:02:14 | INFO | train_inner | epoch 003: 411 / 3002 loss=2.624, ppl=6.17, wps=5840.8, ups=0.09, wpb=64815, bsz=128, num_updates=6375, lr=9.9957e-05, gnorm=2.136, loss_scale=8, train_wall=11, gb_free=2.8, wall=73408
2021-06-19 15:02:25 | INFO | train_inner | epoch 003: 412 / 3002 loss=2.685, ppl=6.43, wps=5786.8, ups=0.09, wpb=64868, bsz=128, num_updates=6376, lr=9.9957e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=73419
2021-06-19 15:02:36 | INFO | train_inner | epoch 003: 413 / 3002 loss=2.68, ppl=6.41, wps=5756.5, ups=0.09, wpb=64848, bsz=128, num_updates=6377, lr=9.9957e-05, gnorm=2.16, loss_scale=8, train_wall=11, gb_free=2.8, wall=73431
2021-06-19 15:02:47 | INFO | train_inner | epoch 003: 414 / 3002 loss=2.735, ppl=6.66, wps=5816.3, ups=0.09, wpb=64782, bsz=128, num_updates=6378, lr=9.9957e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=73442
2021-06-19 15:02:59 | INFO | train_inner | epoch 003: 415 / 3002 loss=2.825, ppl=7.08, wps=5836.8, ups=0.09, wpb=64901, bsz=128, num_updates=6379, lr=9.9957e-05, gnorm=2.327, loss_scale=8, train_wall=11, gb_free=2.8, wall=73453
2021-06-19 15:03:10 | INFO | train_inner | epoch 003: 416 / 3002 loss=2.674, ppl=6.38, wps=5904.5, ups=0.09, wpb=64806, bsz=128, num_updates=6380, lr=9.9957e-05, gnorm=3.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=73464
2021-06-19 15:03:21 | INFO | train_inner | epoch 003: 417 / 3002 loss=2.626, ppl=6.17, wps=5862.3, ups=0.09, wpb=64806, bsz=128, num_updates=6381, lr=9.99569e-05, gnorm=2.736, loss_scale=8, train_wall=11, gb_free=2.8, wall=73475
2021-06-19 15:03:32 | INFO | train_inner | epoch 003: 418 / 3002 loss=2.636, ppl=6.21, wps=5815.2, ups=0.09, wpb=64836, bsz=128, num_updates=6382, lr=9.99569e-05, gnorm=2.143, loss_scale=8, train_wall=11, gb_free=2.8, wall=73486
2021-06-19 15:03:43 | INFO | train_inner | epoch 003: 419 / 3002 loss=2.535, ppl=5.8, wps=5794.8, ups=0.09, wpb=64880, bsz=128, num_updates=6383, lr=9.99569e-05, gnorm=2.33, loss_scale=8, train_wall=11, gb_free=2.8, wall=73497
2021-06-19 15:03:54 | INFO | train_inner | epoch 003: 420 / 3002 loss=2.798, ppl=6.95, wps=5928.1, ups=0.09, wpb=64847, bsz=128, num_updates=6384, lr=9.99569e-05, gnorm=2.793, loss_scale=8, train_wall=10, gb_free=2.8, wall=73508
2021-06-19 15:04:05 | INFO | train_inner | epoch 003: 421 / 3002 loss=2.596, ppl=6.04, wps=5936.5, ups=0.09, wpb=64845, bsz=128, num_updates=6385, lr=9.99569e-05, gnorm=2.072, loss_scale=8, train_wall=10, gb_free=2.8, wall=73519
2021-06-19 15:04:16 | INFO | train_inner | epoch 003: 422 / 3002 loss=2.621, ppl=6.15, wps=5838.1, ups=0.09, wpb=64801, bsz=128, num_updates=6386, lr=9.99569e-05, gnorm=2.164, loss_scale=8, train_wall=11, gb_free=2.8, wall=73530
2021-06-19 15:04:27 | INFO | train_inner | epoch 003: 423 / 3002 loss=2.754, ppl=6.75, wps=5984.5, ups=0.09, wpb=64893, bsz=128, num_updates=6387, lr=9.99569e-05, gnorm=2.287, loss_scale=8, train_wall=10, gb_free=2.8, wall=73541
2021-06-19 15:04:38 | INFO | train_inner | epoch 003: 424 / 3002 loss=2.778, ppl=6.86, wps=5916, ups=0.09, wpb=64845, bsz=128, num_updates=6388, lr=9.99569e-05, gnorm=2.239, loss_scale=8, train_wall=11, gb_free=2.8, wall=73552
2021-06-19 15:04:49 | INFO | train_inner | epoch 003: 425 / 3002 loss=2.757, ppl=6.76, wps=5777.9, ups=0.09, wpb=64756, bsz=128, num_updates=6389, lr=9.99569e-05, gnorm=2.235, loss_scale=8, train_wall=11, gb_free=2.8, wall=73563
2021-06-19 15:05:00 | INFO | train_inner | epoch 003: 426 / 3002 loss=2.576, ppl=5.96, wps=5892.3, ups=0.09, wpb=64816, bsz=128, num_updates=6390, lr=9.99569e-05, gnorm=2.115, loss_scale=8, train_wall=11, gb_free=2.8, wall=73574
2021-06-19 15:05:11 | INFO | train_inner | epoch 003: 427 / 3002 loss=2.645, ppl=6.25, wps=5899.1, ups=0.09, wpb=64879, bsz=128, num_updates=6391, lr=9.99569e-05, gnorm=2.128, loss_scale=8, train_wall=11, gb_free=2.8, wall=73585
2021-06-19 15:05:22 | INFO | train_inner | epoch 003: 428 / 3002 loss=2.606, ppl=6.09, wps=5868.7, ups=0.09, wpb=64790, bsz=128, num_updates=6392, lr=9.99569e-05, gnorm=2.174, loss_scale=8, train_wall=11, gb_free=2.8, wall=73596
2021-06-19 15:05:33 | INFO | train_inner | epoch 003: 429 / 3002 loss=2.682, ppl=6.42, wps=5847.8, ups=0.09, wpb=64892, bsz=128, num_updates=6393, lr=9.99569e-05, gnorm=4.648, loss_scale=8, train_wall=11, gb_free=2.8, wall=73607
2021-06-19 15:05:44 | INFO | train_inner | epoch 003: 430 / 3002 loss=2.649, ppl=6.27, wps=5933.5, ups=0.09, wpb=64828, bsz=128, num_updates=6394, lr=9.99568e-05, gnorm=2.144, loss_scale=8, train_wall=10, gb_free=2.8, wall=73618
2021-06-19 15:05:55 | INFO | train_inner | epoch 003: 431 / 3002 loss=2.83, ppl=7.11, wps=5871.3, ups=0.09, wpb=64688, bsz=128, num_updates=6395, lr=9.99568e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=73629
2021-06-19 15:06:06 | INFO | train_inner | epoch 003: 432 / 3002 loss=2.628, ppl=6.18, wps=5762.7, ups=0.09, wpb=64791, bsz=128, num_updates=6396, lr=9.99568e-05, gnorm=2.126, loss_scale=8, train_wall=11, gb_free=2.8, wall=73641
2021-06-19 15:06:17 | INFO | train_inner | epoch 003: 433 / 3002 loss=2.489, ppl=5.61, wps=5806.5, ups=0.09, wpb=64795, bsz=128, num_updates=6397, lr=9.99568e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=73652
2021-06-19 15:06:28 | INFO | train_inner | epoch 003: 434 / 3002 loss=2.66, ppl=6.32, wps=5955.4, ups=0.09, wpb=64903, bsz=128, num_updates=6398, lr=9.99568e-05, gnorm=2.22, loss_scale=8, train_wall=10, gb_free=2.8, wall=73663
2021-06-19 15:06:39 | INFO | train_inner | epoch 003: 435 / 3002 loss=2.608, ppl=6.1, wps=5856.4, ups=0.09, wpb=64889, bsz=128, num_updates=6399, lr=9.99568e-05, gnorm=2.507, loss_scale=8, train_wall=11, gb_free=2.8, wall=73674
2021-06-19 15:06:50 | INFO | train_inner | epoch 003: 436 / 3002 loss=2.683, ppl=6.42, wps=5886.8, ups=0.09, wpb=64768, bsz=128, num_updates=6400, lr=9.99568e-05, gnorm=2.588, loss_scale=8, train_wall=11, gb_free=2.8, wall=73685
2021-06-19 15:07:02 | INFO | train_inner | epoch 003: 437 / 3002 loss=2.78, ppl=6.87, wps=5820.9, ups=0.09, wpb=64795, bsz=128, num_updates=6401, lr=9.99568e-05, gnorm=2.162, loss_scale=8, train_wall=11, gb_free=2.8, wall=73696
2021-06-19 15:07:13 | INFO | train_inner | epoch 003: 438 / 3002 loss=2.513, ppl=5.71, wps=5828.3, ups=0.09, wpb=64844, bsz=128, num_updates=6402, lr=9.99568e-05, gnorm=2.142, loss_scale=8, train_wall=11, gb_free=2.8, wall=73707
2021-06-19 15:07:24 | INFO | train_inner | epoch 003: 439 / 3002 loss=2.775, ppl=6.85, wps=5899.5, ups=0.09, wpb=64827, bsz=128, num_updates=6403, lr=9.99568e-05, gnorm=2.072, loss_scale=8, train_wall=11, gb_free=2.8, wall=73718
2021-06-19 15:07:35 | INFO | train_inner | epoch 003: 440 / 3002 loss=2.533, ppl=5.79, wps=5800.9, ups=0.09, wpb=64847, bsz=128, num_updates=6404, lr=9.99568e-05, gnorm=2.171, loss_scale=8, train_wall=11, gb_free=2.8, wall=73729
2021-06-19 15:07:46 | INFO | train_inner | epoch 003: 441 / 3002 loss=2.642, ppl=6.24, wps=5852.9, ups=0.09, wpb=64768, bsz=128, num_updates=6405, lr=9.99568e-05, gnorm=2.104, loss_scale=8, train_wall=11, gb_free=2.8, wall=73740
2021-06-19 15:07:57 | INFO | train_inner | epoch 003: 442 / 3002 loss=2.851, ppl=7.22, wps=5958.1, ups=0.09, wpb=64822, bsz=128, num_updates=6406, lr=9.99567e-05, gnorm=2.136, loss_scale=8, train_wall=10, gb_free=2.8, wall=73751
2021-06-19 15:08:08 | INFO | train_inner | epoch 003: 443 / 3002 loss=2.635, ppl=6.21, wps=5898.3, ups=0.09, wpb=64857, bsz=128, num_updates=6407, lr=9.99567e-05, gnorm=2.202, loss_scale=8, train_wall=11, gb_free=2.8, wall=73762
2021-06-19 15:08:19 | INFO | train_inner | epoch 003: 444 / 3002 loss=2.69, ppl=6.45, wps=5895.8, ups=0.09, wpb=64851, bsz=128, num_updates=6408, lr=9.99567e-05, gnorm=2.128, loss_scale=8, train_wall=11, gb_free=2.8, wall=73773
2021-06-19 15:08:30 | INFO | train_inner | epoch 003: 445 / 3002 loss=2.583, ppl=5.99, wps=5726.2, ups=0.09, wpb=64790, bsz=128, num_updates=6409, lr=9.99567e-05, gnorm=2.085, loss_scale=8, train_wall=11, gb_free=2.8, wall=73784
2021-06-19 15:08:41 | INFO | train_inner | epoch 003: 446 / 3002 loss=2.686, ppl=6.44, wps=5806.3, ups=0.09, wpb=64791, bsz=128, num_updates=6410, lr=9.99567e-05, gnorm=2.306, loss_scale=8, train_wall=11, gb_free=2.8, wall=73796
2021-06-19 15:08:52 | INFO | train_inner | epoch 003: 447 / 3002 loss=2.722, ppl=6.6, wps=5846.8, ups=0.09, wpb=64833, bsz=128, num_updates=6411, lr=9.99567e-05, gnorm=2.219, loss_scale=8, train_wall=11, gb_free=2.8, wall=73807
2021-06-19 15:09:04 | INFO | train_inner | epoch 003: 448 / 3002 loss=2.672, ppl=6.37, wps=5796, ups=0.09, wpb=64772, bsz=128, num_updates=6412, lr=9.99567e-05, gnorm=2.232, loss_scale=8, train_wall=11, gb_free=2.8, wall=73818
2021-06-19 15:09:15 | INFO | train_inner | epoch 003: 449 / 3002 loss=2.548, ppl=5.85, wps=5745.5, ups=0.09, wpb=64890, bsz=128, num_updates=6413, lr=9.99567e-05, gnorm=2.128, loss_scale=8, train_wall=11, gb_free=2.8, wall=73829
2021-06-19 15:09:26 | INFO | train_inner | epoch 003: 450 / 3002 loss=2.588, ppl=6.01, wps=5802.6, ups=0.09, wpb=64871, bsz=128, num_updates=6414, lr=9.99567e-05, gnorm=2.13, loss_scale=8, train_wall=11, gb_free=2.8, wall=73840
2021-06-19 15:09:37 | INFO | train_inner | epoch 003: 451 / 3002 loss=2.8, ppl=6.96, wps=5844.1, ups=0.09, wpb=64931, bsz=128, num_updates=6415, lr=9.99567e-05, gnorm=2.214, loss_scale=8, train_wall=11, gb_free=2.8, wall=73851
2021-06-19 15:09:48 | INFO | train_inner | epoch 003: 452 / 3002 loss=2.678, ppl=6.4, wps=5800.3, ups=0.09, wpb=64809, bsz=128, num_updates=6416, lr=9.99567e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=73863
2021-06-19 15:10:00 | INFO | train_inner | epoch 003: 453 / 3002 loss=2.697, ppl=6.48, wps=5743.7, ups=0.09, wpb=64854, bsz=128, num_updates=6417, lr=9.99567e-05, gnorm=7.469, loss_scale=16, train_wall=11, gb_free=2.8, wall=73874
2021-06-19 15:10:11 | INFO | train_inner | epoch 003: 454 / 3002 loss=2.768, ppl=6.81, wps=5835.7, ups=0.09, wpb=64888, bsz=128, num_updates=6418, lr=9.99567e-05, gnorm=2.215, loss_scale=16, train_wall=11, gb_free=2.8, wall=73885
2021-06-19 15:10:22 | INFO | train_inner | epoch 003: 455 / 3002 loss=2.729, ppl=6.63, wps=5959.1, ups=0.09, wpb=64864, bsz=128, num_updates=6419, lr=9.99566e-05, gnorm=2.184, loss_scale=16, train_wall=10, gb_free=2.8, wall=73896
2021-06-19 15:10:33 | INFO | train_inner | epoch 003: 456 / 3002 loss=2.636, ppl=6.21, wps=5836.3, ups=0.09, wpb=64901, bsz=128, num_updates=6420, lr=9.99566e-05, gnorm=2.188, loss_scale=16, train_wall=11, gb_free=2.8, wall=73907
2021-06-19 15:10:44 | INFO | train_inner | epoch 003: 457 / 3002 loss=2.605, ppl=6.08, wps=5842.7, ups=0.09, wpb=64839, bsz=128, num_updates=6421, lr=9.99566e-05, gnorm=2.179, loss_scale=16, train_wall=11, gb_free=2.8, wall=73918
2021-06-19 15:10:55 | INFO | train_inner | epoch 003: 458 / 3002 loss=2.618, ppl=6.14, wps=5767.3, ups=0.09, wpb=64885, bsz=128, num_updates=6422, lr=9.99566e-05, gnorm=2.259, loss_scale=16, train_wall=11, gb_free=2.8, wall=73929
2021-06-19 15:11:06 | INFO | train_inner | epoch 003: 459 / 3002 loss=2.728, ppl=6.62, wps=5825.9, ups=0.09, wpb=64829, bsz=128, num_updates=6423, lr=9.99566e-05, gnorm=2.182, loss_scale=16, train_wall=11, gb_free=2.8, wall=73941
2021-06-19 15:11:17 | INFO | train_inner | epoch 003: 460 / 3002 loss=2.731, ppl=6.64, wps=5736.5, ups=0.09, wpb=64826, bsz=128, num_updates=6424, lr=9.99566e-05, gnorm=2.129, loss_scale=16, train_wall=11, gb_free=2.8, wall=73952
2021-06-19 15:11:29 | INFO | train_inner | epoch 003: 461 / 3002 loss=2.589, ppl=6.02, wps=5771.6, ups=0.09, wpb=64849, bsz=128, num_updates=6425, lr=9.99566e-05, gnorm=2.114, loss_scale=16, train_wall=11, gb_free=2.8, wall=73963
2021-06-19 15:11:40 | INFO | train_inner | epoch 003: 462 / 3002 loss=2.597, ppl=6.05, wps=5745.2, ups=0.09, wpb=64691, bsz=128, num_updates=6426, lr=9.99566e-05, gnorm=2.197, loss_scale=16, train_wall=11, gb_free=2.8, wall=73974
2021-06-19 15:11:51 | INFO | train_inner | epoch 003: 463 / 3002 loss=2.749, ppl=6.72, wps=5832.6, ups=0.09, wpb=64857, bsz=128, num_updates=6427, lr=9.99566e-05, gnorm=2.253, loss_scale=16, train_wall=11, gb_free=2.8, wall=73985
2021-06-19 15:12:02 | INFO | train_inner | epoch 003: 464 / 3002 loss=2.642, ppl=6.24, wps=5763.8, ups=0.09, wpb=64877, bsz=128, num_updates=6428, lr=9.99566e-05, gnorm=2.186, loss_scale=16, train_wall=11, gb_free=2.8, wall=73997
2021-06-19 15:12:14 | INFO | train_inner | epoch 003: 465 / 3002 loss=2.539, ppl=5.81, wps=5772.7, ups=0.09, wpb=64836, bsz=128, num_updates=6429, lr=9.99566e-05, gnorm=2.24, loss_scale=16, train_wall=11, gb_free=2.8, wall=74008
2021-06-19 15:12:25 | INFO | train_inner | epoch 003: 466 / 3002 loss=2.709, ppl=6.54, wps=5822.4, ups=0.09, wpb=64751, bsz=128, num_updates=6430, lr=9.99566e-05, gnorm=2.398, loss_scale=16, train_wall=11, gb_free=2.8, wall=74019
2021-06-19 15:12:36 | INFO | train_inner | epoch 003: 467 / 3002 loss=2.586, ppl=6, wps=5901.9, ups=0.09, wpb=64868, bsz=128, num_updates=6431, lr=9.99565e-05, gnorm=2.157, loss_scale=16, train_wall=11, gb_free=2.8, wall=74030
2021-06-19 15:12:47 | INFO | train_inner | epoch 003: 468 / 3002 loss=2.683, ppl=6.42, wps=5937.9, ups=0.09, wpb=64878, bsz=128, num_updates=6432, lr=9.99565e-05, gnorm=2.158, loss_scale=16, train_wall=10, gb_free=2.8, wall=74041
2021-06-19 15:12:58 | INFO | train_inner | epoch 003: 469 / 3002 loss=2.688, ppl=6.44, wps=5891, ups=0.09, wpb=64919, bsz=128, num_updates=6433, lr=9.99565e-05, gnorm=2.155, loss_scale=16, train_wall=11, gb_free=2.8, wall=74052
2021-06-19 15:13:09 | INFO | train_inner | epoch 003: 470 / 3002 loss=2.78, ppl=6.87, wps=5895.7, ups=0.09, wpb=64826, bsz=128, num_updates=6434, lr=9.99565e-05, gnorm=2.721, loss_scale=16, train_wall=11, gb_free=2.8, wall=74063
2021-06-19 15:13:20 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-19 15:13:31 | INFO | train_inner | epoch 003: 472 / 3002 loss=2.729, ppl=6.63, wps=2931, ups=0.05, wpb=64752, bsz=128, num_updates=6435, lr=9.99565e-05, gnorm=2.204, loss_scale=8, train_wall=21, gb_free=2.8, wall=74085
2021-06-19 15:13:42 | INFO | train_inner | epoch 003: 473 / 3002 loss=2.634, ppl=6.21, wps=5696, ups=0.09, wpb=64609, bsz=128, num_updates=6436, lr=9.99565e-05, gnorm=2.115, loss_scale=8, train_wall=11, gb_free=2.8, wall=74096
2021-06-19 15:13:53 | INFO | train_inner | epoch 003: 474 / 3002 loss=2.689, ppl=6.45, wps=5777.8, ups=0.09, wpb=64816, bsz=128, num_updates=6437, lr=9.99565e-05, gnorm=2.112, loss_scale=8, train_wall=11, gb_free=2.8, wall=74108
2021-06-19 15:14:05 | INFO | train_inner | epoch 003: 475 / 3002 loss=2.781, ppl=6.87, wps=5750.9, ups=0.09, wpb=64825, bsz=128, num_updates=6438, lr=9.99565e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=74119
2021-06-19 15:14:16 | INFO | train_inner | epoch 003: 476 / 3002 loss=2.634, ppl=6.21, wps=5777.4, ups=0.09, wpb=64815, bsz=128, num_updates=6439, lr=9.99565e-05, gnorm=2.115, loss_scale=8, train_wall=11, gb_free=2.8, wall=74130
2021-06-19 15:14:27 | INFO | train_inner | epoch 003: 477 / 3002 loss=2.689, ppl=6.45, wps=5782, ups=0.09, wpb=64849, bsz=128, num_updates=6440, lr=9.99565e-05, gnorm=2.198, loss_scale=8, train_wall=11, gb_free=2.8, wall=74141
2021-06-19 15:14:38 | INFO | train_inner | epoch 003: 478 / 3002 loss=2.625, ppl=6.17, wps=5927.5, ups=0.09, wpb=64900, bsz=128, num_updates=6441, lr=9.99565e-05, gnorm=2.208, loss_scale=8, train_wall=11, gb_free=2.8, wall=74152
2021-06-19 15:14:49 | INFO | train_inner | epoch 003: 479 / 3002 loss=2.554, ppl=5.87, wps=5895.4, ups=0.09, wpb=64786, bsz=128, num_updates=6442, lr=9.99565e-05, gnorm=2.208, loss_scale=8, train_wall=10, gb_free=2.8, wall=74163
2021-06-19 15:15:00 | INFO | train_inner | epoch 003: 480 / 3002 loss=2.602, ppl=6.07, wps=5890.9, ups=0.09, wpb=64853, bsz=128, num_updates=6443, lr=9.99565e-05, gnorm=5.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=74174
2021-06-19 15:15:11 | INFO | train_inner | epoch 003: 481 / 3002 loss=2.788, ppl=6.91, wps=5833.7, ups=0.09, wpb=64808, bsz=128, num_updates=6444, lr=9.99564e-05, gnorm=2.154, loss_scale=8, train_wall=11, gb_free=2.8, wall=74185
2021-06-19 15:15:22 | INFO | train_inner | epoch 003: 482 / 3002 loss=2.731, ppl=6.64, wps=5968.8, ups=0.09, wpb=64855, bsz=128, num_updates=6445, lr=9.99564e-05, gnorm=2.088, loss_scale=8, train_wall=10, gb_free=2.8, wall=74196
2021-06-19 15:15:33 | INFO | train_inner | epoch 003: 483 / 3002 loss=2.612, ppl=6.12, wps=5799.2, ups=0.09, wpb=64791, bsz=128, num_updates=6446, lr=9.99564e-05, gnorm=2.237, loss_scale=8, train_wall=11, gb_free=2.8, wall=74207
2021-06-19 15:15:44 | INFO | train_inner | epoch 003: 484 / 3002 loss=2.616, ppl=6.13, wps=5989.6, ups=0.09, wpb=64810, bsz=128, num_updates=6447, lr=9.99564e-05, gnorm=2.536, loss_scale=8, train_wall=10, gb_free=2.8, wall=74218
2021-06-19 15:15:55 | INFO | train_inner | epoch 003: 485 / 3002 loss=2.516, ppl=5.72, wps=5925, ups=0.09, wpb=64849, bsz=128, num_updates=6448, lr=9.99564e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=74229
2021-06-19 15:16:06 | INFO | train_inner | epoch 003: 486 / 3002 loss=2.583, ppl=5.99, wps=5980.9, ups=0.09, wpb=64864, bsz=128, num_updates=6449, lr=9.99564e-05, gnorm=2.248, loss_scale=8, train_wall=10, gb_free=2.8, wall=74240
2021-06-19 15:16:17 | INFO | train_inner | epoch 003: 487 / 3002 loss=2.909, ppl=7.51, wps=5879.3, ups=0.09, wpb=64781, bsz=128, num_updates=6450, lr=9.99564e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=74251
2021-06-19 15:16:28 | INFO | train_inner | epoch 003: 488 / 3002 loss=2.845, ppl=7.19, wps=5797.3, ups=0.09, wpb=64804, bsz=128, num_updates=6451, lr=9.99564e-05, gnorm=2.23, loss_scale=8, train_wall=11, gb_free=2.8, wall=74262
2021-06-19 15:16:39 | INFO | train_inner | epoch 003: 489 / 3002 loss=2.666, ppl=6.35, wps=5775.7, ups=0.09, wpb=64839, bsz=128, num_updates=6452, lr=9.99564e-05, gnorm=2.165, loss_scale=8, train_wall=11, gb_free=2.8, wall=74273
2021-06-19 15:16:50 | INFO | train_inner | epoch 003: 490 / 3002 loss=2.593, ppl=6.03, wps=5908.7, ups=0.09, wpb=64790, bsz=128, num_updates=6453, lr=9.99564e-05, gnorm=2.29, loss_scale=8, train_wall=11, gb_free=2.8, wall=74284
2021-06-19 15:17:01 | INFO | train_inner | epoch 003: 491 / 3002 loss=2.774, ppl=6.84, wps=5967.1, ups=0.09, wpb=64801, bsz=128, num_updates=6454, lr=9.99564e-05, gnorm=2.211, loss_scale=8, train_wall=10, gb_free=2.8, wall=74295
2021-06-19 15:17:12 | INFO | train_inner | epoch 003: 492 / 3002 loss=2.668, ppl=6.36, wps=5889.8, ups=0.09, wpb=64738, bsz=128, num_updates=6455, lr=9.99564e-05, gnorm=2.17, loss_scale=8, train_wall=11, gb_free=2.8, wall=74306
2021-06-19 15:17:23 | INFO | train_inner | epoch 003: 493 / 3002 loss=2.576, ppl=5.96, wps=5716.3, ups=0.09, wpb=64837, bsz=128, num_updates=6456, lr=9.99563e-05, gnorm=2.183, loss_scale=8, train_wall=11, gb_free=2.8, wall=74318
2021-06-19 15:17:34 | INFO | train_inner | epoch 003: 494 / 3002 loss=2.569, ppl=5.93, wps=5834, ups=0.09, wpb=64828, bsz=128, num_updates=6457, lr=9.99563e-05, gnorm=2.151, loss_scale=8, train_wall=11, gb_free=2.8, wall=74329
2021-06-19 15:17:45 | INFO | train_inner | epoch 003: 495 / 3002 loss=2.754, ppl=6.75, wps=5861.8, ups=0.09, wpb=64712, bsz=128, num_updates=6458, lr=9.99563e-05, gnorm=2.191, loss_scale=8, train_wall=11, gb_free=2.8, wall=74340
2021-06-19 15:17:56 | INFO | train_inner | epoch 003: 496 / 3002 loss=2.665, ppl=6.34, wps=5943, ups=0.09, wpb=64841, bsz=128, num_updates=6459, lr=9.99563e-05, gnorm=2.142, loss_scale=8, train_wall=10, gb_free=2.8, wall=74351
2021-06-19 15:18:07 | INFO | train_inner | epoch 003: 497 / 3002 loss=2.666, ppl=6.35, wps=5851.4, ups=0.09, wpb=64804, bsz=128, num_updates=6460, lr=9.99563e-05, gnorm=2.394, loss_scale=8, train_wall=11, gb_free=2.8, wall=74362
2021-06-19 15:18:18 | INFO | train_inner | epoch 003: 498 / 3002 loss=2.658, ppl=6.31, wps=5863, ups=0.09, wpb=64880, bsz=128, num_updates=6461, lr=9.99563e-05, gnorm=2.506, loss_scale=8, train_wall=11, gb_free=2.8, wall=74373
2021-06-19 15:18:30 | INFO | train_inner | epoch 003: 499 / 3002 loss=2.592, ppl=6.03, wps=5772.2, ups=0.09, wpb=64754, bsz=128, num_updates=6462, lr=9.99563e-05, gnorm=2.108, loss_scale=8, train_wall=11, gb_free=2.8, wall=74384
2021-06-19 15:18:41 | INFO | train_inner | epoch 003: 500 / 3002 loss=2.624, ppl=6.16, wps=5794.2, ups=0.09, wpb=64769, bsz=128, num_updates=6463, lr=9.99563e-05, gnorm=2.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=74395
2021-06-19 15:18:52 | INFO | train_inner | epoch 003: 501 / 3002 loss=2.523, ppl=5.75, wps=5876.2, ups=0.09, wpb=64904, bsz=128, num_updates=6464, lr=9.99563e-05, gnorm=2.2, loss_scale=8, train_wall=11, gb_free=2.8, wall=74406
2021-06-19 15:19:03 | INFO | train_inner | epoch 003: 502 / 3002 loss=2.514, ppl=5.71, wps=5838.8, ups=0.09, wpb=64832, bsz=128, num_updates=6465, lr=9.99563e-05, gnorm=3.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=74417
2021-06-19 15:19:14 | INFO | train_inner | epoch 003: 503 / 3002 loss=2.646, ppl=6.26, wps=5812.5, ups=0.09, wpb=64892, bsz=128, num_updates=6466, lr=9.99563e-05, gnorm=2.238, loss_scale=8, train_wall=11, gb_free=2.8, wall=74429
2021-06-19 15:19:25 | INFO | train_inner | epoch 003: 504 / 3002 loss=2.813, ppl=7.03, wps=5868.6, ups=0.09, wpb=64827, bsz=128, num_updates=6467, lr=9.99563e-05, gnorm=2.107, loss_scale=8, train_wall=11, gb_free=2.8, wall=74440
2021-06-19 15:19:36 | INFO | train_inner | epoch 003: 505 / 3002 loss=2.745, ppl=6.7, wps=5848.5, ups=0.09, wpb=64780, bsz=128, num_updates=6468, lr=9.99563e-05, gnorm=2.144, loss_scale=8, train_wall=11, gb_free=2.8, wall=74451
2021-06-19 15:19:47 | INFO | train_inner | epoch 003: 506 / 3002 loss=2.907, ppl=7.5, wps=5841.4, ups=0.09, wpb=64850, bsz=128, num_updates=6469, lr=9.99562e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=74462
2021-06-19 15:19:59 | INFO | train_inner | epoch 003: 507 / 3002 loss=2.785, ppl=6.89, wps=5795.3, ups=0.09, wpb=64809, bsz=128, num_updates=6470, lr=9.99562e-05, gnorm=2.096, loss_scale=8, train_wall=11, gb_free=2.8, wall=74473
2021-06-19 15:20:10 | INFO | train_inner | epoch 003: 508 / 3002 loss=2.619, ppl=6.14, wps=5794.2, ups=0.09, wpb=64853, bsz=128, num_updates=6471, lr=9.99562e-05, gnorm=2.258, loss_scale=8, train_wall=11, gb_free=2.8, wall=74484
2021-06-19 15:20:21 | INFO | train_inner | epoch 003: 509 / 3002 loss=2.564, ppl=5.91, wps=5899.7, ups=0.09, wpb=64928, bsz=128, num_updates=6472, lr=9.99562e-05, gnorm=2.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=74495
2021-06-19 15:20:32 | INFO | train_inner | epoch 003: 510 / 3002 loss=2.868, ppl=7.3, wps=5843.5, ups=0.09, wpb=64798, bsz=128, num_updates=6473, lr=9.99562e-05, gnorm=2.192, loss_scale=8, train_wall=11, gb_free=2.8, wall=74506
2021-06-19 15:20:43 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-19 15:20:54 | INFO | train_inner | epoch 003: 512 / 3002 loss=2.714, ppl=6.56, wps=2915.3, ups=0.05, wpb=64773, bsz=128, num_updates=6474, lr=9.99562e-05, gnorm=2.496, loss_scale=4, train_wall=21, gb_free=2.8, wall=74528
2021-06-19 15:21:05 | INFO | train_inner | epoch 003: 513 / 3002 loss=2.687, ppl=6.44, wps=5787.3, ups=0.09, wpb=64797, bsz=128, num_updates=6475, lr=9.99562e-05, gnorm=2.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=74540
2021-06-19 15:21:16 | INFO | train_inner | epoch 003: 514 / 3002 loss=2.637, ppl=6.22, wps=5884.9, ups=0.09, wpb=64741, bsz=128, num_updates=6476, lr=9.99562e-05, gnorm=15.806, loss_scale=4, train_wall=11, gb_free=2.8, wall=74551
2021-06-19 15:21:27 | INFO | train_inner | epoch 003: 515 / 3002 loss=2.807, ppl=7, wps=5914, ups=0.09, wpb=64891, bsz=128, num_updates=6477, lr=9.99562e-05, gnorm=2.142, loss_scale=4, train_wall=10, gb_free=2.8, wall=74562
2021-06-19 15:21:38 | INFO | train_inner | epoch 003: 516 / 3002 loss=2.588, ppl=6.01, wps=5879.9, ups=0.09, wpb=64801, bsz=128, num_updates=6478, lr=9.99562e-05, gnorm=2.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=74573
2021-06-19 15:21:50 | INFO | train_inner | epoch 003: 517 / 3002 loss=2.749, ppl=6.72, wps=5730.5, ups=0.09, wpb=64803, bsz=128, num_updates=6479, lr=9.99562e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=74584
2021-06-19 15:22:01 | INFO | train_inner | epoch 003: 518 / 3002 loss=2.697, ppl=6.48, wps=5966.8, ups=0.09, wpb=64916, bsz=128, num_updates=6480, lr=9.99562e-05, gnorm=2.235, loss_scale=4, train_wall=10, gb_free=2.8, wall=74595
2021-06-19 15:22:12 | INFO | train_inner | epoch 003: 519 / 3002 loss=2.659, ppl=6.31, wps=5861.7, ups=0.09, wpb=64851, bsz=128, num_updates=6481, lr=9.99561e-05, gnorm=2.098, loss_scale=4, train_wall=11, gb_free=2.8, wall=74606
2021-06-19 15:22:23 | INFO | train_inner | epoch 003: 520 / 3002 loss=2.527, ppl=5.76, wps=5862, ups=0.09, wpb=64727, bsz=128, num_updates=6482, lr=9.99561e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=74617
2021-06-19 15:22:34 | INFO | train_inner | epoch 003: 521 / 3002 loss=2.618, ppl=6.14, wps=5911.1, ups=0.09, wpb=64821, bsz=128, num_updates=6483, lr=9.99561e-05, gnorm=2.256, loss_scale=4, train_wall=11, gb_free=2.8, wall=74628
2021-06-19 15:22:45 | INFO | train_inner | epoch 003: 522 / 3002 loss=2.813, ppl=7.03, wps=5913, ups=0.09, wpb=64790, bsz=128, num_updates=6484, lr=9.99561e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=74639
2021-06-19 15:22:56 | INFO | train_inner | epoch 003: 523 / 3002 loss=2.705, ppl=6.52, wps=5829.1, ups=0.09, wpb=64833, bsz=128, num_updates=6485, lr=9.99561e-05, gnorm=2.215, loss_scale=4, train_wall=11, gb_free=2.8, wall=74650
2021-06-19 15:23:07 | INFO | train_inner | epoch 003: 524 / 3002 loss=2.664, ppl=6.34, wps=5861.6, ups=0.09, wpb=64830, bsz=128, num_updates=6486, lr=9.99561e-05, gnorm=2.198, loss_scale=4, train_wall=11, gb_free=2.8, wall=74661
2021-06-19 15:23:18 | INFO | train_inner | epoch 003: 525 / 3002 loss=2.687, ppl=6.44, wps=5862.3, ups=0.09, wpb=64755, bsz=128, num_updates=6487, lr=9.99561e-05, gnorm=2.3, loss_scale=4, train_wall=11, gb_free=2.8, wall=74672
2021-06-19 15:23:29 | INFO | train_inner | epoch 003: 526 / 3002 loss=2.746, ppl=6.71, wps=5920.8, ups=0.09, wpb=64763, bsz=128, num_updates=6488, lr=9.99561e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=74683
2021-06-19 15:23:40 | INFO | train_inner | epoch 003: 527 / 3002 loss=2.544, ppl=5.83, wps=5924, ups=0.09, wpb=64793, bsz=128, num_updates=6489, lr=9.99561e-05, gnorm=2.142, loss_scale=4, train_wall=11, gb_free=2.8, wall=74694
2021-06-19 15:23:51 | INFO | train_inner | epoch 003: 528 / 3002 loss=2.609, ppl=6.1, wps=5870, ups=0.09, wpb=64853, bsz=128, num_updates=6490, lr=9.99561e-05, gnorm=2.275, loss_scale=4, train_wall=11, gb_free=2.8, wall=74705
2021-06-19 15:24:02 | INFO | train_inner | epoch 003: 529 / 3002 loss=2.65, ppl=6.28, wps=5807.8, ups=0.09, wpb=64904, bsz=128, num_updates=6491, lr=9.99561e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=74716
2021-06-19 15:24:13 | INFO | train_inner | epoch 003: 530 / 3002 loss=2.554, ppl=5.87, wps=5743, ups=0.09, wpb=64822, bsz=128, num_updates=6492, lr=9.99561e-05, gnorm=2.14, loss_scale=4, train_wall=11, gb_free=2.8, wall=74727
2021-06-19 15:24:24 | INFO | train_inner | epoch 003: 531 / 3002 loss=2.654, ppl=6.29, wps=5752.5, ups=0.09, wpb=64891, bsz=128, num_updates=6493, lr=9.99561e-05, gnorm=2.233, loss_scale=4, train_wall=11, gb_free=2.8, wall=74739
2021-06-19 15:24:36 | INFO | train_inner | epoch 003: 532 / 3002 loss=2.78, ppl=6.87, wps=5831.1, ups=0.09, wpb=64850, bsz=128, num_updates=6494, lr=9.9956e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=74750
2021-06-19 15:24:47 | INFO | train_inner | epoch 003: 533 / 3002 loss=2.627, ppl=6.18, wps=5748.7, ups=0.09, wpb=64766, bsz=128, num_updates=6495, lr=9.9956e-05, gnorm=2.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=74761
2021-06-19 15:24:58 | INFO | train_inner | epoch 003: 534 / 3002 loss=2.58, ppl=5.98, wps=5829.1, ups=0.09, wpb=64844, bsz=128, num_updates=6496, lr=9.9956e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=74772
2021-06-19 15:25:09 | INFO | train_inner | epoch 003: 535 / 3002 loss=2.594, ppl=6.04, wps=5823.2, ups=0.09, wpb=64862, bsz=128, num_updates=6497, lr=9.9956e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=74783
2021-06-19 15:25:20 | INFO | train_inner | epoch 003: 536 / 3002 loss=2.63, ppl=6.19, wps=5872.6, ups=0.09, wpb=64831, bsz=128, num_updates=6498, lr=9.9956e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=74794
2021-06-19 15:25:31 | INFO | train_inner | epoch 003: 537 / 3002 loss=2.692, ppl=6.46, wps=5860.2, ups=0.09, wpb=64853, bsz=128, num_updates=6499, lr=9.9956e-05, gnorm=2.327, loss_scale=4, train_wall=11, gb_free=2.8, wall=74806
2021-06-19 15:25:42 | INFO | train_inner | epoch 003: 538 / 3002 loss=2.739, ppl=6.68, wps=5925, ups=0.09, wpb=64785, bsz=128, num_updates=6500, lr=9.9956e-05, gnorm=2.239, loss_scale=4, train_wall=10, gb_free=2.8, wall=74816
2021-06-19 15:25:53 | INFO | train_inner | epoch 003: 539 / 3002 loss=2.546, ppl=5.84, wps=5928.3, ups=0.09, wpb=64784, bsz=128, num_updates=6501, lr=9.9956e-05, gnorm=2.25, loss_scale=4, train_wall=10, gb_free=2.8, wall=74827
2021-06-19 15:26:04 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-19 15:26:15 | INFO | train_inner | epoch 003: 541 / 3002 loss=2.589, ppl=6.01, wps=2952.9, ups=0.05, wpb=64843, bsz=128, num_updates=6502, lr=9.9956e-05, gnorm=2.274, loss_scale=2, train_wall=21, gb_free=2.8, wall=74849
2021-06-19 15:26:26 | INFO | train_inner | epoch 003: 542 / 3002 loss=2.698, ppl=6.49, wps=5840.9, ups=0.09, wpb=64740, bsz=128, num_updates=6503, lr=9.9956e-05, gnorm=2.174, loss_scale=2, train_wall=11, gb_free=2.8, wall=74860
2021-06-19 15:26:37 | INFO | train_inner | epoch 003: 543 / 3002 loss=2.498, ppl=5.65, wps=5891.4, ups=0.09, wpb=64875, bsz=128, num_updates=6504, lr=9.9956e-05, gnorm=2.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=74871
2021-06-19 15:26:48 | INFO | train_inner | epoch 003: 544 / 3002 loss=2.628, ppl=6.18, wps=5701.9, ups=0.09, wpb=64868, bsz=128, num_updates=6505, lr=9.9956e-05, gnorm=2.177, loss_scale=2, train_wall=11, gb_free=2.8, wall=74883
2021-06-19 15:27:00 | INFO | train_inner | epoch 003: 545 / 3002 loss=2.58, ppl=5.98, wps=5732.9, ups=0.09, wpb=64820, bsz=128, num_updates=6506, lr=9.99559e-05, gnorm=2.257, loss_scale=2, train_wall=11, gb_free=2.8, wall=74894
2021-06-19 15:27:11 | INFO | train_inner | epoch 003: 546 / 3002 loss=2.613, ppl=6.12, wps=5804.1, ups=0.09, wpb=64772, bsz=128, num_updates=6507, lr=9.99559e-05, gnorm=2.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=74905
2021-06-19 15:27:22 | INFO | train_inner | epoch 003: 547 / 3002 loss=2.701, ppl=6.5, wps=5924.1, ups=0.09, wpb=64860, bsz=128, num_updates=6508, lr=9.99559e-05, gnorm=2.254, loss_scale=2, train_wall=10, gb_free=2.8, wall=74916
2021-06-19 15:27:33 | INFO | train_inner | epoch 003: 548 / 3002 loss=2.775, ppl=6.84, wps=5817.7, ups=0.09, wpb=64794, bsz=128, num_updates=6509, lr=9.99559e-05, gnorm=2.739, loss_scale=2, train_wall=11, gb_free=2.8, wall=74927
2021-06-19 15:27:44 | INFO | train_inner | epoch 003: 549 / 3002 loss=2.591, ppl=6.03, wps=5764, ups=0.09, wpb=64878, bsz=128, num_updates=6510, lr=9.99559e-05, gnorm=2.826, loss_scale=2, train_wall=11, gb_free=2.8, wall=74939
2021-06-19 15:27:55 | INFO | train_inner | epoch 003: 550 / 3002 loss=2.529, ppl=5.77, wps=5880.6, ups=0.09, wpb=64898, bsz=128, num_updates=6511, lr=9.99559e-05, gnorm=2.162, loss_scale=2, train_wall=11, gb_free=2.8, wall=74950
2021-06-19 15:28:07 | INFO | train_inner | epoch 003: 551 / 3002 loss=2.592, ppl=6.03, wps=5774.9, ups=0.09, wpb=64832, bsz=128, num_updates=6512, lr=9.99559e-05, gnorm=2.194, loss_scale=2, train_wall=11, gb_free=2.8, wall=74961
2021-06-19 15:28:18 | INFO | train_inner | epoch 003: 552 / 3002 loss=2.748, ppl=6.72, wps=5917, ups=0.09, wpb=64849, bsz=128, num_updates=6513, lr=9.99559e-05, gnorm=2.232, loss_scale=2, train_wall=11, gb_free=2.8, wall=74972
2021-06-19 15:28:28 | INFO | train_inner | epoch 003: 553 / 3002 loss=2.726, ppl=6.61, wps=5999.9, ups=0.09, wpb=64858, bsz=128, num_updates=6514, lr=9.99559e-05, gnorm=2.095, loss_scale=2, train_wall=10, gb_free=2.8, wall=74983
2021-06-19 15:28:40 | INFO | train_inner | epoch 003: 554 / 3002 loss=2.632, ppl=6.2, wps=5786.2, ups=0.09, wpb=64847, bsz=128, num_updates=6515, lr=9.99559e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=74994
2021-06-19 15:28:51 | INFO | train_inner | epoch 003: 555 / 3002 loss=2.759, ppl=6.77, wps=5863.3, ups=0.09, wpb=64863, bsz=128, num_updates=6516, lr=9.99559e-05, gnorm=2.194, loss_scale=2, train_wall=11, gb_free=2.8, wall=75005
2021-06-19 15:29:02 | INFO | train_inner | epoch 003: 556 / 3002 loss=2.733, ppl=6.65, wps=5873.4, ups=0.09, wpb=64823, bsz=128, num_updates=6517, lr=9.99559e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=75016
2021-06-19 15:29:13 | INFO | train_inner | epoch 003: 557 / 3002 loss=2.454, ppl=5.48, wps=5830.5, ups=0.09, wpb=64887, bsz=128, num_updates=6518, lr=9.99559e-05, gnorm=2.076, loss_scale=2, train_wall=11, gb_free=2.8, wall=75027
2021-06-19 15:29:24 | INFO | train_inner | epoch 003: 558 / 3002 loss=2.653, ppl=6.29, wps=5715.7, ups=0.09, wpb=64883, bsz=128, num_updates=6519, lr=9.99558e-05, gnorm=2.168, loss_scale=2, train_wall=11, gb_free=2.8, wall=75038
2021-06-19 15:29:35 | INFO | train_inner | epoch 003: 559 / 3002 loss=2.657, ppl=6.31, wps=5801.6, ups=0.09, wpb=64823, bsz=128, num_updates=6520, lr=9.99558e-05, gnorm=2.186, loss_scale=2, train_wall=11, gb_free=2.8, wall=75050
2021-06-19 15:29:47 | INFO | train_inner | epoch 003: 560 / 3002 loss=2.831, ppl=7.12, wps=5722.1, ups=0.09, wpb=64807, bsz=128, num_updates=6521, lr=9.99558e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=75061
2021-06-19 15:29:57 | INFO | train_inner | epoch 003: 561 / 3002 loss=2.546, ppl=5.84, wps=5986.8, ups=0.09, wpb=64842, bsz=128, num_updates=6522, lr=9.99558e-05, gnorm=2.194, loss_scale=2, train_wall=10, gb_free=2.8, wall=75072
2021-06-19 15:30:09 | INFO | train_inner | epoch 003: 562 / 3002 loss=2.68, ppl=6.41, wps=5848.7, ups=0.09, wpb=64919, bsz=128, num_updates=6523, lr=9.99558e-05, gnorm=2.212, loss_scale=2, train_wall=11, gb_free=2.8, wall=75083
2021-06-19 15:30:20 | INFO | train_inner | epoch 003: 563 / 3002 loss=2.727, ppl=6.62, wps=5787.5, ups=0.09, wpb=64781, bsz=128, num_updates=6524, lr=9.99558e-05, gnorm=2.158, loss_scale=2, train_wall=11, gb_free=2.8, wall=75094
2021-06-19 15:30:31 | INFO | train_inner | epoch 003: 564 / 3002 loss=2.602, ppl=6.07, wps=5817.9, ups=0.09, wpb=64821, bsz=128, num_updates=6525, lr=9.99558e-05, gnorm=2.268, loss_scale=2, train_wall=11, gb_free=2.8, wall=75105
2021-06-19 15:30:42 | INFO | train_inner | epoch 003: 565 / 3002 loss=2.588, ppl=6.01, wps=5822.9, ups=0.09, wpb=64866, bsz=128, num_updates=6526, lr=9.99558e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=75116
2021-06-19 15:30:53 | INFO | train_inner | epoch 003: 566 / 3002 loss=2.796, ppl=6.94, wps=5954.5, ups=0.09, wpb=64911, bsz=128, num_updates=6527, lr=9.99558e-05, gnorm=2.262, loss_scale=2, train_wall=10, gb_free=2.8, wall=75127
2021-06-19 15:31:04 | INFO | train_inner | epoch 003: 567 / 3002 loss=2.743, ppl=6.69, wps=5773.9, ups=0.09, wpb=64778, bsz=128, num_updates=6528, lr=9.99558e-05, gnorm=2.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=75138
2021-06-19 15:31:15 | INFO | train_inner | epoch 003: 568 / 3002 loss=2.674, ppl=6.38, wps=5912.8, ups=0.09, wpb=64801, bsz=128, num_updates=6529, lr=9.99558e-05, gnorm=2.652, loss_scale=2, train_wall=10, gb_free=2.8, wall=75149
2021-06-19 15:31:26 | INFO | train_inner | epoch 003: 569 / 3002 loss=2.655, ppl=6.3, wps=5751.4, ups=0.09, wpb=64875, bsz=128, num_updates=6530, lr=9.99558e-05, gnorm=2.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=75161
2021-06-19 15:31:38 | INFO | train_inner | epoch 003: 570 / 3002 loss=2.785, ppl=6.89, wps=5805.8, ups=0.09, wpb=64806, bsz=128, num_updates=6531, lr=9.99557e-05, gnorm=2.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=75172
2021-06-19 15:31:48 | INFO | train_inner | epoch 003: 571 / 3002 loss=2.602, ppl=6.07, wps=5966.1, ups=0.09, wpb=64872, bsz=128, num_updates=6532, lr=9.99557e-05, gnorm=2.149, loss_scale=2, train_wall=10, gb_free=2.8, wall=75183
2021-06-19 15:32:00 | INFO | train_inner | epoch 003: 572 / 3002 loss=2.782, ppl=6.88, wps=5752.3, ups=0.09, wpb=64836, bsz=128, num_updates=6533, lr=9.99557e-05, gnorm=2.319, loss_scale=2, train_wall=11, gb_free=2.8, wall=75194
2021-06-19 15:32:11 | INFO | train_inner | epoch 003: 573 / 3002 loss=2.826, ppl=7.09, wps=5887, ups=0.09, wpb=64749, bsz=128, num_updates=6534, lr=9.99557e-05, gnorm=2.144, loss_scale=2, train_wall=11, gb_free=2.8, wall=75205
2021-06-19 15:32:22 | INFO | train_inner | epoch 003: 574 / 3002 loss=2.678, ppl=6.4, wps=5780, ups=0.09, wpb=64755, bsz=128, num_updates=6535, lr=9.99557e-05, gnorm=2.542, loss_scale=2, train_wall=11, gb_free=2.8, wall=75216
2021-06-19 15:32:33 | INFO | train_inner | epoch 003: 575 / 3002 loss=2.581, ppl=5.98, wps=5899.1, ups=0.09, wpb=64820, bsz=128, num_updates=6536, lr=9.99557e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=75227
2021-06-19 15:32:44 | INFO | train_inner | epoch 003: 576 / 3002 loss=2.622, ppl=6.15, wps=5787.6, ups=0.09, wpb=64839, bsz=128, num_updates=6537, lr=9.99557e-05, gnorm=2.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=75238
2021-06-19 15:32:55 | INFO | train_inner | epoch 003: 577 / 3002 loss=2.571, ppl=5.94, wps=5845.4, ups=0.09, wpb=64960, bsz=128, num_updates=6538, lr=9.99557e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=75250
2021-06-19 15:33:06 | INFO | train_inner | epoch 003: 578 / 3002 loss=2.818, ppl=7.05, wps=5913.9, ups=0.09, wpb=64914, bsz=128, num_updates=6539, lr=9.99557e-05, gnorm=2.194, loss_scale=2, train_wall=11, gb_free=2.8, wall=75261
2021-06-19 15:33:17 | INFO | train_inner | epoch 003: 579 / 3002 loss=2.509, ppl=5.69, wps=5973.2, ups=0.09, wpb=64819, bsz=128, num_updates=6540, lr=9.99557e-05, gnorm=3.497, loss_scale=2, train_wall=10, gb_free=2.8, wall=75271
2021-06-19 15:33:28 | INFO | train_inner | epoch 003: 580 / 3002 loss=2.868, ppl=7.3, wps=5915, ups=0.09, wpb=64840, bsz=128, num_updates=6541, lr=9.99557e-05, gnorm=2.249, loss_scale=2, train_wall=11, gb_free=2.8, wall=75282
2021-06-19 15:33:39 | INFO | train_inner | epoch 003: 581 / 3002 loss=2.603, ppl=6.07, wps=5759.2, ups=0.09, wpb=64865, bsz=128, num_updates=6542, lr=9.99557e-05, gnorm=5.38, loss_scale=2, train_wall=11, gb_free=2.8, wall=75294
2021-06-19 15:33:50 | INFO | train_inner | epoch 003: 582 / 3002 loss=2.611, ppl=6.11, wps=5915.8, ups=0.09, wpb=64756, bsz=128, num_updates=6543, lr=9.99557e-05, gnorm=2.123, loss_scale=2, train_wall=11, gb_free=2.8, wall=75305
2021-06-19 15:34:01 | INFO | train_inner | epoch 003: 583 / 3002 loss=2.519, ppl=5.73, wps=5984.2, ups=0.09, wpb=64915, bsz=128, num_updates=6544, lr=9.99556e-05, gnorm=2.122, loss_scale=2, train_wall=10, gb_free=2.8, wall=75315
2021-06-19 15:34:12 | INFO | train_inner | epoch 003: 584 / 3002 loss=2.836, ppl=7.14, wps=5973.4, ups=0.09, wpb=64818, bsz=128, num_updates=6545, lr=9.99556e-05, gnorm=2.107, loss_scale=2, train_wall=10, gb_free=2.8, wall=75326
2021-06-19 15:34:23 | INFO | train_inner | epoch 003: 585 / 3002 loss=2.649, ppl=6.27, wps=5811.5, ups=0.09, wpb=64816, bsz=128, num_updates=6546, lr=9.99556e-05, gnorm=3.762, loss_scale=2, train_wall=11, gb_free=2.8, wall=75337
2021-06-19 15:34:34 | INFO | train_inner | epoch 003: 586 / 3002 loss=2.649, ppl=6.27, wps=5821.8, ups=0.09, wpb=64814, bsz=128, num_updates=6547, lr=9.99556e-05, gnorm=2.176, loss_scale=2, train_wall=11, gb_free=2.8, wall=75349
2021-06-19 15:34:45 | INFO | train_inner | epoch 003: 587 / 3002 loss=2.698, ppl=6.49, wps=5912.6, ups=0.09, wpb=64779, bsz=128, num_updates=6548, lr=9.99556e-05, gnorm=2.228, loss_scale=2, train_wall=10, gb_free=2.8, wall=75359
2021-06-19 15:34:56 | INFO | train_inner | epoch 003: 588 / 3002 loss=2.778, ppl=6.86, wps=5880.4, ups=0.09, wpb=64817, bsz=128, num_updates=6549, lr=9.99556e-05, gnorm=2.185, loss_scale=2, train_wall=11, gb_free=2.8, wall=75371
2021-06-19 15:35:07 | INFO | train_inner | epoch 003: 589 / 3002 loss=2.672, ppl=6.37, wps=5858, ups=0.09, wpb=64754, bsz=128, num_updates=6550, lr=9.99556e-05, gnorm=2.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=75382
2021-06-19 15:35:18 | INFO | train_inner | epoch 003: 590 / 3002 loss=2.739, ppl=6.68, wps=5810.3, ups=0.09, wpb=64804, bsz=128, num_updates=6551, lr=9.99556e-05, gnorm=2.193, loss_scale=2, train_wall=11, gb_free=2.8, wall=75393
2021-06-19 15:35:29 | INFO | train_inner | epoch 003: 591 / 3002 loss=2.564, ppl=5.91, wps=5846.5, ups=0.09, wpb=64910, bsz=128, num_updates=6552, lr=9.99556e-05, gnorm=2.175, loss_scale=2, train_wall=11, gb_free=2.8, wall=75404
2021-06-19 15:35:40 | INFO | train_inner | epoch 003: 592 / 3002 loss=2.669, ppl=6.36, wps=5909.4, ups=0.09, wpb=64834, bsz=128, num_updates=6553, lr=9.99556e-05, gnorm=2.1, loss_scale=2, train_wall=11, gb_free=2.8, wall=75415
2021-06-19 15:35:52 | INFO | train_inner | epoch 003: 593 / 3002 loss=2.596, ppl=6.05, wps=5827.2, ups=0.09, wpb=64842, bsz=128, num_updates=6554, lr=9.99556e-05, gnorm=2.12, loss_scale=2, train_wall=11, gb_free=2.8, wall=75426
2021-06-19 15:36:03 | INFO | train_inner | epoch 003: 594 / 3002 loss=2.45, ppl=5.46, wps=5916.2, ups=0.09, wpb=64853, bsz=128, num_updates=6555, lr=9.99556e-05, gnorm=2.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=75437
2021-06-19 15:36:14 | INFO | train_inner | epoch 003: 595 / 3002 loss=2.575, ppl=5.96, wps=5857.5, ups=0.09, wpb=64867, bsz=128, num_updates=6556, lr=9.99555e-05, gnorm=2.212, loss_scale=2, train_wall=11, gb_free=2.8, wall=75448
2021-06-19 15:36:25 | INFO | train_inner | epoch 003: 596 / 3002 loss=2.554, ppl=5.87, wps=5748.3, ups=0.09, wpb=64821, bsz=128, num_updates=6557, lr=9.99555e-05, gnorm=2.255, loss_scale=2, train_wall=11, gb_free=2.8, wall=75459
2021-06-19 15:36:36 | INFO | train_inner | epoch 003: 597 / 3002 loss=2.478, ppl=5.57, wps=5870.6, ups=0.09, wpb=64822, bsz=128, num_updates=6558, lr=9.99555e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=75470
2021-06-19 15:36:47 | INFO | train_inner | epoch 003: 598 / 3002 loss=2.696, ppl=6.48, wps=5969.4, ups=0.09, wpb=64922, bsz=128, num_updates=6559, lr=9.99555e-05, gnorm=2.224, loss_scale=2, train_wall=10, gb_free=2.8, wall=75481
2021-06-19 15:36:58 | INFO | train_inner | epoch 003: 599 / 3002 loss=2.7, ppl=6.5, wps=5942.2, ups=0.09, wpb=64826, bsz=128, num_updates=6560, lr=9.99555e-05, gnorm=2.083, loss_scale=2, train_wall=10, gb_free=2.8, wall=75492
2021-06-19 15:37:09 | INFO | train_inner | epoch 003: 600 / 3002 loss=2.772, ppl=6.83, wps=5831.4, ups=0.09, wpb=64774, bsz=128, num_updates=6561, lr=9.99555e-05, gnorm=2.155, loss_scale=2, train_wall=11, gb_free=2.8, wall=75503
2021-06-19 15:37:20 | INFO | train_inner | epoch 003: 601 / 3002 loss=2.751, ppl=6.73, wps=5723.3, ups=0.09, wpb=64791, bsz=128, num_updates=6562, lr=9.99555e-05, gnorm=2.241, loss_scale=2, train_wall=11, gb_free=2.8, wall=75514
2021-06-19 15:37:31 | INFO | train_inner | epoch 003: 602 / 3002 loss=2.782, ppl=6.88, wps=5853, ups=0.09, wpb=64869, bsz=128, num_updates=6563, lr=9.99555e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=75526
2021-06-19 15:37:42 | INFO | train_inner | epoch 003: 603 / 3002 loss=2.643, ppl=6.24, wps=5818.7, ups=0.09, wpb=64772, bsz=128, num_updates=6564, lr=9.99555e-05, gnorm=2.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=75537
2021-06-19 15:37:53 | INFO | train_inner | epoch 003: 604 / 3002 loss=2.573, ppl=5.95, wps=5856.2, ups=0.09, wpb=64917, bsz=128, num_updates=6565, lr=9.99555e-05, gnorm=2.162, loss_scale=2, train_wall=11, gb_free=2.8, wall=75548
2021-06-19 15:38:04 | INFO | train_inner | epoch 003: 605 / 3002 loss=2.701, ppl=6.5, wps=5988.2, ups=0.09, wpb=64921, bsz=128, num_updates=6566, lr=9.99555e-05, gnorm=2.225, loss_scale=2, train_wall=10, gb_free=2.8, wall=75559
2021-06-19 15:38:15 | INFO | train_inner | epoch 003: 606 / 3002 loss=2.594, ppl=6.04, wps=5904.1, ups=0.09, wpb=64955, bsz=128, num_updates=6567, lr=9.99555e-05, gnorm=2.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=75570
2021-06-19 15:38:26 | INFO | train_inner | epoch 003: 607 / 3002 loss=2.663, ppl=6.33, wps=5978.3, ups=0.09, wpb=64834, bsz=128, num_updates=6568, lr=9.99555e-05, gnorm=2.109, loss_scale=2, train_wall=10, gb_free=2.8, wall=75580
2021-06-19 15:38:37 | INFO | train_inner | epoch 003: 608 / 3002 loss=2.738, ppl=6.67, wps=5855.1, ups=0.09, wpb=64904, bsz=128, num_updates=6569, lr=9.99554e-05, gnorm=2.25, loss_scale=2, train_wall=11, gb_free=2.8, wall=75592
2021-06-19 15:38:48 | INFO | train_inner | epoch 003: 609 / 3002 loss=2.726, ppl=6.62, wps=5858.3, ups=0.09, wpb=64884, bsz=128, num_updates=6570, lr=9.99554e-05, gnorm=2.781, loss_scale=2, train_wall=11, gb_free=2.8, wall=75603
2021-06-19 15:38:59 | INFO | train_inner | epoch 003: 610 / 3002 loss=2.542, ppl=5.82, wps=5904.3, ups=0.09, wpb=64875, bsz=128, num_updates=6571, lr=9.99554e-05, gnorm=2.176, loss_scale=2, train_wall=11, gb_free=2.8, wall=75614
2021-06-19 15:39:10 | INFO | train_inner | epoch 003: 611 / 3002 loss=2.84, ppl=7.16, wps=5822, ups=0.09, wpb=64802, bsz=128, num_updates=6572, lr=9.99554e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=75625
2021-06-19 15:39:21 | INFO | train_inner | epoch 003: 612 / 3002 loss=2.664, ppl=6.34, wps=5868.4, ups=0.09, wpb=64817, bsz=128, num_updates=6573, lr=9.99554e-05, gnorm=2.176, loss_scale=2, train_wall=11, gb_free=2.8, wall=75636
2021-06-19 15:39:33 | INFO | train_inner | epoch 003: 613 / 3002 loss=2.723, ppl=6.6, wps=5858.5, ups=0.09, wpb=64835, bsz=128, num_updates=6574, lr=9.99554e-05, gnorm=2.178, loss_scale=2, train_wall=11, gb_free=2.8, wall=75647
2021-06-19 15:39:43 | INFO | train_inner | epoch 003: 614 / 3002 loss=2.535, ppl=5.79, wps=5949.4, ups=0.09, wpb=64821, bsz=128, num_updates=6575, lr=9.99554e-05, gnorm=3.817, loss_scale=2, train_wall=10, gb_free=2.8, wall=75658
2021-06-19 15:39:55 | INFO | train_inner | epoch 003: 615 / 3002 loss=2.568, ppl=5.93, wps=5838.5, ups=0.09, wpb=64860, bsz=128, num_updates=6576, lr=9.99554e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=75669
2021-06-19 15:40:06 | INFO | train_inner | epoch 003: 616 / 3002 loss=2.604, ppl=6.08, wps=5751.7, ups=0.09, wpb=64771, bsz=128, num_updates=6577, lr=9.99554e-05, gnorm=2.275, loss_scale=2, train_wall=11, gb_free=2.8, wall=75680
2021-06-19 15:40:17 | INFO | train_inner | epoch 003: 617 / 3002 loss=2.79, ppl=6.92, wps=5763.9, ups=0.09, wpb=64806, bsz=128, num_updates=6578, lr=9.99554e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=75691
2021-06-19 15:40:28 | INFO | train_inner | epoch 003: 618 / 3002 loss=2.727, ppl=6.62, wps=5832.3, ups=0.09, wpb=64814, bsz=128, num_updates=6579, lr=9.99554e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=75702
2021-06-19 15:40:39 | INFO | train_inner | epoch 003: 619 / 3002 loss=2.708, ppl=6.53, wps=5825.4, ups=0.09, wpb=64890, bsz=128, num_updates=6580, lr=9.99554e-05, gnorm=2.17, loss_scale=2, train_wall=11, gb_free=2.8, wall=75714
2021-06-19 15:40:50 | INFO | train_inner | epoch 003: 620 / 3002 loss=2.73, ppl=6.64, wps=5792.6, ups=0.09, wpb=64771, bsz=128, num_updates=6581, lr=9.99553e-05, gnorm=2.07, loss_scale=2, train_wall=11, gb_free=2.8, wall=75725
2021-06-19 15:41:02 | INFO | train_inner | epoch 003: 621 / 3002 loss=2.573, ppl=5.95, wps=5818.8, ups=0.09, wpb=64789, bsz=128, num_updates=6582, lr=9.99553e-05, gnorm=2.142, loss_scale=2, train_wall=11, gb_free=2.8, wall=75736
2021-06-19 15:41:13 | INFO | train_inner | epoch 003: 622 / 3002 loss=2.701, ppl=6.5, wps=5900.6, ups=0.09, wpb=64825, bsz=128, num_updates=6583, lr=9.99553e-05, gnorm=2.213, loss_scale=2, train_wall=11, gb_free=2.8, wall=75747
2021-06-19 15:41:24 | INFO | train_inner | epoch 003: 623 / 3002 loss=2.803, ppl=6.98, wps=5832.1, ups=0.09, wpb=64881, bsz=128, num_updates=6584, lr=9.99553e-05, gnorm=2.633, loss_scale=2, train_wall=11, gb_free=2.8, wall=75758
2021-06-19 15:41:35 | INFO | train_inner | epoch 003: 624 / 3002 loss=2.69, ppl=6.45, wps=5925.6, ups=0.09, wpb=64822, bsz=128, num_updates=6585, lr=9.99553e-05, gnorm=12.447, loss_scale=2, train_wall=10, gb_free=2.8, wall=75769
2021-06-19 15:41:46 | INFO | train_inner | epoch 003: 625 / 3002 loss=2.768, ppl=6.81, wps=5921, ups=0.09, wpb=64781, bsz=128, num_updates=6586, lr=9.99553e-05, gnorm=2.321, loss_scale=2, train_wall=10, gb_free=2.8, wall=75780
2021-06-19 15:41:57 | INFO | train_inner | epoch 003: 626 / 3002 loss=2.708, ppl=6.54, wps=5872.3, ups=0.09, wpb=64854, bsz=128, num_updates=6587, lr=9.99553e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=75791
2021-06-19 15:42:08 | INFO | train_inner | epoch 003: 627 / 3002 loss=2.762, ppl=6.78, wps=5808.9, ups=0.09, wpb=64857, bsz=128, num_updates=6588, lr=9.99553e-05, gnorm=2.201, loss_scale=2, train_wall=11, gb_free=2.8, wall=75802
2021-06-19 15:42:19 | INFO | train_inner | epoch 003: 628 / 3002 loss=2.782, ppl=6.88, wps=5861.3, ups=0.09, wpb=64734, bsz=128, num_updates=6589, lr=9.99553e-05, gnorm=2.25, loss_scale=2, train_wall=11, gb_free=2.8, wall=75813
2021-06-19 15:42:30 | INFO | train_inner | epoch 003: 629 / 3002 loss=2.681, ppl=6.42, wps=5836, ups=0.09, wpb=64857, bsz=128, num_updates=6590, lr=9.99553e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=75824
2021-06-19 15:42:41 | INFO | train_inner | epoch 003: 630 / 3002 loss=2.588, ppl=6.01, wps=5947.7, ups=0.09, wpb=64845, bsz=128, num_updates=6591, lr=9.99553e-05, gnorm=2.142, loss_scale=2, train_wall=10, gb_free=2.8, wall=75835
2021-06-19 15:42:52 | INFO | train_inner | epoch 003: 631 / 3002 loss=2.683, ppl=6.42, wps=5947.1, ups=0.09, wpb=64785, bsz=128, num_updates=6592, lr=9.99553e-05, gnorm=3.211, loss_scale=2, train_wall=10, gb_free=2.8, wall=75846
2021-06-19 15:43:03 | INFO | train_inner | epoch 003: 632 / 3002 loss=2.619, ppl=6.14, wps=5925.8, ups=0.09, wpb=64845, bsz=128, num_updates=6593, lr=9.99553e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=75857
2021-06-19 15:43:14 | INFO | train_inner | epoch 003: 633 / 3002 loss=2.703, ppl=6.51, wps=5902.6, ups=0.09, wpb=64934, bsz=128, num_updates=6594, lr=9.99552e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=75868
2021-06-19 15:43:25 | INFO | train_inner | epoch 003: 634 / 3002 loss=2.708, ppl=6.53, wps=5966.3, ups=0.09, wpb=64802, bsz=128, num_updates=6595, lr=9.99552e-05, gnorm=2.196, loss_scale=2, train_wall=10, gb_free=2.8, wall=75879
2021-06-19 15:43:36 | INFO | train_inner | epoch 003: 635 / 3002 loss=2.775, ppl=6.85, wps=5850.5, ups=0.09, wpb=64863, bsz=128, num_updates=6596, lr=9.99552e-05, gnorm=2.191, loss_scale=2, train_wall=11, gb_free=2.8, wall=75890
2021-06-19 15:43:47 | INFO | train_inner | epoch 003: 636 / 3002 loss=2.666, ppl=6.35, wps=5795.7, ups=0.09, wpb=64825, bsz=128, num_updates=6597, lr=9.99552e-05, gnorm=2.189, loss_scale=2, train_wall=11, gb_free=2.8, wall=75901
2021-06-19 15:43:58 | INFO | train_inner | epoch 003: 637 / 3002 loss=2.596, ppl=6.05, wps=5886.8, ups=0.09, wpb=64866, bsz=128, num_updates=6598, lr=9.99552e-05, gnorm=2.305, loss_scale=2, train_wall=11, gb_free=2.8, wall=75912
2021-06-19 15:44:09 | INFO | train_inner | epoch 003: 638 / 3002 loss=2.558, ppl=5.89, wps=5747, ups=0.09, wpb=64832, bsz=128, num_updates=6599, lr=9.99552e-05, gnorm=2.194, loss_scale=2, train_wall=11, gb_free=2.8, wall=75923
2021-06-19 15:44:20 | INFO | train_inner | epoch 003: 639 / 3002 loss=2.826, ppl=7.09, wps=5810.3, ups=0.09, wpb=64847, bsz=128, num_updates=6600, lr=9.99552e-05, gnorm=2.52, loss_scale=2, train_wall=11, gb_free=2.8, wall=75935
2021-06-19 15:44:31 | INFO | train_inner | epoch 003: 640 / 3002 loss=2.751, ppl=6.73, wps=5876.5, ups=0.09, wpb=64883, bsz=128, num_updates=6601, lr=9.99552e-05, gnorm=2.165, loss_scale=2, train_wall=11, gb_free=2.8, wall=75946
2021-06-19 15:44:42 | INFO | train_inner | epoch 003: 641 / 3002 loss=2.507, ppl=5.69, wps=5844.1, ups=0.09, wpb=64909, bsz=128, num_updates=6602, lr=9.99552e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=75957
2021-06-19 15:44:54 | INFO | train_inner | epoch 003: 642 / 3002 loss=2.688, ppl=6.44, wps=5755.5, ups=0.09, wpb=64805, bsz=128, num_updates=6603, lr=9.99552e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=75968
2021-06-19 15:45:05 | INFO | train_inner | epoch 003: 643 / 3002 loss=2.63, ppl=6.19, wps=5825.4, ups=0.09, wpb=64867, bsz=128, num_updates=6604, lr=9.99552e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=75979
2021-06-19 15:45:16 | INFO | train_inner | epoch 003: 644 / 3002 loss=2.731, ppl=6.64, wps=5787.4, ups=0.09, wpb=64791, bsz=128, num_updates=6605, lr=9.99552e-05, gnorm=2.213, loss_scale=2, train_wall=11, gb_free=2.8, wall=75990
2021-06-19 15:45:27 | INFO | train_inner | epoch 003: 645 / 3002 loss=2.576, ppl=5.96, wps=5866, ups=0.09, wpb=64863, bsz=128, num_updates=6606, lr=9.99551e-05, gnorm=2.569, loss_scale=2, train_wall=11, gb_free=2.8, wall=76001
2021-06-19 15:45:38 | INFO | train_inner | epoch 003: 646 / 3002 loss=2.62, ppl=6.15, wps=5934.1, ups=0.09, wpb=64910, bsz=128, num_updates=6607, lr=9.99551e-05, gnorm=2.265, loss_scale=2, train_wall=10, gb_free=2.8, wall=76012
2021-06-19 15:45:49 | INFO | train_inner | epoch 003: 647 / 3002 loss=2.652, ppl=6.28, wps=5912.3, ups=0.09, wpb=64719, bsz=128, num_updates=6608, lr=9.99551e-05, gnorm=2.086, loss_scale=2, train_wall=11, gb_free=2.8, wall=76023
2021-06-19 15:46:00 | INFO | train_inner | epoch 003: 648 / 3002 loss=2.625, ppl=6.17, wps=5750, ups=0.09, wpb=64778, bsz=128, num_updates=6609, lr=9.99551e-05, gnorm=2.049, loss_scale=2, train_wall=11, gb_free=2.8, wall=76035
2021-06-19 15:46:11 | INFO | train_inner | epoch 003: 649 / 3002 loss=2.749, ppl=6.72, wps=5825.1, ups=0.09, wpb=64916, bsz=128, num_updates=6610, lr=9.99551e-05, gnorm=2.739, loss_scale=2, train_wall=11, gb_free=2.8, wall=76046
2021-06-19 15:46:22 | INFO | train_inner | epoch 003: 650 / 3002 loss=2.809, ppl=7.01, wps=5944, ups=0.09, wpb=64865, bsz=128, num_updates=6611, lr=9.99551e-05, gnorm=2.148, loss_scale=2, train_wall=10, gb_free=2.8, wall=76057
2021-06-19 15:46:33 | INFO | train_inner | epoch 003: 651 / 3002 loss=2.542, ppl=5.82, wps=5900.9, ups=0.09, wpb=64819, bsz=128, num_updates=6612, lr=9.99551e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=76068
2021-06-19 15:46:45 | INFO | train_inner | epoch 003: 652 / 3002 loss=2.576, ppl=5.96, wps=5712.5, ups=0.09, wpb=64801, bsz=128, num_updates=6613, lr=9.99551e-05, gnorm=2.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=76079
2021-06-19 15:46:56 | INFO | train_inner | epoch 003: 653 / 3002 loss=2.725, ppl=6.61, wps=5855.6, ups=0.09, wpb=64855, bsz=128, num_updates=6614, lr=9.99551e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=76090
2021-06-19 15:47:07 | INFO | train_inner | epoch 003: 654 / 3002 loss=2.697, ppl=6.48, wps=5858.4, ups=0.09, wpb=64698, bsz=128, num_updates=6615, lr=9.99551e-05, gnorm=2.119, loss_scale=2, train_wall=11, gb_free=2.8, wall=76101
2021-06-19 15:47:18 | INFO | train_inner | epoch 003: 655 / 3002 loss=2.789, ppl=6.91, wps=5749.7, ups=0.09, wpb=64743, bsz=128, num_updates=6616, lr=9.99551e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=76112
2021-06-19 15:47:29 | INFO | train_inner | epoch 003: 656 / 3002 loss=2.721, ppl=6.59, wps=5764.2, ups=0.09, wpb=64715, bsz=128, num_updates=6617, lr=9.99551e-05, gnorm=8.004, loss_scale=2, train_wall=11, gb_free=2.8, wall=76124
2021-06-19 15:47:40 | INFO | train_inner | epoch 003: 657 / 3002 loss=2.665, ppl=6.34, wps=5898.6, ups=0.09, wpb=64864, bsz=128, num_updates=6618, lr=9.99551e-05, gnorm=2.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=76135
2021-06-19 15:47:51 | INFO | train_inner | epoch 003: 658 / 3002 loss=2.62, ppl=6.15, wps=5873.4, ups=0.09, wpb=64812, bsz=128, num_updates=6619, lr=9.9955e-05, gnorm=2.177, loss_scale=2, train_wall=11, gb_free=2.8, wall=76146
2021-06-19 15:48:02 | INFO | train_inner | epoch 003: 659 / 3002 loss=2.764, ppl=6.79, wps=5942.7, ups=0.09, wpb=64857, bsz=128, num_updates=6620, lr=9.9955e-05, gnorm=2.269, loss_scale=2, train_wall=10, gb_free=2.8, wall=76157
2021-06-19 15:48:13 | INFO | train_inner | epoch 003: 660 / 3002 loss=2.557, ppl=5.89, wps=5820.9, ups=0.09, wpb=64819, bsz=128, num_updates=6621, lr=9.9955e-05, gnorm=2.253, loss_scale=2, train_wall=11, gb_free=2.8, wall=76168
2021-06-19 15:48:25 | INFO | train_inner | epoch 003: 661 / 3002 loss=2.756, ppl=6.75, wps=5753.9, ups=0.09, wpb=64760, bsz=128, num_updates=6622, lr=9.9955e-05, gnorm=2.689, loss_scale=2, train_wall=11, gb_free=2.8, wall=76179
2021-06-19 15:48:36 | INFO | train_inner | epoch 003: 662 / 3002 loss=2.677, ppl=6.4, wps=5933, ups=0.09, wpb=64881, bsz=128, num_updates=6623, lr=9.9955e-05, gnorm=2.324, loss_scale=2, train_wall=11, gb_free=2.8, wall=76190
2021-06-19 15:48:47 | INFO | train_inner | epoch 003: 663 / 3002 loss=2.693, ppl=6.47, wps=5682.6, ups=0.09, wpb=64771, bsz=128, num_updates=6624, lr=9.9955e-05, gnorm=2.554, loss_scale=2, train_wall=11, gb_free=2.8, wall=76201
2021-06-19 15:48:58 | INFO | train_inner | epoch 003: 664 / 3002 loss=2.717, ppl=6.57, wps=5877.2, ups=0.09, wpb=64844, bsz=128, num_updates=6625, lr=9.9955e-05, gnorm=2.756, loss_scale=2, train_wall=11, gb_free=2.8, wall=76212
2021-06-19 15:49:09 | INFO | train_inner | epoch 003: 665 / 3002 loss=2.718, ppl=6.58, wps=5849.6, ups=0.09, wpb=64807, bsz=128, num_updates=6626, lr=9.9955e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=76223
2021-06-19 15:49:20 | INFO | train_inner | epoch 003: 666 / 3002 loss=2.667, ppl=6.35, wps=5917.3, ups=0.09, wpb=64860, bsz=128, num_updates=6627, lr=9.9955e-05, gnorm=2.368, loss_scale=2, train_wall=10, gb_free=2.8, wall=76234
2021-06-19 15:49:31 | INFO | train_inner | epoch 003: 667 / 3002 loss=2.66, ppl=6.32, wps=5771.2, ups=0.09, wpb=64837, bsz=128, num_updates=6628, lr=9.9955e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=76246
2021-06-19 15:49:42 | INFO | train_inner | epoch 003: 668 / 3002 loss=2.659, ppl=6.32, wps=6027.7, ups=0.09, wpb=64918, bsz=128, num_updates=6629, lr=9.9955e-05, gnorm=2.184, loss_scale=4, train_wall=10, gb_free=2.8, wall=76256
2021-06-19 15:49:53 | INFO | train_inner | epoch 003: 669 / 3002 loss=2.593, ppl=6.03, wps=5780.2, ups=0.09, wpb=64755, bsz=128, num_updates=6630, lr=9.9955e-05, gnorm=2.54, loss_scale=4, train_wall=11, gb_free=2.8, wall=76268
2021-06-19 15:50:04 | INFO | train_inner | epoch 003: 670 / 3002 loss=2.62, ppl=6.15, wps=5751.3, ups=0.09, wpb=64744, bsz=128, num_updates=6631, lr=9.99549e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=76279
2021-06-19 15:50:16 | INFO | train_inner | epoch 003: 671 / 3002 loss=2.636, ppl=6.22, wps=5845.6, ups=0.09, wpb=64834, bsz=128, num_updates=6632, lr=9.99549e-05, gnorm=2.367, loss_scale=4, train_wall=11, gb_free=2.8, wall=76290
2021-06-19 15:50:27 | INFO | train_inner | epoch 003: 672 / 3002 loss=2.771, ppl=6.83, wps=5771.3, ups=0.09, wpb=64828, bsz=128, num_updates=6633, lr=9.99549e-05, gnorm=2.308, loss_scale=4, train_wall=11, gb_free=2.8, wall=76301
2021-06-19 15:50:38 | INFO | train_inner | epoch 003: 673 / 3002 loss=2.708, ppl=6.53, wps=5674.5, ups=0.09, wpb=64764, bsz=128, num_updates=6634, lr=9.99549e-05, gnorm=2.3, loss_scale=4, train_wall=11, gb_free=2.8, wall=76313
2021-06-19 15:50:49 | INFO | train_inner | epoch 003: 674 / 3002 loss=2.681, ppl=6.41, wps=5745.7, ups=0.09, wpb=64844, bsz=128, num_updates=6635, lr=9.99549e-05, gnorm=2.084, loss_scale=4, train_wall=11, gb_free=2.8, wall=76324
2021-06-19 15:51:00 | INFO | train_inner | epoch 003: 675 / 3002 loss=2.75, ppl=6.73, wps=5886.6, ups=0.09, wpb=64768, bsz=128, num_updates=6636, lr=9.99549e-05, gnorm=2.273, loss_scale=4, train_wall=11, gb_free=2.8, wall=76335
2021-06-19 15:51:12 | INFO | train_inner | epoch 003: 676 / 3002 loss=2.744, ppl=6.7, wps=5763.6, ups=0.09, wpb=64829, bsz=128, num_updates=6637, lr=9.99549e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=76346
2021-06-19 15:51:23 | INFO | train_inner | epoch 003: 677 / 3002 loss=2.721, ppl=6.59, wps=5811, ups=0.09, wpb=64842, bsz=128, num_updates=6638, lr=9.99549e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=76357
2021-06-19 15:51:34 | INFO | train_inner | epoch 003: 678 / 3002 loss=2.632, ppl=6.2, wps=5841.4, ups=0.09, wpb=64850, bsz=128, num_updates=6639, lr=9.99549e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=76368
2021-06-19 15:51:45 | INFO | train_inner | epoch 003: 679 / 3002 loss=2.502, ppl=5.66, wps=5823.8, ups=0.09, wpb=64831, bsz=128, num_updates=6640, lr=9.99549e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=76379
2021-06-19 15:51:56 | INFO | train_inner | epoch 003: 680 / 3002 loss=2.767, ppl=6.81, wps=5864.3, ups=0.09, wpb=64810, bsz=128, num_updates=6641, lr=9.99549e-05, gnorm=2.462, loss_scale=4, train_wall=11, gb_free=2.8, wall=76391
2021-06-19 15:52:07 | INFO | train_inner | epoch 003: 681 / 3002 loss=2.721, ppl=6.59, wps=5803.6, ups=0.09, wpb=64848, bsz=128, num_updates=6642, lr=9.99549e-05, gnorm=2.291, loss_scale=4, train_wall=11, gb_free=2.8, wall=76402
2021-06-19 15:52:18 | INFO | train_inner | epoch 003: 682 / 3002 loss=2.593, ppl=6.03, wps=5912.7, ups=0.09, wpb=64789, bsz=128, num_updates=6643, lr=9.99549e-05, gnorm=2.696, loss_scale=4, train_wall=11, gb_free=2.8, wall=76413
2021-06-19 15:52:29 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-19 15:52:40 | INFO | train_inner | epoch 003: 684 / 3002 loss=2.667, ppl=6.35, wps=2952.5, ups=0.05, wpb=64775, bsz=128, num_updates=6644, lr=9.99548e-05, gnorm=2.201, loss_scale=2, train_wall=21, gb_free=2.8, wall=76435
2021-06-19 15:52:52 | INFO | train_inner | epoch 003: 685 / 3002 loss=2.625, ppl=6.17, wps=5767.7, ups=0.09, wpb=64864, bsz=128, num_updates=6645, lr=9.99548e-05, gnorm=2.195, loss_scale=2, train_wall=11, gb_free=2.8, wall=76446
2021-06-19 15:53:02 | INFO | train_inner | epoch 003: 686 / 3002 loss=2.631, ppl=6.19, wps=5960.4, ups=0.09, wpb=64836, bsz=128, num_updates=6646, lr=9.99548e-05, gnorm=2.201, loss_scale=2, train_wall=10, gb_free=2.8, wall=76457
2021-06-19 15:53:14 | INFO | train_inner | epoch 003: 687 / 3002 loss=2.655, ppl=6.3, wps=5746.2, ups=0.09, wpb=64797, bsz=128, num_updates=6647, lr=9.99548e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=76468
2021-06-19 15:53:25 | INFO | train_inner | epoch 003: 688 / 3002 loss=2.723, ppl=6.6, wps=5764.9, ups=0.09, wpb=64758, bsz=128, num_updates=6648, lr=9.99548e-05, gnorm=2.177, loss_scale=2, train_wall=11, gb_free=2.8, wall=76479
2021-06-19 15:53:36 | INFO | train_inner | epoch 003: 689 / 3002 loss=2.571, ppl=5.94, wps=5789.8, ups=0.09, wpb=64817, bsz=128, num_updates=6649, lr=9.99548e-05, gnorm=2.265, loss_scale=2, train_wall=11, gb_free=2.8, wall=76490
2021-06-19 15:53:47 | INFO | train_inner | epoch 003: 690 / 3002 loss=2.611, ppl=6.11, wps=5721.3, ups=0.09, wpb=64836, bsz=128, num_updates=6650, lr=9.99548e-05, gnorm=2.797, loss_scale=2, train_wall=11, gb_free=2.8, wall=76502
2021-06-19 15:53:59 | INFO | train_inner | epoch 003: 691 / 3002 loss=2.74, ppl=6.68, wps=5780.8, ups=0.09, wpb=64833, bsz=128, num_updates=6651, lr=9.99548e-05, gnorm=2.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=76513
2021-06-19 15:54:10 | INFO | train_inner | epoch 003: 692 / 3002 loss=2.675, ppl=6.39, wps=5946.1, ups=0.09, wpb=64867, bsz=128, num_updates=6652, lr=9.99548e-05, gnorm=2.2, loss_scale=2, train_wall=10, gb_free=2.8, wall=76524
2021-06-19 15:54:20 | INFO | train_inner | epoch 003: 693 / 3002 loss=2.632, ppl=6.2, wps=5926, ups=0.09, wpb=64888, bsz=128, num_updates=6653, lr=9.99548e-05, gnorm=4.985, loss_scale=2, train_wall=10, gb_free=2.8, wall=76535
2021-06-19 15:54:31 | INFO | train_inner | epoch 003: 694 / 3002 loss=2.57, ppl=5.94, wps=5912.5, ups=0.09, wpb=64758, bsz=128, num_updates=6654, lr=9.99548e-05, gnorm=3.179, loss_scale=2, train_wall=11, gb_free=2.8, wall=76546
2021-06-19 15:54:42 | INFO | train_inner | epoch 003: 695 / 3002 loss=2.79, ppl=6.92, wps=5870.8, ups=0.09, wpb=64886, bsz=128, num_updates=6655, lr=9.99548e-05, gnorm=2.153, loss_scale=2, train_wall=11, gb_free=2.8, wall=76557
2021-06-19 15:54:54 | INFO | train_inner | epoch 003: 696 / 3002 loss=2.59, ppl=6.02, wps=5760.7, ups=0.09, wpb=64846, bsz=128, num_updates=6656, lr=9.99547e-05, gnorm=2.259, loss_scale=2, train_wall=11, gb_free=2.8, wall=76568
2021-06-19 15:55:05 | INFO | train_inner | epoch 003: 697 / 3002 loss=2.568, ppl=5.93, wps=5777.7, ups=0.09, wpb=64825, bsz=128, num_updates=6657, lr=9.99547e-05, gnorm=2.282, loss_scale=2, train_wall=11, gb_free=2.8, wall=76579
2021-06-19 15:55:16 | INFO | train_inner | epoch 003: 698 / 3002 loss=2.598, ppl=6.05, wps=5849.9, ups=0.09, wpb=64881, bsz=128, num_updates=6658, lr=9.99547e-05, gnorm=2.23, loss_scale=2, train_wall=11, gb_free=2.8, wall=76590
2021-06-19 15:55:27 | INFO | train_inner | epoch 003: 699 / 3002 loss=2.631, ppl=6.2, wps=5734.9, ups=0.09, wpb=64781, bsz=128, num_updates=6659, lr=9.99547e-05, gnorm=2.273, loss_scale=2, train_wall=11, gb_free=2.8, wall=76602
2021-06-19 15:55:38 | INFO | train_inner | epoch 003: 700 / 3002 loss=2.488, ppl=5.61, wps=5872.1, ups=0.09, wpb=64847, bsz=128, num_updates=6660, lr=9.99547e-05, gnorm=2.239, loss_scale=2, train_wall=11, gb_free=2.8, wall=76613
2021-06-19 15:55:50 | INFO | train_inner | epoch 003: 701 / 3002 loss=2.666, ppl=6.35, wps=5789.9, ups=0.09, wpb=64846, bsz=128, num_updates=6661, lr=9.99547e-05, gnorm=2.363, loss_scale=2, train_wall=11, gb_free=2.8, wall=76624
2021-06-19 15:56:01 | INFO | train_inner | epoch 003: 702 / 3002 loss=2.715, ppl=6.57, wps=5775.1, ups=0.09, wpb=64692, bsz=128, num_updates=6662, lr=9.99547e-05, gnorm=2.2, loss_scale=2, train_wall=11, gb_free=2.8, wall=76635
2021-06-19 15:56:12 | INFO | train_inner | epoch 003: 703 / 3002 loss=2.709, ppl=6.54, wps=5778.7, ups=0.09, wpb=64876, bsz=128, num_updates=6663, lr=9.99547e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=76646
2021-06-19 15:56:23 | INFO | train_inner | epoch 003: 704 / 3002 loss=2.742, ppl=6.69, wps=5812.6, ups=0.09, wpb=64816, bsz=128, num_updates=6664, lr=9.99547e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=76658
2021-06-19 15:56:34 | INFO | train_inner | epoch 003: 705 / 3002 loss=2.58, ppl=5.98, wps=5832.7, ups=0.09, wpb=64907, bsz=128, num_updates=6665, lr=9.99547e-05, gnorm=2.131, loss_scale=2, train_wall=11, gb_free=2.8, wall=76669
2021-06-19 15:56:45 | INFO | train_inner | epoch 003: 706 / 3002 loss=2.829, ppl=7.11, wps=5867.9, ups=0.09, wpb=64811, bsz=128, num_updates=6666, lr=9.99547e-05, gnorm=2.323, loss_scale=2, train_wall=11, gb_free=2.8, wall=76680
2021-06-19 15:56:56 | INFO | train_inner | epoch 003: 707 / 3002 loss=2.502, ppl=5.66, wps=5835.8, ups=0.09, wpb=64887, bsz=128, num_updates=6667, lr=9.99547e-05, gnorm=2.051, loss_scale=2, train_wall=11, gb_free=2.8, wall=76691
2021-06-19 15:57:07 | INFO | train_inner | epoch 003: 708 / 3002 loss=2.689, ppl=6.45, wps=5934, ups=0.09, wpb=64776, bsz=128, num_updates=6668, lr=9.99547e-05, gnorm=2.211, loss_scale=2, train_wall=10, gb_free=2.8, wall=76702
2021-06-19 15:57:19 | INFO | train_inner | epoch 003: 709 / 3002 loss=2.559, ppl=5.89, wps=5830.1, ups=0.09, wpb=64889, bsz=128, num_updates=6669, lr=9.99546e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=76713
2021-06-19 15:57:30 | INFO | train_inner | epoch 003: 710 / 3002 loss=2.742, ppl=6.69, wps=5888.8, ups=0.09, wpb=64849, bsz=128, num_updates=6670, lr=9.99546e-05, gnorm=2.275, loss_scale=2, train_wall=11, gb_free=2.8, wall=76724
2021-06-19 15:57:41 | INFO | train_inner | epoch 003: 711 / 3002 loss=2.774, ppl=6.84, wps=5825.5, ups=0.09, wpb=64794, bsz=128, num_updates=6671, lr=9.99546e-05, gnorm=2.369, loss_scale=2, train_wall=11, gb_free=2.8, wall=76735
2021-06-19 15:57:52 | INFO | train_inner | epoch 003: 712 / 3002 loss=2.642, ppl=6.24, wps=5887.6, ups=0.09, wpb=64828, bsz=128, num_updates=6672, lr=9.99546e-05, gnorm=2.174, loss_scale=2, train_wall=11, gb_free=2.8, wall=76746
2021-06-19 15:58:02 | INFO | train_inner | epoch 003: 713 / 3002 loss=2.665, ppl=6.34, wps=6020.6, ups=0.09, wpb=64920, bsz=128, num_updates=6673, lr=9.99546e-05, gnorm=2.1, loss_scale=2, train_wall=10, gb_free=2.8, wall=76757
2021-06-19 15:58:13 | INFO | train_inner | epoch 003: 714 / 3002 loss=2.684, ppl=6.43, wps=5894.4, ups=0.09, wpb=64866, bsz=128, num_updates=6674, lr=9.99546e-05, gnorm=2.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=76768
2021-06-19 15:58:24 | INFO | train_inner | epoch 003: 715 / 3002 loss=2.71, ppl=6.55, wps=5964.2, ups=0.09, wpb=64865, bsz=128, num_updates=6675, lr=9.99546e-05, gnorm=2.076, loss_scale=2, train_wall=10, gb_free=2.8, wall=76779
2021-06-19 15:58:36 | INFO | train_inner | epoch 003: 716 / 3002 loss=2.552, ppl=5.86, wps=5801.7, ups=0.09, wpb=64769, bsz=128, num_updates=6676, lr=9.99546e-05, gnorm=2.066, loss_scale=2, train_wall=11, gb_free=2.8, wall=76790
2021-06-19 15:58:46 | INFO | train_inner | epoch 003: 717 / 3002 loss=2.534, ppl=5.79, wps=5976.8, ups=0.09, wpb=64779, bsz=128, num_updates=6677, lr=9.99546e-05, gnorm=2.078, loss_scale=2, train_wall=10, gb_free=2.8, wall=76801
2021-06-19 15:58:58 | INFO | train_inner | epoch 003: 718 / 3002 loss=2.683, ppl=6.42, wps=5750.6, ups=0.09, wpb=64790, bsz=128, num_updates=6678, lr=9.99546e-05, gnorm=2.246, loss_scale=2, train_wall=11, gb_free=2.8, wall=76812
2021-06-19 15:59:09 | INFO | train_inner | epoch 003: 719 / 3002 loss=2.612, ppl=6.12, wps=5832, ups=0.09, wpb=64886, bsz=128, num_updates=6679, lr=9.99546e-05, gnorm=2.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=76823
2021-06-19 15:59:20 | INFO | train_inner | epoch 003: 720 / 3002 loss=2.892, ppl=7.42, wps=5986.6, ups=0.09, wpb=64803, bsz=128, num_updates=6680, lr=9.99546e-05, gnorm=2.167, loss_scale=2, train_wall=10, gb_free=2.8, wall=76834
2021-06-19 15:59:31 | INFO | train_inner | epoch 003: 721 / 3002 loss=2.655, ppl=6.3, wps=5802, ups=0.09, wpb=64790, bsz=128, num_updates=6681, lr=9.99545e-05, gnorm=2.05, loss_scale=2, train_wall=11, gb_free=2.8, wall=76845
2021-06-19 15:59:42 | INFO | train_inner | epoch 003: 722 / 3002 loss=2.692, ppl=6.46, wps=6014, ups=0.09, wpb=64930, bsz=128, num_updates=6682, lr=9.99545e-05, gnorm=2.252, loss_scale=2, train_wall=10, gb_free=2.8, wall=76856
2021-06-19 15:59:52 | INFO | train_inner | epoch 003: 723 / 3002 loss=2.812, ppl=7.02, wps=5996.3, ups=0.09, wpb=64927, bsz=128, num_updates=6683, lr=9.99545e-05, gnorm=2.197, loss_scale=2, train_wall=10, gb_free=2.8, wall=76867
2021-06-19 16:00:03 | INFO | train_inner | epoch 003: 724 / 3002 loss=2.587, ppl=6.01, wps=5816.9, ups=0.09, wpb=64785, bsz=128, num_updates=6684, lr=9.99545e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=76878
2021-06-19 16:00:15 | INFO | train_inner | epoch 003: 725 / 3002 loss=2.628, ppl=6.18, wps=5849.8, ups=0.09, wpb=64822, bsz=128, num_updates=6685, lr=9.99545e-05, gnorm=2.173, loss_scale=2, train_wall=11, gb_free=2.8, wall=76889
2021-06-19 16:00:26 | INFO | train_inner | epoch 003: 726 / 3002 loss=2.572, ppl=5.95, wps=5836.1, ups=0.09, wpb=64810, bsz=128, num_updates=6686, lr=9.99545e-05, gnorm=2.267, loss_scale=2, train_wall=11, gb_free=2.8, wall=76900
2021-06-19 16:00:37 | INFO | train_inner | epoch 003: 727 / 3002 loss=2.59, ppl=6.02, wps=5766.3, ups=0.09, wpb=64866, bsz=128, num_updates=6687, lr=9.99545e-05, gnorm=2.136, loss_scale=2, train_wall=11, gb_free=2.8, wall=76911
2021-06-19 16:00:48 | INFO | train_inner | epoch 003: 728 / 3002 loss=2.641, ppl=6.24, wps=5834.3, ups=0.09, wpb=64854, bsz=128, num_updates=6688, lr=9.99545e-05, gnorm=2.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=76922
2021-06-19 16:00:59 | INFO | train_inner | epoch 003: 729 / 3002 loss=2.626, ppl=6.17, wps=5860.1, ups=0.09, wpb=64879, bsz=128, num_updates=6689, lr=9.99545e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=76933
2021-06-19 16:01:10 | INFO | train_inner | epoch 003: 730 / 3002 loss=2.708, ppl=6.54, wps=5870.4, ups=0.09, wpb=64879, bsz=128, num_updates=6690, lr=9.99545e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=76944
2021-06-19 16:01:21 | INFO | train_inner | epoch 003: 731 / 3002 loss=2.574, ppl=5.95, wps=5923.9, ups=0.09, wpb=64832, bsz=128, num_updates=6691, lr=9.99545e-05, gnorm=2.999, loss_scale=2, train_wall=11, gb_free=2.8, wall=76955
2021-06-19 16:01:32 | INFO | train_inner | epoch 003: 732 / 3002 loss=2.566, ppl=5.92, wps=5975, ups=0.09, wpb=64786, bsz=128, num_updates=6692, lr=9.99545e-05, gnorm=2.185, loss_scale=2, train_wall=10, gb_free=2.8, wall=76966
2021-06-19 16:01:43 | INFO | train_inner | epoch 003: 733 / 3002 loss=2.781, ppl=6.87, wps=5874.5, ups=0.09, wpb=64845, bsz=128, num_updates=6693, lr=9.99545e-05, gnorm=2.144, loss_scale=2, train_wall=11, gb_free=2.8, wall=76977
2021-06-19 16:01:54 | INFO | train_inner | epoch 003: 734 / 3002 loss=2.591, ppl=6.03, wps=5804.9, ups=0.09, wpb=64879, bsz=128, num_updates=6694, lr=9.99544e-05, gnorm=2.125, loss_scale=2, train_wall=11, gb_free=2.8, wall=76989
2021-06-19 16:02:05 | INFO | train_inner | epoch 003: 735 / 3002 loss=2.547, ppl=5.84, wps=5901.8, ups=0.09, wpb=64865, bsz=128, num_updates=6695, lr=9.99544e-05, gnorm=2.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=76999
2021-06-19 16:02:16 | INFO | train_inner | epoch 003: 736 / 3002 loss=2.515, ppl=5.72, wps=5864.9, ups=0.09, wpb=64817, bsz=128, num_updates=6696, lr=9.99544e-05, gnorm=2.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=77011
2021-06-19 16:02:27 | INFO | train_inner | epoch 003: 737 / 3002 loss=2.759, ppl=6.77, wps=5829.4, ups=0.09, wpb=64737, bsz=128, num_updates=6697, lr=9.99544e-05, gnorm=2.184, loss_scale=2, train_wall=11, gb_free=2.8, wall=77022
2021-06-19 16:02:38 | INFO | train_inner | epoch 003: 738 / 3002 loss=2.702, ppl=6.51, wps=5837.8, ups=0.09, wpb=64881, bsz=128, num_updates=6698, lr=9.99544e-05, gnorm=2.165, loss_scale=2, train_wall=11, gb_free=2.8, wall=77033
2021-06-19 16:02:50 | INFO | train_inner | epoch 003: 739 / 3002 loss=2.83, ppl=7.11, wps=5851.8, ups=0.09, wpb=64836, bsz=128, num_updates=6699, lr=9.99544e-05, gnorm=2.16, loss_scale=2, train_wall=11, gb_free=2.8, wall=77044
2021-06-19 16:03:01 | INFO | train_inner | epoch 003: 740 / 3002 loss=2.673, ppl=6.38, wps=5773, ups=0.09, wpb=64878, bsz=128, num_updates=6700, lr=9.99544e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=77055
2021-06-19 16:03:12 | INFO | train_inner | epoch 003: 741 / 3002 loss=2.694, ppl=6.47, wps=5855.9, ups=0.09, wpb=64816, bsz=128, num_updates=6701, lr=9.99544e-05, gnorm=2.185, loss_scale=2, train_wall=11, gb_free=2.8, wall=77066
2021-06-19 16:03:23 | INFO | train_inner | epoch 003: 742 / 3002 loss=2.637, ppl=6.22, wps=5744.9, ups=0.09, wpb=64894, bsz=128, num_updates=6702, lr=9.99544e-05, gnorm=2.198, loss_scale=2, train_wall=11, gb_free=2.8, wall=77077
2021-06-19 16:03:34 | INFO | train_inner | epoch 003: 743 / 3002 loss=2.704, ppl=6.51, wps=5849.3, ups=0.09, wpb=64810, bsz=128, num_updates=6703, lr=9.99544e-05, gnorm=2.205, loss_scale=2, train_wall=11, gb_free=2.8, wall=77089
2021-06-19 16:03:45 | INFO | train_inner | epoch 003: 744 / 3002 loss=2.663, ppl=6.33, wps=5835.5, ups=0.09, wpb=64916, bsz=128, num_updates=6704, lr=9.99544e-05, gnorm=2.095, loss_scale=2, train_wall=11, gb_free=2.8, wall=77100
2021-06-19 16:03:56 | INFO | train_inner | epoch 003: 745 / 3002 loss=2.579, ppl=5.97, wps=5983.2, ups=0.09, wpb=64846, bsz=128, num_updates=6705, lr=9.99544e-05, gnorm=2.152, loss_scale=2, train_wall=10, gb_free=2.8, wall=77110
2021-06-19 16:04:07 | INFO | train_inner | epoch 003: 746 / 3002 loss=2.662, ppl=6.33, wps=5782.1, ups=0.09, wpb=64827, bsz=128, num_updates=6706, lr=9.99543e-05, gnorm=5.654, loss_scale=2, train_wall=11, gb_free=2.8, wall=77122
2021-06-19 16:04:19 | INFO | train_inner | epoch 003: 747 / 3002 loss=2.632, ppl=6.2, wps=5740.8, ups=0.09, wpb=64927, bsz=128, num_updates=6707, lr=9.99543e-05, gnorm=6.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=77133
2021-06-19 16:04:30 | INFO | train_inner | epoch 003: 748 / 3002 loss=2.706, ppl=6.52, wps=5843.4, ups=0.09, wpb=64805, bsz=128, num_updates=6708, lr=9.99543e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=77144
2021-06-19 16:04:41 | INFO | train_inner | epoch 003: 749 / 3002 loss=2.636, ppl=6.21, wps=5766.3, ups=0.09, wpb=64798, bsz=128, num_updates=6709, lr=9.99543e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=77155
2021-06-19 16:04:52 | INFO | train_inner | epoch 003: 750 / 3002 loss=2.642, ppl=6.24, wps=5773.1, ups=0.09, wpb=64813, bsz=128, num_updates=6710, lr=9.99543e-05, gnorm=2.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=77167
2021-06-19 16:05:04 | INFO | train_inner | epoch 003: 751 / 3002 loss=2.497, ppl=5.64, wps=5743.4, ups=0.09, wpb=64776, bsz=128, num_updates=6711, lr=9.99543e-05, gnorm=2.264, loss_scale=2, train_wall=11, gb_free=2.8, wall=77178
2021-06-19 16:05:15 | INFO | train_inner | epoch 003: 752 / 3002 loss=2.656, ppl=6.3, wps=5877, ups=0.09, wpb=64804, bsz=128, num_updates=6712, lr=9.99543e-05, gnorm=3.183, loss_scale=2, train_wall=11, gb_free=2.8, wall=77189
2021-06-19 16:05:26 | INFO | train_inner | epoch 003: 753 / 3002 loss=2.689, ppl=6.45, wps=5859.1, ups=0.09, wpb=64911, bsz=128, num_updates=6713, lr=9.99543e-05, gnorm=2.409, loss_scale=2, train_wall=11, gb_free=2.8, wall=77200
2021-06-19 16:05:37 | INFO | train_inner | epoch 003: 754 / 3002 loss=2.89, ppl=7.41, wps=5758.8, ups=0.09, wpb=64833, bsz=128, num_updates=6714, lr=9.99543e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=77211
2021-06-19 16:05:48 | INFO | train_inner | epoch 003: 755 / 3002 loss=2.781, ppl=6.88, wps=5752.3, ups=0.09, wpb=64812, bsz=128, num_updates=6715, lr=9.99543e-05, gnorm=2.235, loss_scale=2, train_wall=11, gb_free=2.8, wall=77222
2021-06-19 16:05:59 | INFO | train_inner | epoch 003: 756 / 3002 loss=2.651, ppl=6.28, wps=5795.8, ups=0.09, wpb=64862, bsz=128, num_updates=6716, lr=9.99543e-05, gnorm=2.313, loss_scale=2, train_wall=11, gb_free=2.8, wall=77234
2021-06-19 16:06:10 | INFO | train_inner | epoch 003: 757 / 3002 loss=2.584, ppl=6, wps=5878.8, ups=0.09, wpb=64832, bsz=128, num_updates=6717, lr=9.99543e-05, gnorm=2.135, loss_scale=2, train_wall=11, gb_free=2.8, wall=77245
2021-06-19 16:06:22 | INFO | train_inner | epoch 003: 758 / 3002 loss=2.709, ppl=6.54, wps=5820.6, ups=0.09, wpb=64865, bsz=128, num_updates=6718, lr=9.99543e-05, gnorm=2.162, loss_scale=2, train_wall=11, gb_free=2.8, wall=77256
2021-06-19 16:06:32 | INFO | train_inner | epoch 003: 759 / 3002 loss=2.723, ppl=6.6, wps=5908.3, ups=0.09, wpb=64873, bsz=128, num_updates=6719, lr=9.99542e-05, gnorm=2.625, loss_scale=2, train_wall=11, gb_free=2.8, wall=77267
2021-06-19 16:06:44 | INFO | train_inner | epoch 003: 760 / 3002 loss=2.697, ppl=6.49, wps=5843.4, ups=0.09, wpb=64790, bsz=128, num_updates=6720, lr=9.99542e-05, gnorm=2.358, loss_scale=2, train_wall=11, gb_free=2.8, wall=77278
2021-06-19 16:06:54 | INFO | train_inner | epoch 003: 761 / 3002 loss=2.611, ppl=6.11, wps=5960.9, ups=0.09, wpb=64840, bsz=128, num_updates=6721, lr=9.99542e-05, gnorm=2.151, loss_scale=2, train_wall=10, gb_free=2.8, wall=77289
2021-06-19 16:07:06 | INFO | train_inner | epoch 003: 762 / 3002 loss=2.654, ppl=6.29, wps=5821.7, ups=0.09, wpb=64787, bsz=128, num_updates=6722, lr=9.99542e-05, gnorm=2.242, loss_scale=2, train_wall=11, gb_free=2.8, wall=77300
2021-06-19 16:07:17 | INFO | train_inner | epoch 003: 763 / 3002 loss=2.679, ppl=6.4, wps=5862.6, ups=0.09, wpb=64757, bsz=128, num_updates=6723, lr=9.99542e-05, gnorm=2.234, loss_scale=2, train_wall=11, gb_free=2.8, wall=77311
2021-06-19 16:07:28 | INFO | train_inner | epoch 003: 764 / 3002 loss=2.667, ppl=6.35, wps=5928.6, ups=0.09, wpb=64760, bsz=128, num_updates=6724, lr=9.99542e-05, gnorm=2.131, loss_scale=2, train_wall=10, gb_free=2.8, wall=77322
2021-06-19 16:07:39 | INFO | train_inner | epoch 003: 765 / 3002 loss=2.665, ppl=6.34, wps=5796.8, ups=0.09, wpb=64924, bsz=128, num_updates=6725, lr=9.99542e-05, gnorm=3.499, loss_scale=2, train_wall=11, gb_free=2.8, wall=77333
2021-06-19 16:07:50 | INFO | train_inner | epoch 003: 766 / 3002 loss=2.705, ppl=6.52, wps=5826.3, ups=0.09, wpb=64830, bsz=128, num_updates=6726, lr=9.99542e-05, gnorm=2.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=77344
2021-06-19 16:08:01 | INFO | train_inner | epoch 003: 767 / 3002 loss=2.528, ppl=5.77, wps=5876.4, ups=0.09, wpb=64878, bsz=128, num_updates=6727, lr=9.99542e-05, gnorm=2.503, loss_scale=2, train_wall=11, gb_free=2.8, wall=77355
2021-06-19 16:08:12 | INFO | train_inner | epoch 003: 768 / 3002 loss=2.742, ppl=6.69, wps=5814.1, ups=0.09, wpb=64785, bsz=128, num_updates=6728, lr=9.99542e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=77366
2021-06-19 16:08:23 | INFO | train_inner | epoch 003: 769 / 3002 loss=2.624, ppl=6.16, wps=5846.5, ups=0.09, wpb=64851, bsz=128, num_updates=6729, lr=9.99542e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=77377
2021-06-19 16:08:34 | INFO | train_inner | epoch 003: 770 / 3002 loss=2.739, ppl=6.68, wps=5827.9, ups=0.09, wpb=64894, bsz=128, num_updates=6730, lr=9.99542e-05, gnorm=7.585, loss_scale=2, train_wall=11, gb_free=2.8, wall=77389
2021-06-19 16:08:45 | INFO | train_inner | epoch 003: 771 / 3002 loss=2.557, ppl=5.89, wps=5915.2, ups=0.09, wpb=64864, bsz=128, num_updates=6731, lr=9.99541e-05, gnorm=2.055, loss_scale=2, train_wall=11, gb_free=2.8, wall=77400
2021-06-19 16:08:56 | INFO | train_inner | epoch 003: 772 / 3002 loss=2.632, ppl=6.2, wps=5983, ups=0.09, wpb=64854, bsz=128, num_updates=6732, lr=9.99541e-05, gnorm=2.297, loss_scale=2, train_wall=10, gb_free=2.8, wall=77410
2021-06-19 16:09:07 | INFO | train_inner | epoch 003: 773 / 3002 loss=2.637, ppl=6.22, wps=5801, ups=0.09, wpb=64844, bsz=128, num_updates=6733, lr=9.99541e-05, gnorm=2.431, loss_scale=2, train_wall=11, gb_free=2.8, wall=77422
2021-06-19 16:09:18 | INFO | train_inner | epoch 003: 774 / 3002 loss=2.628, ppl=6.18, wps=5872, ups=0.09, wpb=64782, bsz=128, num_updates=6734, lr=9.99541e-05, gnorm=2.365, loss_scale=2, train_wall=11, gb_free=2.8, wall=77433
2021-06-19 16:09:29 | INFO | train_inner | epoch 003: 775 / 3002 loss=2.585, ppl=6, wps=5807.8, ups=0.09, wpb=64809, bsz=128, num_updates=6735, lr=9.99541e-05, gnorm=2.745, loss_scale=2, train_wall=11, gb_free=2.8, wall=77444
2021-06-19 16:09:40 | INFO | train_inner | epoch 003: 776 / 3002 loss=2.68, ppl=6.41, wps=5902.7, ups=0.09, wpb=64761, bsz=128, num_updates=6736, lr=9.99541e-05, gnorm=2.238, loss_scale=2, train_wall=11, gb_free=2.8, wall=77455
2021-06-19 16:09:52 | INFO | train_inner | epoch 003: 777 / 3002 loss=2.624, ppl=6.17, wps=5855.9, ups=0.09, wpb=64876, bsz=128, num_updates=6737, lr=9.99541e-05, gnorm=2.523, loss_scale=2, train_wall=11, gb_free=2.8, wall=77466
2021-06-19 16:10:03 | INFO | train_inner | epoch 003: 778 / 3002 loss=2.701, ppl=6.5, wps=5862.8, ups=0.09, wpb=64813, bsz=128, num_updates=6738, lr=9.99541e-05, gnorm=2.354, loss_scale=2, train_wall=11, gb_free=2.8, wall=77477
2021-06-19 16:10:14 | INFO | train_inner | epoch 003: 779 / 3002 loss=2.714, ppl=6.56, wps=5792.6, ups=0.09, wpb=64853, bsz=128, num_updates=6739, lr=9.99541e-05, gnorm=2.073, loss_scale=2, train_wall=11, gb_free=2.8, wall=77488
2021-06-19 16:10:25 | INFO | train_inner | epoch 003: 780 / 3002 loss=2.677, ppl=6.39, wps=5749.1, ups=0.09, wpb=64764, bsz=128, num_updates=6740, lr=9.99541e-05, gnorm=4.097, loss_scale=2, train_wall=11, gb_free=2.8, wall=77499
2021-06-19 16:10:36 | INFO | train_inner | epoch 003: 781 / 3002 loss=2.844, ppl=7.18, wps=5926.5, ups=0.09, wpb=64808, bsz=128, num_updates=6741, lr=9.99541e-05, gnorm=2.213, loss_scale=2, train_wall=10, gb_free=2.8, wall=77510
2021-06-19 16:10:47 | INFO | train_inner | epoch 003: 782 / 3002 loss=2.593, ppl=6.03, wps=5987.9, ups=0.09, wpb=64866, bsz=128, num_updates=6742, lr=9.99541e-05, gnorm=2.08, loss_scale=2, train_wall=10, gb_free=2.8, wall=77521
2021-06-19 16:10:58 | INFO | train_inner | epoch 003: 783 / 3002 loss=2.511, ppl=5.7, wps=5787.7, ups=0.09, wpb=64913, bsz=128, num_updates=6743, lr=9.99541e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=77532
2021-06-19 16:11:09 | INFO | train_inner | epoch 003: 784 / 3002 loss=2.71, ppl=6.54, wps=5905.9, ups=0.09, wpb=64888, bsz=128, num_updates=6744, lr=9.9954e-05, gnorm=2.212, loss_scale=2, train_wall=11, gb_free=2.8, wall=77543
2021-06-19 16:11:20 | INFO | train_inner | epoch 003: 785 / 3002 loss=2.691, ppl=6.46, wps=5828, ups=0.09, wpb=64826, bsz=128, num_updates=6745, lr=9.9954e-05, gnorm=4.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=77554
2021-06-19 16:11:31 | INFO | train_inner | epoch 003: 786 / 3002 loss=2.658, ppl=6.31, wps=5908.6, ups=0.09, wpb=64778, bsz=128, num_updates=6746, lr=9.9954e-05, gnorm=2.463, loss_scale=2, train_wall=10, gb_free=2.8, wall=77565
2021-06-19 16:11:42 | INFO | train_inner | epoch 003: 787 / 3002 loss=2.73, ppl=6.63, wps=5784.7, ups=0.09, wpb=64772, bsz=128, num_updates=6747, lr=9.9954e-05, gnorm=2.196, loss_scale=2, train_wall=11, gb_free=2.8, wall=77577
2021-06-19 16:11:54 | INFO | train_inner | epoch 003: 788 / 3002 loss=2.572, ppl=5.95, wps=5768.2, ups=0.09, wpb=64879, bsz=128, num_updates=6748, lr=9.9954e-05, gnorm=2.16, loss_scale=2, train_wall=11, gb_free=2.8, wall=77588
2021-06-19 16:12:05 | INFO | train_inner | epoch 003: 789 / 3002 loss=2.608, ppl=6.1, wps=5825.3, ups=0.09, wpb=64724, bsz=128, num_updates=6749, lr=9.9954e-05, gnorm=2.123, loss_scale=2, train_wall=11, gb_free=2.8, wall=77599
2021-06-19 16:12:16 | INFO | train_inner | epoch 003: 790 / 3002 loss=2.774, ppl=6.84, wps=5766.7, ups=0.09, wpb=64880, bsz=128, num_updates=6750, lr=9.9954e-05, gnorm=2.139, loss_scale=2, train_wall=11, gb_free=2.8, wall=77610
2021-06-19 16:12:27 | INFO | train_inner | epoch 003: 791 / 3002 loss=2.616, ppl=6.13, wps=5886.1, ups=0.09, wpb=64819, bsz=128, num_updates=6751, lr=9.9954e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=77621
2021-06-19 16:12:38 | INFO | train_inner | epoch 003: 792 / 3002 loss=2.587, ppl=6.01, wps=5852.5, ups=0.09, wpb=64717, bsz=128, num_updates=6752, lr=9.9954e-05, gnorm=2.109, loss_scale=2, train_wall=11, gb_free=2.8, wall=77632
2021-06-19 16:12:49 | INFO | train_inner | epoch 003: 793 / 3002 loss=2.806, ppl=6.99, wps=5852.8, ups=0.09, wpb=64890, bsz=128, num_updates=6753, lr=9.9954e-05, gnorm=3.43, loss_scale=2, train_wall=11, gb_free=2.8, wall=77643
2021-06-19 16:13:00 | INFO | train_inner | epoch 003: 794 / 3002 loss=2.682, ppl=6.42, wps=5864.8, ups=0.09, wpb=64861, bsz=128, num_updates=6754, lr=9.9954e-05, gnorm=7.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=77654
2021-06-19 16:13:11 | INFO | train_inner | epoch 003: 795 / 3002 loss=2.546, ppl=5.84, wps=5785.3, ups=0.09, wpb=64805, bsz=128, num_updates=6755, lr=9.9954e-05, gnorm=2.174, loss_scale=2, train_wall=11, gb_free=2.8, wall=77666
2021-06-19 16:13:23 | INFO | train_inner | epoch 003: 796 / 3002 loss=2.647, ppl=6.26, wps=5726.7, ups=0.09, wpb=64724, bsz=128, num_updates=6756, lr=9.99539e-05, gnorm=2.378, loss_scale=2, train_wall=11, gb_free=2.8, wall=77677
2021-06-19 16:13:34 | INFO | train_inner | epoch 003: 797 / 3002 loss=2.727, ppl=6.62, wps=5768, ups=0.09, wpb=64816, bsz=128, num_updates=6757, lr=9.99539e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=77688
2021-06-19 16:13:45 | INFO | train_inner | epoch 003: 798 / 3002 loss=2.704, ppl=6.51, wps=5813.7, ups=0.09, wpb=64753, bsz=128, num_updates=6758, lr=9.99539e-05, gnorm=2.179, loss_scale=2, train_wall=11, gb_free=2.8, wall=77699
2021-06-19 16:13:56 | INFO | train_inner | epoch 003: 799 / 3002 loss=2.736, ppl=6.66, wps=5876.3, ups=0.09, wpb=64901, bsz=128, num_updates=6759, lr=9.99539e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=77710
2021-06-19 16:14:07 | INFO | train_inner | epoch 003: 800 / 3002 loss=2.581, ppl=5.98, wps=5848, ups=0.09, wpb=64849, bsz=128, num_updates=6760, lr=9.99539e-05, gnorm=2.091, loss_scale=2, train_wall=11, gb_free=2.8, wall=77721
2021-06-19 16:14:18 | INFO | train_inner | epoch 003: 801 / 3002 loss=2.588, ppl=6.01, wps=5883.9, ups=0.09, wpb=64863, bsz=128, num_updates=6761, lr=9.99539e-05, gnorm=2.129, loss_scale=2, train_wall=11, gb_free=2.8, wall=77732
2021-06-19 16:14:29 | INFO | train_inner | epoch 003: 802 / 3002 loss=2.659, ppl=6.32, wps=5821.3, ups=0.09, wpb=64741, bsz=128, num_updates=6762, lr=9.99539e-05, gnorm=2.193, loss_scale=2, train_wall=11, gb_free=2.8, wall=77744
2021-06-19 16:14:40 | INFO | train_inner | epoch 003: 803 / 3002 loss=2.652, ppl=6.28, wps=5855.1, ups=0.09, wpb=64777, bsz=128, num_updates=6763, lr=9.99539e-05, gnorm=2.109, loss_scale=2, train_wall=11, gb_free=2.8, wall=77755
2021-06-19 16:14:51 | INFO | train_inner | epoch 003: 804 / 3002 loss=2.719, ppl=6.59, wps=5843, ups=0.09, wpb=64883, bsz=128, num_updates=6764, lr=9.99539e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=77766
2021-06-19 16:15:02 | INFO | train_inner | epoch 003: 805 / 3002 loss=2.728, ppl=6.63, wps=5930.3, ups=0.09, wpb=64788, bsz=128, num_updates=6765, lr=9.99539e-05, gnorm=2.128, loss_scale=2, train_wall=10, gb_free=2.8, wall=77777
2021-06-19 16:15:14 | INFO | train_inner | epoch 003: 806 / 3002 loss=2.612, ppl=6.11, wps=5823.1, ups=0.09, wpb=64865, bsz=128, num_updates=6766, lr=9.99539e-05, gnorm=2.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=77788
2021-06-19 16:15:25 | INFO | train_inner | epoch 003: 807 / 3002 loss=2.471, ppl=5.54, wps=5736, ups=0.09, wpb=64759, bsz=128, num_updates=6767, lr=9.99539e-05, gnorm=2.239, loss_scale=2, train_wall=11, gb_free=2.8, wall=77799
2021-06-19 16:15:36 | INFO | train_inner | epoch 003: 808 / 3002 loss=2.527, ppl=5.76, wps=5892.2, ups=0.09, wpb=64902, bsz=128, num_updates=6768, lr=9.99539e-05, gnorm=2.569, loss_scale=2, train_wall=11, gb_free=2.8, wall=77810
2021-06-19 16:15:47 | INFO | train_inner | epoch 003: 809 / 3002 loss=2.6, ppl=6.06, wps=5791.9, ups=0.09, wpb=64823, bsz=128, num_updates=6769, lr=9.99538e-05, gnorm=2.221, loss_scale=2, train_wall=11, gb_free=2.8, wall=77821
2021-06-19 16:15:58 | INFO | train_inner | epoch 003: 810 / 3002 loss=2.584, ppl=6, wps=5921.7, ups=0.09, wpb=64907, bsz=128, num_updates=6770, lr=9.99538e-05, gnorm=2.21, loss_scale=2, train_wall=10, gb_free=2.8, wall=77832
2021-06-19 16:16:09 | INFO | train_inner | epoch 003: 811 / 3002 loss=2.669, ppl=6.36, wps=5775.3, ups=0.09, wpb=64787, bsz=128, num_updates=6771, lr=9.99538e-05, gnorm=2.456, loss_scale=4, train_wall=11, gb_free=2.8, wall=77844
2021-06-19 16:16:20 | INFO | train_inner | epoch 003: 812 / 3002 loss=2.697, ppl=6.48, wps=5784.1, ups=0.09, wpb=64743, bsz=128, num_updates=6772, lr=9.99538e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=77855
2021-06-19 16:16:32 | INFO | train_inner | epoch 003: 813 / 3002 loss=2.751, ppl=6.73, wps=5696, ups=0.09, wpb=64699, bsz=128, num_updates=6773, lr=9.99538e-05, gnorm=2.135, loss_scale=4, train_wall=11, gb_free=2.8, wall=77866
2021-06-19 16:16:43 | INFO | train_inner | epoch 003: 814 / 3002 loss=2.802, ppl=6.97, wps=5874, ups=0.09, wpb=64946, bsz=128, num_updates=6774, lr=9.99538e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=77877
2021-06-19 16:16:54 | INFO | train_inner | epoch 003: 815 / 3002 loss=2.514, ppl=5.71, wps=5839.1, ups=0.09, wpb=64840, bsz=128, num_updates=6775, lr=9.99538e-05, gnorm=2.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=77888
2021-06-19 16:17:05 | INFO | train_inner | epoch 003: 816 / 3002 loss=2.68, ppl=6.41, wps=5854.1, ups=0.09, wpb=64867, bsz=128, num_updates=6776, lr=9.99538e-05, gnorm=13.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=77899
2021-06-19 16:17:16 | INFO | train_inner | epoch 003: 817 / 3002 loss=2.873, ppl=7.32, wps=5897.9, ups=0.09, wpb=64847, bsz=128, num_updates=6777, lr=9.99538e-05, gnorm=2.162, loss_scale=4, train_wall=11, gb_free=2.8, wall=77910
2021-06-19 16:17:27 | INFO | train_inner | epoch 003: 818 / 3002 loss=2.572, ppl=5.95, wps=5807.2, ups=0.09, wpb=64816, bsz=128, num_updates=6778, lr=9.99538e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=77921
2021-06-19 16:17:38 | INFO | train_inner | epoch 003: 819 / 3002 loss=2.604, ppl=6.08, wps=5923.4, ups=0.09, wpb=64880, bsz=128, num_updates=6779, lr=9.99538e-05, gnorm=2.146, loss_scale=4, train_wall=10, gb_free=2.8, wall=77932
2021-06-19 16:17:49 | INFO | train_inner | epoch 003: 820 / 3002 loss=2.698, ppl=6.49, wps=5828.5, ups=0.09, wpb=64759, bsz=128, num_updates=6780, lr=9.99538e-05, gnorm=2.161, loss_scale=4, train_wall=11, gb_free=2.8, wall=77944
2021-06-19 16:18:00 | INFO | train_inner | epoch 003: 821 / 3002 loss=2.716, ppl=6.57, wps=5829.2, ups=0.09, wpb=64845, bsz=128, num_updates=6781, lr=9.99537e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=77955
2021-06-19 16:18:12 | INFO | train_inner | epoch 003: 822 / 3002 loss=2.582, ppl=5.99, wps=5744, ups=0.09, wpb=64837, bsz=128, num_updates=6782, lr=9.99537e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=77966
2021-06-19 16:18:23 | INFO | train_inner | epoch 003: 823 / 3002 loss=2.678, ppl=6.4, wps=5802.4, ups=0.09, wpb=64842, bsz=128, num_updates=6783, lr=9.99537e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=77977
2021-06-19 16:18:34 | INFO | train_inner | epoch 003: 824 / 3002 loss=2.702, ppl=6.51, wps=5864.6, ups=0.09, wpb=64894, bsz=128, num_updates=6784, lr=9.99537e-05, gnorm=2.247, loss_scale=4, train_wall=11, gb_free=2.8, wall=77988
2021-06-19 16:18:45 | INFO | train_inner | epoch 003: 825 / 3002 loss=2.548, ppl=5.85, wps=5854.9, ups=0.09, wpb=64862, bsz=128, num_updates=6785, lr=9.99537e-05, gnorm=2.664, loss_scale=4, train_wall=11, gb_free=2.8, wall=77999
2021-06-19 16:18:56 | INFO | train_inner | epoch 003: 826 / 3002 loss=2.653, ppl=6.29, wps=5886.2, ups=0.09, wpb=64765, bsz=128, num_updates=6786, lr=9.99537e-05, gnorm=2.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=78010
2021-06-19 16:19:07 | INFO | train_inner | epoch 003: 827 / 3002 loss=2.765, ppl=6.8, wps=5877.3, ups=0.09, wpb=64793, bsz=128, num_updates=6787, lr=9.99537e-05, gnorm=2.362, loss_scale=4, train_wall=11, gb_free=2.8, wall=78021
2021-06-19 16:19:18 | INFO | train_inner | epoch 003: 828 / 3002 loss=2.615, ppl=6.13, wps=5795.1, ups=0.09, wpb=64828, bsz=128, num_updates=6788, lr=9.99537e-05, gnorm=2.301, loss_scale=4, train_wall=11, gb_free=2.8, wall=78032
2021-06-19 16:19:29 | INFO | train_inner | epoch 003: 829 / 3002 loss=2.817, ppl=7.05, wps=5753.8, ups=0.09, wpb=64751, bsz=128, num_updates=6789, lr=9.99537e-05, gnorm=2.441, loss_scale=4, train_wall=11, gb_free=2.8, wall=78044
2021-06-19 16:19:41 | INFO | train_inner | epoch 003: 830 / 3002 loss=2.957, ppl=7.77, wps=5790.8, ups=0.09, wpb=64686, bsz=128, num_updates=6790, lr=9.99537e-05, gnorm=2.275, loss_scale=4, train_wall=11, gb_free=2.8, wall=78055
2021-06-19 16:19:51 | INFO | train_inner | epoch 003: 831 / 3002 loss=2.511, ppl=5.7, wps=6007, ups=0.09, wpb=64824, bsz=128, num_updates=6791, lr=9.99537e-05, gnorm=2.327, loss_scale=4, train_wall=10, gb_free=2.8, wall=78066
2021-06-19 16:20:02 | INFO | train_inner | epoch 003: 832 / 3002 loss=2.574, ppl=5.96, wps=5874.1, ups=0.09, wpb=64841, bsz=128, num_updates=6792, lr=9.99537e-05, gnorm=2.306, loss_scale=4, train_wall=11, gb_free=2.8, wall=78077
2021-06-19 16:20:13 | INFO | train_inner | epoch 003: 833 / 3002 loss=2.705, ppl=6.52, wps=6005.5, ups=0.09, wpb=64750, bsz=128, num_updates=6793, lr=9.99537e-05, gnorm=2.195, loss_scale=4, train_wall=10, gb_free=2.8, wall=78088
2021-06-19 16:20:24 | INFO | train_inner | epoch 003: 834 / 3002 loss=2.743, ppl=6.7, wps=5802.9, ups=0.09, wpb=64845, bsz=128, num_updates=6794, lr=9.99536e-05, gnorm=2.593, loss_scale=4, train_wall=11, gb_free=2.8, wall=78099
2021-06-19 16:20:36 | INFO | train_inner | epoch 003: 835 / 3002 loss=2.619, ppl=6.14, wps=5742, ups=0.09, wpb=64793, bsz=128, num_updates=6795, lr=9.99536e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=78110
2021-06-19 16:20:47 | INFO | train_inner | epoch 003: 836 / 3002 loss=2.558, ppl=5.89, wps=5791.3, ups=0.09, wpb=64788, bsz=128, num_updates=6796, lr=9.99536e-05, gnorm=2.814, loss_scale=4, train_wall=11, gb_free=2.8, wall=78121
2021-06-19 16:20:58 | INFO | train_inner | epoch 003: 837 / 3002 loss=2.777, ppl=6.85, wps=5935.1, ups=0.09, wpb=64881, bsz=128, num_updates=6797, lr=9.99536e-05, gnorm=2.245, loss_scale=4, train_wall=10, gb_free=2.8, wall=78132
2021-06-19 16:21:09 | INFO | train_inner | epoch 003: 838 / 3002 loss=2.573, ppl=5.95, wps=5790.3, ups=0.09, wpb=64809, bsz=128, num_updates=6798, lr=9.99536e-05, gnorm=2.475, loss_scale=4, train_wall=11, gb_free=2.8, wall=78143
2021-06-19 16:21:20 | INFO | train_inner | epoch 003: 839 / 3002 loss=2.968, ppl=7.82, wps=5868, ups=0.09, wpb=64826, bsz=128, num_updates=6799, lr=9.99536e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=78154
2021-06-19 16:21:31 | INFO | train_inner | epoch 003: 840 / 3002 loss=2.688, ppl=6.45, wps=5852.8, ups=0.09, wpb=64829, bsz=128, num_updates=6800, lr=9.99536e-05, gnorm=2.14, loss_scale=4, train_wall=11, gb_free=2.8, wall=78165
2021-06-19 16:21:42 | INFO | train_inner | epoch 003: 841 / 3002 loss=2.537, ppl=5.8, wps=5898.1, ups=0.09, wpb=64841, bsz=128, num_updates=6801, lr=9.99536e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=78176
2021-06-19 16:21:53 | INFO | train_inner | epoch 003: 842 / 3002 loss=2.668, ppl=6.36, wps=5791.8, ups=0.09, wpb=64870, bsz=128, num_updates=6802, lr=9.99536e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=78188
2021-06-19 16:22:04 | INFO | train_inner | epoch 003: 843 / 3002 loss=2.813, ppl=7.03, wps=5960.2, ups=0.09, wpb=64858, bsz=128, num_updates=6803, lr=9.99536e-05, gnorm=2.394, loss_scale=4, train_wall=10, gb_free=2.8, wall=78199
2021-06-19 16:22:15 | INFO | train_inner | epoch 003: 844 / 3002 loss=2.656, ppl=6.3, wps=5965.2, ups=0.09, wpb=64912, bsz=128, num_updates=6804, lr=9.99536e-05, gnorm=2.269, loss_scale=4, train_wall=10, gb_free=2.8, wall=78209
2021-06-19 16:22:26 | INFO | train_inner | epoch 003: 845 / 3002 loss=2.686, ppl=6.44, wps=5725.9, ups=0.09, wpb=64881, bsz=128, num_updates=6805, lr=9.99536e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=78221
2021-06-19 16:22:37 | INFO | train_inner | epoch 003: 846 / 3002 loss=2.651, ppl=6.28, wps=5882.4, ups=0.09, wpb=64803, bsz=128, num_updates=6806, lr=9.99535e-05, gnorm=2.24, loss_scale=4, train_wall=11, gb_free=2.8, wall=78232
2021-06-19 16:22:49 | INFO | train_inner | epoch 003: 847 / 3002 loss=2.554, ppl=5.87, wps=5818.4, ups=0.09, wpb=64803, bsz=128, num_updates=6807, lr=9.99535e-05, gnorm=2.256, loss_scale=4, train_wall=11, gb_free=2.8, wall=78243
2021-06-19 16:23:00 | INFO | train_inner | epoch 003: 848 / 3002 loss=2.66, ppl=6.32, wps=5820.2, ups=0.09, wpb=64804, bsz=128, num_updates=6808, lr=9.99535e-05, gnorm=2.132, loss_scale=4, train_wall=11, gb_free=2.8, wall=78254
2021-06-19 16:23:11 | INFO | train_inner | epoch 003: 849 / 3002 loss=2.821, ppl=7.06, wps=5882.6, ups=0.09, wpb=64817, bsz=128, num_updates=6809, lr=9.99535e-05, gnorm=2.6, loss_scale=4, train_wall=11, gb_free=2.8, wall=78265
2021-06-19 16:23:22 | INFO | train_inner | epoch 003: 850 / 3002 loss=2.736, ppl=6.66, wps=5919.6, ups=0.09, wpb=64789, bsz=128, num_updates=6810, lr=9.99535e-05, gnorm=2.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=78276
2021-06-19 16:23:33 | INFO | train_inner | epoch 003: 851 / 3002 loss=2.708, ppl=6.53, wps=5808, ups=0.09, wpb=64804, bsz=128, num_updates=6811, lr=9.99535e-05, gnorm=2.257, loss_scale=4, train_wall=11, gb_free=2.8, wall=78287
2021-06-19 16:23:44 | INFO | train_inner | epoch 003: 852 / 3002 loss=2.643, ppl=6.25, wps=5864.3, ups=0.09, wpb=64845, bsz=128, num_updates=6812, lr=9.99535e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=78298
2021-06-19 16:23:55 | INFO | train_inner | epoch 003: 853 / 3002 loss=2.634, ppl=6.21, wps=5766.6, ups=0.09, wpb=64849, bsz=128, num_updates=6813, lr=9.99535e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=78309
2021-06-19 16:24:06 | INFO | train_inner | epoch 003: 854 / 3002 loss=2.617, ppl=6.13, wps=5735.2, ups=0.09, wpb=64807, bsz=128, num_updates=6814, lr=9.99535e-05, gnorm=2.54, loss_scale=4, train_wall=11, gb_free=2.8, wall=78321
2021-06-19 16:24:18 | INFO | train_inner | epoch 003: 855 / 3002 loss=2.69, ppl=6.45, wps=5779.6, ups=0.09, wpb=64784, bsz=128, num_updates=6815, lr=9.99535e-05, gnorm=3.497, loss_scale=4, train_wall=11, gb_free=2.8, wall=78332
2021-06-19 16:24:29 | INFO | train_inner | epoch 003: 856 / 3002 loss=2.684, ppl=6.43, wps=5842.6, ups=0.09, wpb=64835, bsz=128, num_updates=6816, lr=9.99535e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=78343
2021-06-19 16:24:40 | INFO | train_inner | epoch 003: 857 / 3002 loss=2.579, ppl=5.97, wps=5833.1, ups=0.09, wpb=64812, bsz=128, num_updates=6817, lr=9.99535e-05, gnorm=2.506, loss_scale=4, train_wall=11, gb_free=2.8, wall=78354
2021-06-19 16:24:51 | INFO | train_inner | epoch 003: 858 / 3002 loss=2.865, ppl=7.29, wps=5784.3, ups=0.09, wpb=64848, bsz=128, num_updates=6818, lr=9.99535e-05, gnorm=2.206, loss_scale=4, train_wall=11, gb_free=2.8, wall=78365
2021-06-19 16:25:02 | INFO | train_inner | epoch 003: 859 / 3002 loss=2.688, ppl=6.44, wps=5861.5, ups=0.09, wpb=64855, bsz=128, num_updates=6819, lr=9.99534e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=78376
2021-06-19 16:25:13 | INFO | train_inner | epoch 003: 860 / 3002 loss=2.583, ppl=5.99, wps=5844.5, ups=0.09, wpb=64850, bsz=128, num_updates=6820, lr=9.99534e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=78388
2021-06-19 16:25:24 | INFO | train_inner | epoch 003: 861 / 3002 loss=2.649, ppl=6.27, wps=5771.4, ups=0.09, wpb=64802, bsz=128, num_updates=6821, lr=9.99534e-05, gnorm=4.626, loss_scale=4, train_wall=11, gb_free=2.8, wall=78399
2021-06-19 16:25:36 | INFO | train_inner | epoch 003: 862 / 3002 loss=2.631, ppl=6.2, wps=5804.5, ups=0.09, wpb=64792, bsz=128, num_updates=6822, lr=9.99534e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=78410
2021-06-19 16:25:47 | INFO | train_inner | epoch 003: 863 / 3002 loss=2.635, ppl=6.21, wps=5853.2, ups=0.09, wpb=64807, bsz=128, num_updates=6823, lr=9.99534e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=78421
2021-06-19 16:25:58 | INFO | train_inner | epoch 003: 864 / 3002 loss=2.41, ppl=5.31, wps=5866.5, ups=0.09, wpb=64920, bsz=128, num_updates=6824, lr=9.99534e-05, gnorm=2.157, loss_scale=4, train_wall=11, gb_free=2.8, wall=78432
2021-06-19 16:26:09 | INFO | train_inner | epoch 003: 865 / 3002 loss=2.667, ppl=6.35, wps=5917.4, ups=0.09, wpb=64867, bsz=128, num_updates=6825, lr=9.99534e-05, gnorm=10.985, loss_scale=4, train_wall=10, gb_free=2.8, wall=78443
2021-06-19 16:26:20 | INFO | train_inner | epoch 003: 866 / 3002 loss=2.48, ppl=5.58, wps=5714.2, ups=0.09, wpb=64804, bsz=128, num_updates=6826, lr=9.99534e-05, gnorm=2.081, loss_scale=4, train_wall=11, gb_free=2.8, wall=78454
2021-06-19 16:26:31 | INFO | train_inner | epoch 003: 867 / 3002 loss=2.685, ppl=6.43, wps=5927, ups=0.09, wpb=64805, bsz=128, num_updates=6827, lr=9.99534e-05, gnorm=2.256, loss_scale=4, train_wall=10, gb_free=2.8, wall=78465
2021-06-19 16:26:42 | INFO | train_inner | epoch 003: 868 / 3002 loss=2.476, ppl=5.57, wps=5864.2, ups=0.09, wpb=64756, bsz=128, num_updates=6828, lr=9.99534e-05, gnorm=2.433, loss_scale=4, train_wall=11, gb_free=2.8, wall=78476
2021-06-19 16:26:53 | INFO | train_inner | epoch 003: 869 / 3002 loss=2.653, ppl=6.29, wps=5782.2, ups=0.09, wpb=64836, bsz=128, num_updates=6829, lr=9.99534e-05, gnorm=2.106, loss_scale=4, train_wall=11, gb_free=2.8, wall=78488
2021-06-19 16:27:04 | INFO | train_inner | epoch 003: 870 / 3002 loss=2.641, ppl=6.24, wps=5833.8, ups=0.09, wpb=64813, bsz=128, num_updates=6830, lr=9.99534e-05, gnorm=2.068, loss_scale=4, train_wall=11, gb_free=2.8, wall=78499
2021-06-19 16:27:15 | INFO | train_inner | epoch 003: 871 / 3002 loss=2.665, ppl=6.34, wps=5807.4, ups=0.09, wpb=64833, bsz=128, num_updates=6831, lr=9.99533e-05, gnorm=2.074, loss_scale=4, train_wall=11, gb_free=2.8, wall=78510
2021-06-19 16:27:26 | INFO | train_inner | epoch 003: 872 / 3002 loss=2.684, ppl=6.43, wps=5912.3, ups=0.09, wpb=64815, bsz=128, num_updates=6832, lr=9.99533e-05, gnorm=2.194, loss_scale=4, train_wall=10, gb_free=2.8, wall=78521
2021-06-19 16:27:38 | INFO | train_inner | epoch 003: 873 / 3002 loss=2.602, ppl=6.07, wps=5736.2, ups=0.09, wpb=64857, bsz=128, num_updates=6833, lr=9.99533e-05, gnorm=2.164, loss_scale=4, train_wall=11, gb_free=2.8, wall=78532
2021-06-19 16:27:49 | INFO | train_inner | epoch 003: 874 / 3002 loss=2.556, ppl=5.88, wps=5745.5, ups=0.09, wpb=64833, bsz=128, num_updates=6834, lr=9.99533e-05, gnorm=2.215, loss_scale=4, train_wall=11, gb_free=2.8, wall=78543
2021-06-19 16:28:00 | INFO | train_inner | epoch 003: 875 / 3002 loss=2.609, ppl=6.1, wps=5875.1, ups=0.09, wpb=64846, bsz=128, num_updates=6835, lr=9.99533e-05, gnorm=2.187, loss_scale=4, train_wall=11, gb_free=2.8, wall=78554
2021-06-19 16:28:11 | INFO | train_inner | epoch 003: 876 / 3002 loss=2.444, ppl=5.44, wps=5842.4, ups=0.09, wpb=64860, bsz=128, num_updates=6836, lr=9.99533e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=78566
2021-06-19 16:28:22 | INFO | train_inner | epoch 003: 877 / 3002 loss=2.672, ppl=6.37, wps=5885.4, ups=0.09, wpb=64883, bsz=128, num_updates=6837, lr=9.99533e-05, gnorm=2.358, loss_scale=4, train_wall=11, gb_free=2.8, wall=78577
2021-06-19 16:28:33 | INFO | train_inner | epoch 003: 878 / 3002 loss=2.648, ppl=6.27, wps=5783.4, ups=0.09, wpb=64723, bsz=128, num_updates=6838, lr=9.99533e-05, gnorm=3.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=78588
2021-06-19 16:28:44 | INFO | train_inner | epoch 003: 879 / 3002 loss=2.701, ppl=6.5, wps=5913.2, ups=0.09, wpb=64858, bsz=128, num_updates=6839, lr=9.99533e-05, gnorm=2.15, loss_scale=4, train_wall=10, gb_free=2.8, wall=78599
2021-06-19 16:28:55 | INFO | train_inner | epoch 003: 880 / 3002 loss=2.504, ppl=5.67, wps=5829.1, ups=0.09, wpb=64731, bsz=128, num_updates=6840, lr=9.99533e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=78610
2021-06-19 16:29:07 | INFO | train_inner | epoch 003: 881 / 3002 loss=2.76, ppl=6.78, wps=5835.2, ups=0.09, wpb=64855, bsz=128, num_updates=6841, lr=9.99533e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=78621
2021-06-19 16:29:18 | INFO | train_inner | epoch 003: 882 / 3002 loss=2.665, ppl=6.34, wps=5853.3, ups=0.09, wpb=64837, bsz=128, num_updates=6842, lr=9.99533e-05, gnorm=3.33, loss_scale=4, train_wall=11, gb_free=2.8, wall=78632
2021-06-19 16:29:29 | INFO | train_inner | epoch 003: 883 / 3002 loss=2.838, ppl=7.15, wps=5786, ups=0.09, wpb=64867, bsz=128, num_updates=6843, lr=9.99533e-05, gnorm=2.276, loss_scale=4, train_wall=11, gb_free=2.8, wall=78643
2021-06-19 16:29:40 | INFO | train_inner | epoch 003: 884 / 3002 loss=2.671, ppl=6.37, wps=5641.3, ups=0.09, wpb=64805, bsz=128, num_updates=6844, lr=9.99532e-05, gnorm=3.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=78655
2021-06-19 16:29:52 | INFO | train_inner | epoch 003: 885 / 3002 loss=2.62, ppl=6.15, wps=5797.2, ups=0.09, wpb=64856, bsz=128, num_updates=6845, lr=9.99532e-05, gnorm=2.689, loss_scale=4, train_wall=11, gb_free=2.8, wall=78666
2021-06-19 16:30:02 | INFO | train_inner | epoch 003: 886 / 3002 loss=2.651, ppl=6.28, wps=5995.9, ups=0.09, wpb=64830, bsz=128, num_updates=6846, lr=9.99532e-05, gnorm=2.131, loss_scale=4, train_wall=10, gb_free=2.8, wall=78677
2021-06-19 16:30:14 | INFO | train_inner | epoch 003: 887 / 3002 loss=2.573, ppl=5.95, wps=5772, ups=0.09, wpb=64875, bsz=128, num_updates=6847, lr=9.99532e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=78688
2021-06-19 16:30:25 | INFO | train_inner | epoch 003: 888 / 3002 loss=2.645, ppl=6.26, wps=5884.5, ups=0.09, wpb=64784, bsz=128, num_updates=6848, lr=9.99532e-05, gnorm=2.307, loss_scale=4, train_wall=11, gb_free=2.8, wall=78699
2021-06-19 16:30:36 | INFO | train_inner | epoch 003: 889 / 3002 loss=2.541, ppl=5.82, wps=5813.2, ups=0.09, wpb=64834, bsz=128, num_updates=6849, lr=9.99532e-05, gnorm=2.157, loss_scale=4, train_wall=11, gb_free=2.8, wall=78710
2021-06-19 16:30:47 | INFO | train_inner | epoch 003: 890 / 3002 loss=2.666, ppl=6.35, wps=5829, ups=0.09, wpb=64768, bsz=128, num_updates=6850, lr=9.99532e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=78721
2021-06-19 16:30:58 | INFO | train_inner | epoch 003: 891 / 3002 loss=2.775, ppl=6.84, wps=5770, ups=0.09, wpb=64774, bsz=128, num_updates=6851, lr=9.99532e-05, gnorm=3.213, loss_scale=4, train_wall=11, gb_free=2.8, wall=78732
2021-06-19 16:31:09 | INFO | train_inner | epoch 003: 892 / 3002 loss=2.645, ppl=6.26, wps=5741, ups=0.09, wpb=64790, bsz=128, num_updates=6852, lr=9.99532e-05, gnorm=2.077, loss_scale=4, train_wall=11, gb_free=2.8, wall=78744
2021-06-19 16:31:21 | INFO | train_inner | epoch 003: 893 / 3002 loss=2.642, ppl=6.24, wps=5786.3, ups=0.09, wpb=64791, bsz=128, num_updates=6853, lr=9.99532e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=78755
2021-06-19 16:31:32 | INFO | train_inner | epoch 003: 894 / 3002 loss=2.756, ppl=6.75, wps=5753.4, ups=0.09, wpb=64805, bsz=128, num_updates=6854, lr=9.99532e-05, gnorm=2.196, loss_scale=4, train_wall=11, gb_free=2.8, wall=78766
2021-06-19 16:31:43 | INFO | train_inner | epoch 003: 895 / 3002 loss=2.552, ppl=5.86, wps=5863.6, ups=0.09, wpb=64832, bsz=128, num_updates=6855, lr=9.99532e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=78777
2021-06-19 16:31:54 | INFO | train_inner | epoch 003: 896 / 3002 loss=2.524, ppl=5.75, wps=5842.4, ups=0.09, wpb=64829, bsz=128, num_updates=6856, lr=9.99531e-05, gnorm=2.154, loss_scale=4, train_wall=11, gb_free=2.8, wall=78788
2021-06-19 16:32:05 | INFO | train_inner | epoch 003: 897 / 3002 loss=2.695, ppl=6.47, wps=5800, ups=0.09, wpb=64887, bsz=128, num_updates=6857, lr=9.99531e-05, gnorm=2.19, loss_scale=4, train_wall=11, gb_free=2.8, wall=78800
2021-06-19 16:32:16 | INFO | train_inner | epoch 003: 898 / 3002 loss=2.73, ppl=6.64, wps=5894.3, ups=0.09, wpb=64844, bsz=128, num_updates=6858, lr=9.99531e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=78811
2021-06-19 16:32:27 | INFO | train_inner | epoch 003: 899 / 3002 loss=2.658, ppl=6.31, wps=5761.9, ups=0.09, wpb=64808, bsz=128, num_updates=6859, lr=9.99531e-05, gnorm=3.727, loss_scale=4, train_wall=11, gb_free=2.8, wall=78822
2021-06-19 16:32:39 | INFO | train_inner | epoch 003: 900 / 3002 loss=2.804, ppl=6.99, wps=5825.7, ups=0.09, wpb=64833, bsz=128, num_updates=6860, lr=9.99531e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=78833
2021-06-19 16:32:50 | INFO | train_inner | epoch 003: 901 / 3002 loss=2.613, ppl=6.12, wps=5803.7, ups=0.09, wpb=64823, bsz=128, num_updates=6861, lr=9.99531e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=78844
2021-06-19 16:33:01 | INFO | train_inner | epoch 003: 902 / 3002 loss=2.61, ppl=6.11, wps=5872.1, ups=0.09, wpb=64775, bsz=128, num_updates=6862, lr=9.99531e-05, gnorm=4.644, loss_scale=4, train_wall=11, gb_free=2.8, wall=78855
2021-06-19 16:33:12 | INFO | train_inner | epoch 003: 903 / 3002 loss=2.713, ppl=6.56, wps=5882.1, ups=0.09, wpb=64818, bsz=128, num_updates=6863, lr=9.99531e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=78866
2021-06-19 16:33:23 | INFO | train_inner | epoch 003: 904 / 3002 loss=2.524, ppl=5.75, wps=5807.6, ups=0.09, wpb=64847, bsz=128, num_updates=6864, lr=9.99531e-05, gnorm=2.107, loss_scale=4, train_wall=11, gb_free=2.8, wall=78877
2021-06-19 16:33:34 | INFO | train_inner | epoch 003: 905 / 3002 loss=2.737, ppl=6.67, wps=5828.9, ups=0.09, wpb=64794, bsz=128, num_updates=6865, lr=9.99531e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=78888
2021-06-19 16:33:45 | INFO | train_inner | epoch 003: 906 / 3002 loss=2.617, ppl=6.13, wps=5938.5, ups=0.09, wpb=64849, bsz=128, num_updates=6866, lr=9.99531e-05, gnorm=2.545, loss_scale=4, train_wall=10, gb_free=2.8, wall=78899
2021-06-19 16:33:56 | INFO | train_inner | epoch 003: 907 / 3002 loss=2.588, ppl=6.01, wps=5868.1, ups=0.09, wpb=64860, bsz=128, num_updates=6867, lr=9.99531e-05, gnorm=2.359, loss_scale=4, train_wall=11, gb_free=2.8, wall=78910
2021-06-19 16:34:07 | INFO | train_inner | epoch 003: 908 / 3002 loss=2.661, ppl=6.32, wps=5835, ups=0.09, wpb=64821, bsz=128, num_updates=6868, lr=9.99531e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=78921
2021-06-19 16:34:18 | INFO | train_inner | epoch 003: 909 / 3002 loss=2.62, ppl=6.15, wps=5761.6, ups=0.09, wpb=64845, bsz=128, num_updates=6869, lr=9.9953e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=78933
2021-06-19 16:34:29 | INFO | train_inner | epoch 003: 910 / 3002 loss=2.521, ppl=5.74, wps=5884.3, ups=0.09, wpb=64922, bsz=128, num_updates=6870, lr=9.9953e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=78944
2021-06-19 16:34:41 | INFO | train_inner | epoch 003: 911 / 3002 loss=2.647, ppl=6.26, wps=5845.9, ups=0.09, wpb=64943, bsz=128, num_updates=6871, lr=9.9953e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=78955
2021-06-19 16:34:52 | INFO | train_inner | epoch 003: 912 / 3002 loss=2.506, ppl=5.68, wps=5863.6, ups=0.09, wpb=64836, bsz=128, num_updates=6872, lr=9.9953e-05, gnorm=2.153, loss_scale=4, train_wall=11, gb_free=2.8, wall=78966
2021-06-19 16:35:03 | INFO | train_inner | epoch 003: 913 / 3002 loss=2.512, ppl=5.71, wps=5833.8, ups=0.09, wpb=64749, bsz=128, num_updates=6873, lr=9.9953e-05, gnorm=2.231, loss_scale=4, train_wall=11, gb_free=2.8, wall=78977
2021-06-19 16:35:14 | INFO | train_inner | epoch 003: 914 / 3002 loss=2.706, ppl=6.52, wps=5850.1, ups=0.09, wpb=64854, bsz=128, num_updates=6874, lr=9.9953e-05, gnorm=2.526, loss_scale=4, train_wall=11, gb_free=2.8, wall=78988
2021-06-19 16:35:25 | INFO | train_inner | epoch 003: 915 / 3002 loss=2.584, ppl=6, wps=5903.2, ups=0.09, wpb=64862, bsz=128, num_updates=6875, lr=9.9953e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=78999
2021-06-19 16:35:36 | INFO | train_inner | epoch 003: 916 / 3002 loss=2.589, ppl=6.02, wps=5918.1, ups=0.09, wpb=64795, bsz=128, num_updates=6876, lr=9.9953e-05, gnorm=2.174, loss_scale=4, train_wall=10, gb_free=2.8, wall=79010
2021-06-19 16:35:47 | INFO | train_inner | epoch 003: 917 / 3002 loss=2.674, ppl=6.38, wps=5902.1, ups=0.09, wpb=64741, bsz=128, num_updates=6877, lr=9.9953e-05, gnorm=2.32, loss_scale=4, train_wall=11, gb_free=2.8, wall=79021
2021-06-19 16:35:58 | INFO | train_inner | epoch 003: 918 / 3002 loss=2.766, ppl=6.8, wps=5792.4, ups=0.09, wpb=64811, bsz=128, num_updates=6878, lr=9.9953e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=79032
2021-06-19 16:36:09 | INFO | train_inner | epoch 003: 919 / 3002 loss=2.728, ppl=6.63, wps=5763, ups=0.09, wpb=64902, bsz=128, num_updates=6879, lr=9.9953e-05, gnorm=2.144, loss_scale=4, train_wall=11, gb_free=2.8, wall=79043
2021-06-19 16:36:20 | INFO | train_inner | epoch 003: 920 / 3002 loss=2.502, ppl=5.67, wps=5836.7, ups=0.09, wpb=64885, bsz=128, num_updates=6880, lr=9.9953e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=79055
2021-06-19 16:36:31 | INFO | train_inner | epoch 003: 921 / 3002 loss=2.613, ppl=6.12, wps=5781, ups=0.09, wpb=64771, bsz=128, num_updates=6881, lr=9.99529e-05, gnorm=2.378, loss_scale=4, train_wall=11, gb_free=2.8, wall=79066
2021-06-19 16:36:43 | INFO | train_inner | epoch 003: 922 / 3002 loss=2.648, ppl=6.27, wps=5724.1, ups=0.09, wpb=64822, bsz=128, num_updates=6882, lr=9.99529e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=79077
2021-06-19 16:36:54 | INFO | train_inner | epoch 003: 923 / 3002 loss=2.67, ppl=6.36, wps=5916.1, ups=0.09, wpb=64897, bsz=128, num_updates=6883, lr=9.99529e-05, gnorm=2.215, loss_scale=4, train_wall=10, gb_free=2.8, wall=79088
2021-06-19 16:37:05 | INFO | train_inner | epoch 003: 924 / 3002 loss=2.586, ppl=6.01, wps=5808.4, ups=0.09, wpb=64880, bsz=128, num_updates=6884, lr=9.99529e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=79099
2021-06-19 16:37:16 | INFO | train_inner | epoch 003: 925 / 3002 loss=2.649, ppl=6.27, wps=5853, ups=0.09, wpb=64859, bsz=128, num_updates=6885, lr=9.99529e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=79110
2021-06-19 16:37:27 | INFO | train_inner | epoch 003: 926 / 3002 loss=2.679, ppl=6.4, wps=5798.1, ups=0.09, wpb=64940, bsz=128, num_updates=6886, lr=9.99529e-05, gnorm=2.103, loss_scale=4, train_wall=11, gb_free=2.8, wall=79122
2021-06-19 16:37:38 | INFO | train_inner | epoch 003: 927 / 3002 loss=2.707, ppl=6.53, wps=5825.6, ups=0.09, wpb=64867, bsz=128, num_updates=6887, lr=9.99529e-05, gnorm=2.189, loss_scale=4, train_wall=11, gb_free=2.8, wall=79133
2021-06-19 16:37:49 | INFO | train_inner | epoch 003: 928 / 3002 loss=2.626, ppl=6.17, wps=5913.9, ups=0.09, wpb=64828, bsz=128, num_updates=6888, lr=9.99529e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=79144
2021-06-19 16:38:00 | INFO | train_inner | epoch 003: 929 / 3002 loss=2.536, ppl=5.8, wps=5907.6, ups=0.09, wpb=64862, bsz=128, num_updates=6889, lr=9.99529e-05, gnorm=2.305, loss_scale=4, train_wall=11, gb_free=2.8, wall=79155
2021-06-19 16:38:11 | INFO | train_inner | epoch 003: 930 / 3002 loss=2.558, ppl=5.89, wps=5852, ups=0.09, wpb=64763, bsz=128, num_updates=6890, lr=9.99529e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=79166
2021-06-19 16:38:22 | INFO | train_inner | epoch 003: 931 / 3002 loss=2.714, ppl=6.56, wps=5853.3, ups=0.09, wpb=64884, bsz=128, num_updates=6891, lr=9.99529e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=79177
2021-06-19 16:38:34 | INFO | train_inner | epoch 003: 932 / 3002 loss=2.604, ppl=6.08, wps=5808.5, ups=0.09, wpb=64894, bsz=128, num_updates=6892, lr=9.99529e-05, gnorm=2.078, loss_scale=4, train_wall=11, gb_free=2.8, wall=79188
2021-06-19 16:38:45 | INFO | train_inner | epoch 003: 933 / 3002 loss=2.633, ppl=6.2, wps=5758.1, ups=0.09, wpb=64802, bsz=128, num_updates=6893, lr=9.99529e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=79199
2021-06-19 16:38:56 | INFO | train_inner | epoch 003: 934 / 3002 loss=2.7, ppl=6.5, wps=5772.5, ups=0.09, wpb=64809, bsz=128, num_updates=6894, lr=9.99528e-05, gnorm=14.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=79210
2021-06-19 16:39:07 | INFO | train_inner | epoch 003: 935 / 3002 loss=2.729, ppl=6.63, wps=5880.5, ups=0.09, wpb=64806, bsz=128, num_updates=6895, lr=9.99528e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=79221
2021-06-19 16:39:18 | INFO | train_inner | epoch 003: 936 / 3002 loss=2.574, ppl=5.95, wps=5968.1, ups=0.09, wpb=64889, bsz=128, num_updates=6896, lr=9.99528e-05, gnorm=2.192, loss_scale=4, train_wall=10, gb_free=2.8, wall=79232
2021-06-19 16:39:29 | INFO | train_inner | epoch 003: 937 / 3002 loss=2.929, ppl=7.62, wps=5807.3, ups=0.09, wpb=64801, bsz=128, num_updates=6897, lr=9.99528e-05, gnorm=2.841, loss_scale=4, train_wall=11, gb_free=2.8, wall=79243
2021-06-19 16:39:40 | INFO | train_inner | epoch 003: 938 / 3002 loss=2.764, ppl=6.79, wps=5798.7, ups=0.09, wpb=64793, bsz=128, num_updates=6898, lr=9.99528e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=79255
2021-06-19 16:39:51 | INFO | train_inner | epoch 003: 939 / 3002 loss=2.449, ppl=5.46, wps=5875.4, ups=0.09, wpb=64909, bsz=128, num_updates=6899, lr=9.99528e-05, gnorm=2.659, loss_scale=8, train_wall=11, gb_free=2.8, wall=79266
2021-06-19 16:40:02 | INFO | train_inner | epoch 003: 940 / 3002 loss=2.566, ppl=5.92, wps=5899.8, ups=0.09, wpb=64770, bsz=128, num_updates=6900, lr=9.99528e-05, gnorm=2.236, loss_scale=8, train_wall=11, gb_free=2.8, wall=79277
2021-06-19 16:40:13 | INFO | train_inner | epoch 003: 941 / 3002 loss=2.67, ppl=6.36, wps=5839.3, ups=0.09, wpb=64877, bsz=128, num_updates=6901, lr=9.99528e-05, gnorm=3.249, loss_scale=8, train_wall=11, gb_free=2.8, wall=79288
2021-06-19 16:40:25 | INFO | train_inner | epoch 003: 942 / 3002 loss=2.687, ppl=6.44, wps=5878.6, ups=0.09, wpb=64947, bsz=128, num_updates=6902, lr=9.99528e-05, gnorm=2.614, loss_scale=8, train_wall=11, gb_free=2.8, wall=79299
2021-06-19 16:40:36 | INFO | train_inner | epoch 003: 943 / 3002 loss=2.701, ppl=6.5, wps=5899.3, ups=0.09, wpb=64881, bsz=128, num_updates=6903, lr=9.99528e-05, gnorm=3.279, loss_scale=8, train_wall=11, gb_free=2.8, wall=79310
2021-06-19 16:40:47 | INFO | train_inner | epoch 003: 944 / 3002 loss=2.628, ppl=6.18, wps=5793.1, ups=0.09, wpb=64815, bsz=128, num_updates=6904, lr=9.99528e-05, gnorm=2.359, loss_scale=8, train_wall=11, gb_free=2.8, wall=79321
2021-06-19 16:40:58 | INFO | train_inner | epoch 003: 945 / 3002 loss=2.784, ppl=6.89, wps=5924.6, ups=0.09, wpb=64849, bsz=128, num_updates=6905, lr=9.99528e-05, gnorm=2.254, loss_scale=8, train_wall=10, gb_free=2.8, wall=79332
2021-06-19 16:41:09 | INFO | train_inner | epoch 003: 946 / 3002 loss=2.75, ppl=6.73, wps=5767.8, ups=0.09, wpb=64869, bsz=128, num_updates=6906, lr=9.99527e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=79343
2021-06-19 16:41:20 | INFO | train_inner | epoch 003: 947 / 3002 loss=2.703, ppl=6.51, wps=5936.5, ups=0.09, wpb=64864, bsz=128, num_updates=6907, lr=9.99527e-05, gnorm=2.144, loss_scale=8, train_wall=10, gb_free=2.8, wall=79354
2021-06-19 16:41:31 | INFO | train_inner | epoch 003: 948 / 3002 loss=2.932, ppl=7.63, wps=5791.1, ups=0.09, wpb=64811, bsz=128, num_updates=6908, lr=9.99527e-05, gnorm=2.443, loss_scale=8, train_wall=11, gb_free=2.8, wall=79365
2021-06-19 16:41:42 | INFO | train_inner | epoch 003: 949 / 3002 loss=2.659, ppl=6.31, wps=5706.4, ups=0.09, wpb=64862, bsz=128, num_updates=6909, lr=9.99527e-05, gnorm=2.747, loss_scale=8, train_wall=11, gb_free=2.8, wall=79377
2021-06-19 16:41:54 | INFO | train_inner | epoch 003: 950 / 3002 loss=2.716, ppl=6.57, wps=5776.6, ups=0.09, wpb=64794, bsz=128, num_updates=6910, lr=9.99527e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=79388
2021-06-19 16:42:05 | INFO | train_inner | epoch 003: 951 / 3002 loss=2.686, ppl=6.44, wps=5828.5, ups=0.09, wpb=64741, bsz=128, num_updates=6911, lr=9.99527e-05, gnorm=6.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=79399
2021-06-19 16:42:16 | INFO | train_inner | epoch 003: 952 / 3002 loss=2.688, ppl=6.44, wps=5810.5, ups=0.09, wpb=64820, bsz=128, num_updates=6912, lr=9.99527e-05, gnorm=2.14, loss_scale=8, train_wall=11, gb_free=2.8, wall=79410
2021-06-19 16:42:27 | INFO | train_inner | epoch 003: 953 / 3002 loss=2.599, ppl=6.06, wps=5923.8, ups=0.09, wpb=64813, bsz=128, num_updates=6913, lr=9.99527e-05, gnorm=2.088, loss_scale=8, train_wall=10, gb_free=2.8, wall=79421
2021-06-19 16:42:38 | INFO | train_inner | epoch 003: 954 / 3002 loss=2.666, ppl=6.35, wps=5828.9, ups=0.09, wpb=64804, bsz=128, num_updates=6914, lr=9.99527e-05, gnorm=2.127, loss_scale=8, train_wall=11, gb_free=2.8, wall=79432
2021-06-19 16:42:49 | INFO | train_inner | epoch 003: 955 / 3002 loss=2.684, ppl=6.43, wps=5758, ups=0.09, wpb=64774, bsz=128, num_updates=6915, lr=9.99527e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=79444
2021-06-19 16:43:00 | INFO | train_inner | epoch 003: 956 / 3002 loss=2.701, ppl=6.5, wps=5793.3, ups=0.09, wpb=64812, bsz=128, num_updates=6916, lr=9.99527e-05, gnorm=2.235, loss_scale=8, train_wall=11, gb_free=2.8, wall=79455
2021-06-19 16:43:11 | INFO | train_inner | epoch 003: 957 / 3002 loss=2.585, ppl=6, wps=5910.2, ups=0.09, wpb=64855, bsz=128, num_updates=6917, lr=9.99527e-05, gnorm=2.187, loss_scale=8, train_wall=10, gb_free=2.8, wall=79466
2021-06-19 16:43:22 | INFO | train_inner | epoch 003: 958 / 3002 loss=2.672, ppl=6.37, wps=5831.4, ups=0.09, wpb=64778, bsz=128, num_updates=6918, lr=9.99527e-05, gnorm=2.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=79477
2021-06-19 16:43:33 | INFO | train_inner | epoch 003: 959 / 3002 loss=2.586, ppl=6, wps=5872.9, ups=0.09, wpb=64815, bsz=128, num_updates=6919, lr=9.99526e-05, gnorm=2.092, loss_scale=8, train_wall=11, gb_free=2.8, wall=79488
2021-06-19 16:43:44 | INFO | train_inner | epoch 003: 960 / 3002 loss=2.881, ppl=7.36, wps=5892.1, ups=0.09, wpb=64859, bsz=128, num_updates=6920, lr=9.99526e-05, gnorm=2.372, loss_scale=8, train_wall=11, gb_free=2.8, wall=79499
2021-06-19 16:43:56 | INFO | train_inner | epoch 003: 961 / 3002 loss=2.604, ppl=6.08, wps=5817, ups=0.09, wpb=64787, bsz=128, num_updates=6921, lr=9.99526e-05, gnorm=2.095, loss_scale=8, train_wall=11, gb_free=2.8, wall=79510
2021-06-19 16:44:07 | INFO | train_inner | epoch 003: 962 / 3002 loss=2.579, ppl=5.97, wps=5808.3, ups=0.09, wpb=64778, bsz=128, num_updates=6922, lr=9.99526e-05, gnorm=2.327, loss_scale=8, train_wall=11, gb_free=2.8, wall=79521
2021-06-19 16:44:18 | INFO | train_inner | epoch 003: 963 / 3002 loss=2.606, ppl=6.09, wps=5838.6, ups=0.09, wpb=64823, bsz=128, num_updates=6923, lr=9.99526e-05, gnorm=2.454, loss_scale=8, train_wall=11, gb_free=2.8, wall=79532
2021-06-19 16:44:29 | INFO | train_inner | epoch 003: 964 / 3002 loss=2.593, ppl=6.03, wps=5835.1, ups=0.09, wpb=64741, bsz=128, num_updates=6924, lr=9.99526e-05, gnorm=2.178, loss_scale=8, train_wall=11, gb_free=2.8, wall=79543
2021-06-19 16:44:40 | INFO | train_inner | epoch 003: 965 / 3002 loss=2.644, ppl=6.25, wps=5735, ups=0.09, wpb=64901, bsz=128, num_updates=6925, lr=9.99526e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=79555
2021-06-19 16:44:51 | INFO | train_inner | epoch 003: 966 / 3002 loss=2.737, ppl=6.67, wps=5806.8, ups=0.09, wpb=64759, bsz=128, num_updates=6926, lr=9.99526e-05, gnorm=2.448, loss_scale=8, train_wall=11, gb_free=2.8, wall=79566
2021-06-19 16:45:02 | INFO | train_inner | epoch 003: 967 / 3002 loss=2.733, ppl=6.65, wps=5877.4, ups=0.09, wpb=64759, bsz=128, num_updates=6927, lr=9.99526e-05, gnorm=2.217, loss_scale=8, train_wall=11, gb_free=2.8, wall=79577
2021-06-19 16:45:14 | INFO | train_inner | epoch 003: 968 / 3002 loss=2.735, ppl=6.66, wps=5747, ups=0.09, wpb=64853, bsz=128, num_updates=6928, lr=9.99526e-05, gnorm=2.116, loss_scale=8, train_wall=11, gb_free=2.8, wall=79588
2021-06-19 16:45:25 | INFO | train_inner | epoch 003: 969 / 3002 loss=2.436, ppl=5.41, wps=5971.3, ups=0.09, wpb=64929, bsz=128, num_updates=6929, lr=9.99526e-05, gnorm=2.047, loss_scale=8, train_wall=10, gb_free=2.8, wall=79599
2021-06-19 16:45:36 | INFO | train_inner | epoch 003: 970 / 3002 loss=2.759, ppl=6.77, wps=5821.9, ups=0.09, wpb=64749, bsz=128, num_updates=6930, lr=9.99526e-05, gnorm=2.15, loss_scale=8, train_wall=11, gb_free=2.8, wall=79610
2021-06-19 16:45:47 | INFO | train_inner | epoch 003: 971 / 3002 loss=2.772, ppl=6.83, wps=5812.9, ups=0.09, wpb=64813, bsz=128, num_updates=6931, lr=9.99525e-05, gnorm=2.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=79621
2021-06-19 16:45:58 | INFO | train_inner | epoch 003: 972 / 3002 loss=2.6, ppl=6.06, wps=5844.2, ups=0.09, wpb=64905, bsz=128, num_updates=6932, lr=9.99525e-05, gnorm=2.119, loss_scale=8, train_wall=11, gb_free=2.8, wall=79632
2021-06-19 16:46:09 | INFO | train_inner | epoch 003: 973 / 3002 loss=2.835, ppl=7.13, wps=5827.2, ups=0.09, wpb=64781, bsz=128, num_updates=6933, lr=9.99525e-05, gnorm=2.111, loss_scale=8, train_wall=11, gb_free=2.8, wall=79643
2021-06-19 16:46:20 | INFO | train_inner | epoch 003: 974 / 3002 loss=2.648, ppl=6.27, wps=5742.9, ups=0.09, wpb=64781, bsz=128, num_updates=6934, lr=9.99525e-05, gnorm=10.646, loss_scale=8, train_wall=11, gb_free=2.8, wall=79655
2021-06-19 16:46:32 | INFO | train_inner | epoch 003: 975 / 3002 loss=2.571, ppl=5.94, wps=5787.5, ups=0.09, wpb=64893, bsz=128, num_updates=6935, lr=9.99525e-05, gnorm=2.205, loss_scale=8, train_wall=11, gb_free=2.8, wall=79666
2021-06-19 16:46:43 | INFO | train_inner | epoch 003: 976 / 3002 loss=2.481, ppl=5.58, wps=5875.9, ups=0.09, wpb=64866, bsz=128, num_updates=6936, lr=9.99525e-05, gnorm=2.417, loss_scale=8, train_wall=11, gb_free=2.8, wall=79677
2021-06-19 16:46:54 | INFO | train_inner | epoch 003: 977 / 3002 loss=2.609, ppl=6.1, wps=5855.1, ups=0.09, wpb=64812, bsz=128, num_updates=6937, lr=9.99525e-05, gnorm=2.21, loss_scale=8, train_wall=11, gb_free=2.8, wall=79688
2021-06-19 16:47:05 | INFO | train_inner | epoch 003: 978 / 3002 loss=2.71, ppl=6.54, wps=5846.1, ups=0.09, wpb=64802, bsz=128, num_updates=6938, lr=9.99525e-05, gnorm=2.296, loss_scale=8, train_wall=11, gb_free=2.8, wall=79699
2021-06-19 16:47:16 | INFO | train_inner | epoch 003: 979 / 3002 loss=2.722, ppl=6.6, wps=5831.9, ups=0.09, wpb=64823, bsz=128, num_updates=6939, lr=9.99525e-05, gnorm=2.119, loss_scale=8, train_wall=11, gb_free=2.8, wall=79710
2021-06-19 16:47:27 | INFO | train_inner | epoch 003: 980 / 3002 loss=2.614, ppl=6.12, wps=5851.1, ups=0.09, wpb=64817, bsz=128, num_updates=6940, lr=9.99525e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=79721
2021-06-19 16:47:38 | INFO | train_inner | epoch 003: 981 / 3002 loss=2.669, ppl=6.36, wps=5930.9, ups=0.09, wpb=64883, bsz=128, num_updates=6941, lr=9.99525e-05, gnorm=2.44, loss_scale=8, train_wall=10, gb_free=2.8, wall=79732
2021-06-19 16:47:49 | INFO | train_inner | epoch 003: 982 / 3002 loss=2.656, ppl=6.3, wps=5881.7, ups=0.09, wpb=64859, bsz=128, num_updates=6942, lr=9.99525e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=79743
2021-06-19 16:48:00 | INFO | train_inner | epoch 003: 983 / 3002 loss=2.735, ppl=6.66, wps=5794, ups=0.09, wpb=64773, bsz=128, num_updates=6943, lr=9.99525e-05, gnorm=2.219, loss_scale=8, train_wall=11, gb_free=2.8, wall=79754
2021-06-19 16:48:11 | INFO | train_inner | epoch 003: 984 / 3002 loss=2.637, ppl=6.22, wps=5827.8, ups=0.09, wpb=64877, bsz=128, num_updates=6944, lr=9.99524e-05, gnorm=2.172, loss_scale=8, train_wall=11, gb_free=2.8, wall=79766
2021-06-19 16:48:22 | INFO | train_inner | epoch 003: 985 / 3002 loss=2.454, ppl=5.48, wps=5853.9, ups=0.09, wpb=64763, bsz=128, num_updates=6945, lr=9.99524e-05, gnorm=2.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=79777
2021-06-19 16:48:34 | INFO | train_inner | epoch 003: 986 / 3002 loss=2.562, ppl=5.91, wps=5784.2, ups=0.09, wpb=64850, bsz=128, num_updates=6946, lr=9.99524e-05, gnorm=2.126, loss_scale=8, train_wall=11, gb_free=2.8, wall=79788
2021-06-19 16:48:45 | INFO | train_inner | epoch 003: 987 / 3002 loss=2.62, ppl=6.15, wps=5915, ups=0.09, wpb=64872, bsz=128, num_updates=6947, lr=9.99524e-05, gnorm=2.165, loss_scale=8, train_wall=11, gb_free=2.8, wall=79799
2021-06-19 16:48:56 | INFO | train_inner | epoch 003: 988 / 3002 loss=2.797, ppl=6.95, wps=5815.5, ups=0.09, wpb=64814, bsz=128, num_updates=6948, lr=9.99524e-05, gnorm=2.276, loss_scale=8, train_wall=11, gb_free=2.8, wall=79810
2021-06-19 16:49:07 | INFO | train_inner | epoch 003: 989 / 3002 loss=2.511, ppl=5.7, wps=5887, ups=0.09, wpb=64893, bsz=128, num_updates=6949, lr=9.99524e-05, gnorm=2.071, loss_scale=8, train_wall=11, gb_free=2.8, wall=79821
2021-06-19 16:49:18 | INFO | train_inner | epoch 003: 990 / 3002 loss=2.648, ppl=6.27, wps=5791.1, ups=0.09, wpb=64789, bsz=128, num_updates=6950, lr=9.99524e-05, gnorm=2.168, loss_scale=8, train_wall=11, gb_free=2.8, wall=79832
2021-06-19 16:49:29 | INFO | train_inner | epoch 003: 991 / 3002 loss=2.605, ppl=6.08, wps=5898.4, ups=0.09, wpb=64870, bsz=128, num_updates=6951, lr=9.99524e-05, gnorm=2.088, loss_scale=8, train_wall=11, gb_free=2.8, wall=79843
2021-06-19 16:49:40 | INFO | train_inner | epoch 003: 992 / 3002 loss=2.699, ppl=6.49, wps=5697.9, ups=0.09, wpb=64797, bsz=128, num_updates=6952, lr=9.99524e-05, gnorm=2.188, loss_scale=8, train_wall=11, gb_free=2.8, wall=79855
2021-06-19 16:49:51 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-19 16:50:02 | INFO | train_inner | epoch 003: 994 / 3002 loss=2.725, ppl=6.61, wps=2920.3, ups=0.05, wpb=64847, bsz=128, num_updates=6953, lr=9.99524e-05, gnorm=2.16, loss_scale=4, train_wall=21, gb_free=2.8, wall=79877
2021-06-19 16:50:14 | INFO | train_inner | epoch 003: 995 / 3002 loss=2.513, ppl=5.71, wps=5857.1, ups=0.09, wpb=64900, bsz=128, num_updates=6954, lr=9.99524e-05, gnorm=2.137, loss_scale=4, train_wall=11, gb_free=2.8, wall=79888
2021-06-19 16:50:25 | INFO | train_inner | epoch 003: 996 / 3002 loss=2.669, ppl=6.36, wps=5832.9, ups=0.09, wpb=64831, bsz=128, num_updates=6955, lr=9.99524e-05, gnorm=2.092, loss_scale=4, train_wall=11, gb_free=2.8, wall=79899
2021-06-19 16:50:36 | INFO | train_inner | epoch 003: 997 / 3002 loss=2.652, ppl=6.29, wps=5865.7, ups=0.09, wpb=64824, bsz=128, num_updates=6956, lr=9.99523e-05, gnorm=2.413, loss_scale=4, train_wall=11, gb_free=2.8, wall=79910
2021-06-19 16:50:47 | INFO | train_inner | epoch 003: 998 / 3002 loss=2.661, ppl=6.33, wps=5846.7, ups=0.09, wpb=64906, bsz=128, num_updates=6957, lr=9.99523e-05, gnorm=2.211, loss_scale=4, train_wall=11, gb_free=2.8, wall=79921
2021-06-19 16:50:58 | INFO | train_inner | epoch 003: 999 / 3002 loss=2.571, ppl=5.94, wps=6004.6, ups=0.09, wpb=64805, bsz=128, num_updates=6958, lr=9.99523e-05, gnorm=2.404, loss_scale=4, train_wall=10, gb_free=2.8, wall=79932
2021-06-19 16:51:09 | INFO | train_inner | epoch 003: 1000 / 3002 loss=2.646, ppl=6.26, wps=5722.7, ups=0.09, wpb=64859, bsz=128, num_updates=6959, lr=9.99523e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=79943
2021-06-19 16:51:20 | INFO | train_inner | epoch 003: 1001 / 3002 loss=2.653, ppl=6.29, wps=5715.7, ups=0.09, wpb=64794, bsz=128, num_updates=6960, lr=9.99523e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=79955
2021-06-19 16:51:31 | INFO | train_inner | epoch 003: 1002 / 3002 loss=2.54, ppl=5.81, wps=5837.3, ups=0.09, wpb=64888, bsz=128, num_updates=6961, lr=9.99523e-05, gnorm=4.778, loss_scale=4, train_wall=11, gb_free=2.8, wall=79966
2021-06-19 16:51:43 | INFO | train_inner | epoch 003: 1003 / 3002 loss=2.605, ppl=6.08, wps=5783, ups=0.09, wpb=64753, bsz=128, num_updates=6962, lr=9.99523e-05, gnorm=2.764, loss_scale=4, train_wall=11, gb_free=2.8, wall=79977
2021-06-19 16:51:54 | INFO | train_inner | epoch 003: 1004 / 3002 loss=2.631, ppl=6.19, wps=5822.5, ups=0.09, wpb=64756, bsz=128, num_updates=6963, lr=9.99523e-05, gnorm=2.252, loss_scale=4, train_wall=11, gb_free=2.8, wall=79988
2021-06-19 16:52:05 | INFO | train_inner | epoch 003: 1005 / 3002 loss=2.597, ppl=6.05, wps=5814.9, ups=0.09, wpb=64781, bsz=128, num_updates=6964, lr=9.99523e-05, gnorm=2.259, loss_scale=4, train_wall=11, gb_free=2.8, wall=79999
2021-06-19 16:52:16 | INFO | train_inner | epoch 003: 1006 / 3002 loss=2.569, ppl=5.94, wps=5804.1, ups=0.09, wpb=64796, bsz=128, num_updates=6965, lr=9.99523e-05, gnorm=2.177, loss_scale=4, train_wall=11, gb_free=2.8, wall=80010
2021-06-19 16:52:27 | INFO | train_inner | epoch 003: 1007 / 3002 loss=2.654, ppl=6.3, wps=5814.9, ups=0.09, wpb=64804, bsz=128, num_updates=6966, lr=9.99523e-05, gnorm=2.639, loss_scale=4, train_wall=11, gb_free=2.8, wall=80021
2021-06-19 16:52:38 | INFO | train_inner | epoch 003: 1008 / 3002 loss=2.566, ppl=5.92, wps=5874.1, ups=0.09, wpb=64836, bsz=128, num_updates=6967, lr=9.99523e-05, gnorm=2.266, loss_scale=4, train_wall=11, gb_free=2.8, wall=80033
2021-06-19 16:52:49 | INFO | train_inner | epoch 003: 1009 / 3002 loss=2.831, ppl=7.11, wps=5896.9, ups=0.09, wpb=64795, bsz=128, num_updates=6968, lr=9.99523e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=80044
2021-06-19 16:53:00 | INFO | train_inner | epoch 003: 1010 / 3002 loss=2.668, ppl=6.36, wps=5853.3, ups=0.09, wpb=64874, bsz=128, num_updates=6969, lr=9.99522e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=80055
2021-06-19 16:53:11 | INFO | train_inner | epoch 003: 1011 / 3002 loss=2.554, ppl=5.87, wps=6084.2, ups=0.09, wpb=64917, bsz=128, num_updates=6970, lr=9.99522e-05, gnorm=2.152, loss_scale=4, train_wall=10, gb_free=2.8, wall=80065
2021-06-19 16:53:22 | INFO | train_inner | epoch 003: 1012 / 3002 loss=2.539, ppl=5.81, wps=5806.6, ups=0.09, wpb=64861, bsz=128, num_updates=6971, lr=9.99522e-05, gnorm=2.086, loss_scale=4, train_wall=11, gb_free=2.8, wall=80076
2021-06-19 16:53:33 | INFO | train_inner | epoch 003: 1013 / 3002 loss=2.686, ppl=6.43, wps=5854.5, ups=0.09, wpb=64786, bsz=128, num_updates=6972, lr=9.99522e-05, gnorm=2.241, loss_scale=4, train_wall=11, gb_free=2.8, wall=80088
2021-06-19 16:53:44 | INFO | train_inner | epoch 003: 1014 / 3002 loss=2.882, ppl=7.37, wps=5898.3, ups=0.09, wpb=64721, bsz=128, num_updates=6973, lr=9.99522e-05, gnorm=2.079, loss_scale=4, train_wall=11, gb_free=2.8, wall=80098
2021-06-19 16:53:55 | INFO | train_inner | epoch 003: 1015 / 3002 loss=2.611, ppl=6.11, wps=5975.9, ups=0.09, wpb=64929, bsz=128, num_updates=6974, lr=9.99522e-05, gnorm=2.138, loss_scale=4, train_wall=10, gb_free=2.8, wall=80109
2021-06-19 16:54:06 | INFO | train_inner | epoch 003: 1016 / 3002 loss=2.752, ppl=6.74, wps=5830.4, ups=0.09, wpb=64788, bsz=128, num_updates=6975, lr=9.99522e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=80120
2021-06-19 16:54:17 | INFO | train_inner | epoch 003: 1017 / 3002 loss=2.599, ppl=6.06, wps=5806, ups=0.09, wpb=64849, bsz=128, num_updates=6976, lr=9.99522e-05, gnorm=2.214, loss_scale=4, train_wall=11, gb_free=2.8, wall=80132
2021-06-19 16:54:28 | INFO | train_inner | epoch 003: 1018 / 3002 loss=2.649, ppl=6.27, wps=5902.5, ups=0.09, wpb=64739, bsz=128, num_updates=6977, lr=9.99522e-05, gnorm=2.403, loss_scale=4, train_wall=10, gb_free=2.8, wall=80143
2021-06-19 16:54:39 | INFO | train_inner | epoch 003: 1019 / 3002 loss=2.861, ppl=7.26, wps=5829, ups=0.09, wpb=64746, bsz=128, num_updates=6978, lr=9.99522e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=80154
2021-06-19 16:54:51 | INFO | train_inner | epoch 003: 1020 / 3002 loss=2.696, ppl=6.48, wps=5810.4, ups=0.09, wpb=64732, bsz=128, num_updates=6979, lr=9.99522e-05, gnorm=2.223, loss_scale=4, train_wall=11, gb_free=2.8, wall=80165
2021-06-19 16:55:02 | INFO | train_inner | epoch 003: 1021 / 3002 loss=2.597, ppl=6.05, wps=5843.5, ups=0.09, wpb=64858, bsz=128, num_updates=6980, lr=9.99522e-05, gnorm=2.273, loss_scale=4, train_wall=11, gb_free=2.8, wall=80176
2021-06-19 16:55:13 | INFO | train_inner | epoch 003: 1022 / 3002 loss=2.638, ppl=6.22, wps=5852.8, ups=0.09, wpb=64827, bsz=128, num_updates=6981, lr=9.99521e-05, gnorm=2.094, loss_scale=4, train_wall=11, gb_free=2.8, wall=80187
2021-06-19 16:55:24 | INFO | train_inner | epoch 003: 1023 / 3002 loss=2.618, ppl=6.14, wps=5893.1, ups=0.09, wpb=64899, bsz=128, num_updates=6982, lr=9.99521e-05, gnorm=2.47, loss_scale=4, train_wall=11, gb_free=2.8, wall=80198
2021-06-19 16:55:35 | INFO | train_inner | epoch 003: 1024 / 3002 loss=2.666, ppl=6.35, wps=5871.1, ups=0.09, wpb=64856, bsz=128, num_updates=6983, lr=9.99521e-05, gnorm=2.14, loss_scale=4, train_wall=11, gb_free=2.8, wall=80209
2021-06-19 16:55:46 | INFO | train_inner | epoch 003: 1025 / 3002 loss=2.598, ppl=6.05, wps=5727.1, ups=0.09, wpb=64828, bsz=128, num_updates=6984, lr=9.99521e-05, gnorm=3.5, loss_scale=4, train_wall=11, gb_free=2.8, wall=80220
2021-06-19 16:55:57 | INFO | train_inner | epoch 003: 1026 / 3002 loss=2.755, ppl=6.75, wps=5903.6, ups=0.09, wpb=64912, bsz=128, num_updates=6985, lr=9.99521e-05, gnorm=7.131, loss_scale=4, train_wall=11, gb_free=2.8, wall=80231
2021-06-19 16:56:08 | INFO | train_inner | epoch 003: 1027 / 3002 loss=2.489, ppl=5.61, wps=5775.9, ups=0.09, wpb=64755, bsz=128, num_updates=6986, lr=9.99521e-05, gnorm=2.153, loss_scale=4, train_wall=11, gb_free=2.8, wall=80243
2021-06-19 16:56:19 | INFO | train_inner | epoch 003: 1028 / 3002 loss=2.778, ppl=6.86, wps=5934.9, ups=0.09, wpb=64869, bsz=128, num_updates=6987, lr=9.99521e-05, gnorm=2.221, loss_scale=4, train_wall=10, gb_free=2.8, wall=80254
2021-06-19 16:56:30 | INFO | train_inner | epoch 003: 1029 / 3002 loss=2.592, ppl=6.03, wps=5937.9, ups=0.09, wpb=64763, bsz=128, num_updates=6988, lr=9.99521e-05, gnorm=2.092, loss_scale=4, train_wall=10, gb_free=2.8, wall=80264
2021-06-19 16:56:41 | INFO | train_inner | epoch 003: 1030 / 3002 loss=2.59, ppl=6.02, wps=5758, ups=0.09, wpb=64853, bsz=128, num_updates=6989, lr=9.99521e-05, gnorm=4.44, loss_scale=4, train_wall=11, gb_free=2.8, wall=80276
2021-06-19 16:56:52 | INFO | train_inner | epoch 003: 1031 / 3002 loss=2.655, ppl=6.3, wps=5844.2, ups=0.09, wpb=64718, bsz=128, num_updates=6990, lr=9.99521e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=80287
2021-06-19 16:57:03 | INFO | train_inner | epoch 003: 1032 / 3002 loss=2.722, ppl=6.6, wps=5879.1, ups=0.09, wpb=64905, bsz=128, num_updates=6991, lr=9.99521e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=80298
2021-06-19 16:57:15 | INFO | train_inner | epoch 003: 1033 / 3002 loss=2.73, ppl=6.63, wps=5726.6, ups=0.09, wpb=64894, bsz=128, num_updates=6992, lr=9.99521e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=80309
2021-06-19 16:57:26 | INFO | train_inner | epoch 003: 1034 / 3002 loss=2.796, ppl=6.94, wps=5843.4, ups=0.09, wpb=64869, bsz=128, num_updates=6993, lr=9.99521e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=80320
2021-06-19 16:57:37 | INFO | train_inner | epoch 003: 1035 / 3002 loss=2.771, ppl=6.83, wps=5777.4, ups=0.09, wpb=64803, bsz=128, num_updates=6994, lr=9.9952e-05, gnorm=3.828, loss_scale=4, train_wall=11, gb_free=2.8, wall=80331
2021-06-19 16:57:48 | INFO | train_inner | epoch 003: 1036 / 3002 loss=2.78, ppl=6.87, wps=5871.6, ups=0.09, wpb=64907, bsz=128, num_updates=6995, lr=9.9952e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=80343
2021-06-19 16:57:59 | INFO | train_inner | epoch 003: 1037 / 3002 loss=2.653, ppl=6.29, wps=5847.4, ups=0.09, wpb=64838, bsz=128, num_updates=6996, lr=9.9952e-05, gnorm=2.172, loss_scale=4, train_wall=11, gb_free=2.8, wall=80354
2021-06-19 16:58:10 | INFO | train_inner | epoch 003: 1038 / 3002 loss=2.618, ppl=6.14, wps=5850, ups=0.09, wpb=64852, bsz=128, num_updates=6997, lr=9.9952e-05, gnorm=2.129, loss_scale=4, train_wall=11, gb_free=2.8, wall=80365
2021-06-19 16:58:21 | INFO | train_inner | epoch 003: 1039 / 3002 loss=2.797, ppl=6.95, wps=5894, ups=0.09, wpb=64879, bsz=128, num_updates=6998, lr=9.9952e-05, gnorm=2.688, loss_scale=4, train_wall=11, gb_free=2.8, wall=80376
2021-06-19 16:58:33 | INFO | train_inner | epoch 003: 1040 / 3002 loss=2.578, ppl=5.97, wps=5770.9, ups=0.09, wpb=64801, bsz=128, num_updates=6999, lr=9.9952e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=80387
2021-06-19 16:58:44 | INFO | train_inner | epoch 003: 1041 / 3002 loss=2.66, ppl=6.32, wps=5752.2, ups=0.09, wpb=64824, bsz=128, num_updates=7000, lr=9.9952e-05, gnorm=2.902, loss_scale=4, train_wall=11, gb_free=2.8, wall=80398
2021-06-19 16:58:55 | INFO | train_inner | epoch 003: 1042 / 3002 loss=2.606, ppl=6.09, wps=5882.9, ups=0.09, wpb=64887, bsz=128, num_updates=7001, lr=9.9952e-05, gnorm=2.314, loss_scale=4, train_wall=11, gb_free=2.8, wall=80409
2021-06-19 16:59:06 | INFO | train_inner | epoch 003: 1043 / 3002 loss=2.613, ppl=6.12, wps=5883.9, ups=0.09, wpb=64802, bsz=128, num_updates=7002, lr=9.9952e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=80420
2021-06-19 16:59:17 | INFO | train_inner | epoch 003: 1044 / 3002 loss=2.496, ppl=5.64, wps=5858, ups=0.09, wpb=64814, bsz=128, num_updates=7003, lr=9.9952e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=80431
2021-06-19 16:59:28 | INFO | train_inner | epoch 003: 1045 / 3002 loss=2.717, ppl=6.57, wps=5891.1, ups=0.09, wpb=64863, bsz=128, num_updates=7004, lr=9.9952e-05, gnorm=2.146, loss_scale=4, train_wall=11, gb_free=2.8, wall=80442
2021-06-19 16:59:39 | INFO | train_inner | epoch 003: 1046 / 3002 loss=2.668, ppl=6.36, wps=5898.7, ups=0.09, wpb=64849, bsz=128, num_updates=7005, lr=9.9952e-05, gnorm=3.319, loss_scale=4, train_wall=11, gb_free=2.8, wall=80453
2021-06-19 16:59:50 | INFO | train_inner | epoch 003: 1047 / 3002 loss=2.737, ppl=6.67, wps=5767.8, ups=0.09, wpb=64766, bsz=128, num_updates=7006, lr=9.99519e-05, gnorm=2.368, loss_scale=4, train_wall=11, gb_free=2.8, wall=80465
2021-06-19 17:00:01 | INFO | train_inner | epoch 003: 1048 / 3002 loss=2.647, ppl=6.27, wps=5850.3, ups=0.09, wpb=64755, bsz=128, num_updates=7007, lr=9.99519e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=80476
2021-06-19 17:00:12 | INFO | train_inner | epoch 003: 1049 / 3002 loss=2.637, ppl=6.22, wps=5901.4, ups=0.09, wpb=64789, bsz=128, num_updates=7008, lr=9.99519e-05, gnorm=2.327, loss_scale=4, train_wall=11, gb_free=2.8, wall=80487
2021-06-19 17:00:23 | INFO | train_inner | epoch 003: 1050 / 3002 loss=2.711, ppl=6.55, wps=5845.8, ups=0.09, wpb=64835, bsz=128, num_updates=7009, lr=9.99519e-05, gnorm=2.602, loss_scale=4, train_wall=11, gb_free=2.8, wall=80498
2021-06-19 17:00:35 | INFO | train_inner | epoch 003: 1051 / 3002 loss=2.449, ppl=5.46, wps=5768.2, ups=0.09, wpb=64890, bsz=128, num_updates=7010, lr=9.99519e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=80509
2021-06-19 17:00:46 | INFO | train_inner | epoch 003: 1052 / 3002 loss=2.5, ppl=5.66, wps=5893, ups=0.09, wpb=64780, bsz=128, num_updates=7011, lr=9.99519e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=80520
2021-06-19 17:00:57 | INFO | train_inner | epoch 003: 1053 / 3002 loss=2.634, ppl=6.21, wps=5751.2, ups=0.09, wpb=64777, bsz=128, num_updates=7012, lr=9.99519e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=80531
2021-06-19 17:01:08 | INFO | train_inner | epoch 003: 1054 / 3002 loss=2.539, ppl=5.81, wps=5837.4, ups=0.09, wpb=64813, bsz=128, num_updates=7013, lr=9.99519e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=80542
2021-06-19 17:01:19 | INFO | train_inner | epoch 003: 1055 / 3002 loss=2.716, ppl=6.57, wps=5851.2, ups=0.09, wpb=64846, bsz=128, num_updates=7014, lr=9.99519e-05, gnorm=2.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=80553
2021-06-19 17:01:30 | INFO | train_inner | epoch 003: 1056 / 3002 loss=2.567, ppl=5.92, wps=5871.6, ups=0.09, wpb=64862, bsz=128, num_updates=7015, lr=9.99519e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=80564
2021-06-19 17:01:41 | INFO | train_inner | epoch 003: 1057 / 3002 loss=2.483, ppl=5.59, wps=5932.5, ups=0.09, wpb=64856, bsz=128, num_updates=7016, lr=9.99519e-05, gnorm=2.144, loss_scale=4, train_wall=10, gb_free=2.8, wall=80575
2021-06-19 17:01:52 | INFO | train_inner | epoch 003: 1058 / 3002 loss=2.595, ppl=6.04, wps=5814.8, ups=0.09, wpb=64733, bsz=128, num_updates=7017, lr=9.99519e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=80586
2021-06-19 17:02:03 | INFO | train_inner | epoch 003: 1059 / 3002 loss=2.658, ppl=6.31, wps=5997.1, ups=0.09, wpb=64880, bsz=128, num_updates=7018, lr=9.99519e-05, gnorm=2.304, loss_scale=4, train_wall=10, gb_free=2.8, wall=80597
2021-06-19 17:02:14 | INFO | train_inner | epoch 003: 1060 / 3002 loss=2.687, ppl=6.44, wps=5855.9, ups=0.09, wpb=64727, bsz=128, num_updates=7019, lr=9.99518e-05, gnorm=2.341, loss_scale=4, train_wall=11, gb_free=2.8, wall=80608
2021-06-19 17:02:25 | INFO | train_inner | epoch 003: 1061 / 3002 loss=2.639, ppl=6.23, wps=5787.9, ups=0.09, wpb=64855, bsz=128, num_updates=7020, lr=9.99518e-05, gnorm=2.295, loss_scale=4, train_wall=11, gb_free=2.8, wall=80620
2021-06-19 17:02:36 | INFO | train_inner | epoch 003: 1062 / 3002 loss=2.631, ppl=6.19, wps=5846.5, ups=0.09, wpb=64894, bsz=128, num_updates=7021, lr=9.99518e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=80631
2021-06-19 17:02:47 | INFO | train_inner | epoch 003: 1063 / 3002 loss=2.8, ppl=6.97, wps=5896.8, ups=0.09, wpb=64807, bsz=128, num_updates=7022, lr=9.99518e-05, gnorm=2.206, loss_scale=4, train_wall=11, gb_free=2.8, wall=80642
2021-06-19 17:02:58 | INFO | train_inner | epoch 003: 1064 / 3002 loss=2.539, ppl=5.81, wps=5837.7, ups=0.09, wpb=64834, bsz=128, num_updates=7023, lr=9.99518e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=80653
2021-06-19 17:03:10 | INFO | train_inner | epoch 003: 1065 / 3002 loss=2.522, ppl=5.74, wps=5841.4, ups=0.09, wpb=64856, bsz=128, num_updates=7024, lr=9.99518e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=80664
2021-06-19 17:03:21 | INFO | train_inner | epoch 003: 1066 / 3002 loss=2.617, ppl=6.14, wps=5863.2, ups=0.09, wpb=64870, bsz=128, num_updates=7025, lr=9.99518e-05, gnorm=5.24, loss_scale=4, train_wall=11, gb_free=2.8, wall=80675
2021-06-19 17:03:32 | INFO | train_inner | epoch 003: 1067 / 3002 loss=2.617, ppl=6.13, wps=5737.4, ups=0.09, wpb=64840, bsz=128, num_updates=7026, lr=9.99518e-05, gnorm=2.873, loss_scale=4, train_wall=11, gb_free=2.8, wall=80686
2021-06-19 17:03:43 | INFO | train_inner | epoch 003: 1068 / 3002 loss=2.673, ppl=6.38, wps=5846.8, ups=0.09, wpb=64802, bsz=128, num_updates=7027, lr=9.99518e-05, gnorm=2.167, loss_scale=4, train_wall=11, gb_free=2.8, wall=80697
2021-06-19 17:03:54 | INFO | train_inner | epoch 003: 1069 / 3002 loss=2.734, ppl=6.65, wps=5827.7, ups=0.09, wpb=64751, bsz=128, num_updates=7028, lr=9.99518e-05, gnorm=2.491, loss_scale=4, train_wall=11, gb_free=2.8, wall=80708
2021-06-19 17:04:05 | INFO | train_inner | epoch 003: 1070 / 3002 loss=2.709, ppl=6.54, wps=5828.8, ups=0.09, wpb=64817, bsz=128, num_updates=7029, lr=9.99518e-05, gnorm=2.524, loss_scale=4, train_wall=11, gb_free=2.8, wall=80720
2021-06-19 17:04:16 | INFO | train_inner | epoch 003: 1071 / 3002 loss=2.464, ppl=5.52, wps=5888.1, ups=0.09, wpb=64839, bsz=128, num_updates=7030, lr=9.99518e-05, gnorm=2.839, loss_scale=4, train_wall=11, gb_free=2.8, wall=80731
2021-06-19 17:04:27 | INFO | train_inner | epoch 003: 1072 / 3002 loss=2.772, ppl=6.83, wps=5859.2, ups=0.09, wpb=64766, bsz=128, num_updates=7031, lr=9.99517e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=80742
2021-06-19 17:04:38 | INFO | train_inner | epoch 003: 1073 / 3002 loss=2.484, ppl=5.59, wps=5867, ups=0.09, wpb=64880, bsz=128, num_updates=7032, lr=9.99517e-05, gnorm=2.133, loss_scale=4, train_wall=11, gb_free=2.8, wall=80753
2021-06-19 17:04:50 | INFO | train_inner | epoch 003: 1074 / 3002 loss=2.644, ppl=6.25, wps=5780.7, ups=0.09, wpb=64863, bsz=128, num_updates=7033, lr=9.99517e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=80764
2021-06-19 17:05:01 | INFO | train_inner | epoch 003: 1075 / 3002 loss=2.654, ppl=6.29, wps=5856.5, ups=0.09, wpb=64801, bsz=128, num_updates=7034, lr=9.99517e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=80775
2021-06-19 17:05:12 | INFO | train_inner | epoch 003: 1076 / 3002 loss=2.815, ppl=7.04, wps=5814.1, ups=0.09, wpb=64693, bsz=128, num_updates=7035, lr=9.99517e-05, gnorm=2.807, loss_scale=4, train_wall=11, gb_free=2.8, wall=80786
2021-06-19 17:05:23 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-19 17:05:34 | INFO | train_inner | epoch 003: 1078 / 3002 loss=2.736, ppl=6.66, wps=2925.6, ups=0.05, wpb=64801, bsz=128, num_updates=7036, lr=9.99517e-05, gnorm=2.186, loss_scale=2, train_wall=21, gb_free=2.8, wall=80808
2021-06-19 17:05:45 | INFO | train_inner | epoch 003: 1079 / 3002 loss=2.805, ppl=6.99, wps=5890, ups=0.09, wpb=64794, bsz=128, num_updates=7037, lr=9.99517e-05, gnorm=2.138, loss_scale=2, train_wall=11, gb_free=2.8, wall=80819
2021-06-19 17:05:56 | INFO | train_inner | epoch 003: 1080 / 3002 loss=2.65, ppl=6.28, wps=5856.6, ups=0.09, wpb=64906, bsz=128, num_updates=7038, lr=9.99517e-05, gnorm=2.131, loss_scale=2, train_wall=11, gb_free=2.8, wall=80830
2021-06-19 17:06:07 | INFO | train_inner | epoch 003: 1081 / 3002 loss=2.777, ppl=6.86, wps=5919.5, ups=0.09, wpb=64828, bsz=128, num_updates=7039, lr=9.99517e-05, gnorm=2.192, loss_scale=2, train_wall=10, gb_free=2.8, wall=80841
2021-06-19 17:06:18 | INFO | train_inner | epoch 003: 1082 / 3002 loss=2.783, ppl=6.88, wps=5802.5, ups=0.09, wpb=64798, bsz=128, num_updates=7040, lr=9.99517e-05, gnorm=2.279, loss_scale=2, train_wall=11, gb_free=2.8, wall=80852
2021-06-19 17:06:29 | INFO | train_inner | epoch 003: 1083 / 3002 loss=2.578, ppl=5.97, wps=5947.5, ups=0.09, wpb=64820, bsz=128, num_updates=7041, lr=9.99517e-05, gnorm=2.213, loss_scale=2, train_wall=10, gb_free=2.8, wall=80863
2021-06-19 17:06:40 | INFO | train_inner | epoch 003: 1084 / 3002 loss=2.709, ppl=6.54, wps=5808.2, ups=0.09, wpb=64847, bsz=128, num_updates=7042, lr=9.99517e-05, gnorm=2.219, loss_scale=2, train_wall=11, gb_free=2.8, wall=80875
2021-06-19 17:06:51 | INFO | train_inner | epoch 003: 1085 / 3002 loss=2.649, ppl=6.27, wps=5842.5, ups=0.09, wpb=64816, bsz=128, num_updates=7043, lr=9.99517e-05, gnorm=2.657, loss_scale=2, train_wall=11, gb_free=2.8, wall=80886
2021-06-19 17:07:03 | INFO | train_inner | epoch 003: 1086 / 3002 loss=2.564, ppl=5.91, wps=5768.5, ups=0.09, wpb=64861, bsz=128, num_updates=7044, lr=9.99516e-05, gnorm=2.212, loss_scale=2, train_wall=11, gb_free=2.8, wall=80897
2021-06-19 17:07:14 | INFO | train_inner | epoch 003: 1087 / 3002 loss=2.659, ppl=6.31, wps=5741.4, ups=0.09, wpb=64824, bsz=128, num_updates=7045, lr=9.99516e-05, gnorm=2.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=80908
2021-06-19 17:07:25 | INFO | train_inner | epoch 003: 1088 / 3002 loss=2.632, ppl=6.2, wps=5708.5, ups=0.09, wpb=64790, bsz=128, num_updates=7046, lr=9.99516e-05, gnorm=2.312, loss_scale=2, train_wall=11, gb_free=2.8, wall=80919
2021-06-19 17:07:36 | INFO | train_inner | epoch 003: 1089 / 3002 loss=2.691, ppl=6.46, wps=5834.1, ups=0.09, wpb=64786, bsz=128, num_updates=7047, lr=9.99516e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=80931
2021-06-19 17:07:47 | INFO | train_inner | epoch 003: 1090 / 3002 loss=2.674, ppl=6.38, wps=5891.8, ups=0.09, wpb=64830, bsz=128, num_updates=7048, lr=9.99516e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=80942
2021-06-19 17:07:59 | INFO | train_inner | epoch 003: 1091 / 3002 loss=2.817, ppl=7.05, wps=5747.9, ups=0.09, wpb=64794, bsz=128, num_updates=7049, lr=9.99516e-05, gnorm=2.097, loss_scale=2, train_wall=11, gb_free=2.8, wall=80953
2021-06-19 17:08:10 | INFO | train_inner | epoch 003: 1092 / 3002 loss=2.414, ppl=5.33, wps=5785.8, ups=0.09, wpb=64773, bsz=128, num_updates=7050, lr=9.99516e-05, gnorm=2.338, loss_scale=2, train_wall=11, gb_free=2.8, wall=80964
2021-06-19 17:08:21 | INFO | train_inner | epoch 003: 1093 / 3002 loss=2.728, ppl=6.63, wps=5809.6, ups=0.09, wpb=64855, bsz=128, num_updates=7051, lr=9.99516e-05, gnorm=2.147, loss_scale=2, train_wall=11, gb_free=2.8, wall=80975
2021-06-19 17:08:32 | INFO | train_inner | epoch 003: 1094 / 3002 loss=2.693, ppl=6.47, wps=5927.8, ups=0.09, wpb=64809, bsz=128, num_updates=7052, lr=9.99516e-05, gnorm=2.069, loss_scale=2, train_wall=10, gb_free=2.8, wall=80986
2021-06-19 17:08:43 | INFO | train_inner | epoch 003: 1095 / 3002 loss=2.627, ppl=6.18, wps=5863.7, ups=0.09, wpb=64839, bsz=128, num_updates=7053, lr=9.99516e-05, gnorm=2.178, loss_scale=2, train_wall=11, gb_free=2.8, wall=80997
2021-06-19 17:08:54 | INFO | train_inner | epoch 003: 1096 / 3002 loss=2.652, ppl=6.29, wps=5830.5, ups=0.09, wpb=64882, bsz=128, num_updates=7054, lr=9.99516e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=81008
2021-06-19 17:09:05 | INFO | train_inner | epoch 003: 1097 / 3002 loss=2.538, ppl=5.81, wps=5919.1, ups=0.09, wpb=64871, bsz=128, num_updates=7055, lr=9.99516e-05, gnorm=2.136, loss_scale=2, train_wall=11, gb_free=2.8, wall=81019
2021-06-19 17:09:16 | INFO | train_inner | epoch 003: 1098 / 3002 loss=2.604, ppl=6.08, wps=5922, ups=0.09, wpb=64873, bsz=128, num_updates=7056, lr=9.99515e-05, gnorm=2.304, loss_scale=2, train_wall=11, gb_free=2.8, wall=81030
2021-06-19 17:09:27 | INFO | train_inner | epoch 003: 1099 / 3002 loss=2.591, ppl=6.03, wps=5925.5, ups=0.09, wpb=64747, bsz=128, num_updates=7057, lr=9.99515e-05, gnorm=2.494, loss_scale=2, train_wall=10, gb_free=2.8, wall=81041
2021-06-19 17:09:38 | INFO | train_inner | epoch 003: 1100 / 3002 loss=2.668, ppl=6.36, wps=5724.7, ups=0.09, wpb=64783, bsz=128, num_updates=7058, lr=9.99515e-05, gnorm=2.107, loss_scale=2, train_wall=11, gb_free=2.8, wall=81053
2021-06-19 17:09:49 | INFO | train_inner | epoch 003: 1101 / 3002 loss=2.786, ppl=6.9, wps=5757.7, ups=0.09, wpb=64807, bsz=128, num_updates=7059, lr=9.99515e-05, gnorm=17.904, loss_scale=2, train_wall=11, gb_free=2.8, wall=81064
2021-06-19 17:10:00 | INFO | train_inner | epoch 003: 1102 / 3002 loss=2.589, ppl=6.02, wps=5907.9, ups=0.09, wpb=64894, bsz=128, num_updates=7060, lr=9.99515e-05, gnorm=2.575, loss_scale=2, train_wall=11, gb_free=2.8, wall=81075
2021-06-19 17:10:11 | INFO | train_inner | epoch 003: 1103 / 3002 loss=2.698, ppl=6.49, wps=5879.6, ups=0.09, wpb=64849, bsz=128, num_updates=7061, lr=9.99515e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=81086
2021-06-19 17:10:22 | INFO | train_inner | epoch 003: 1104 / 3002 loss=2.57, ppl=5.94, wps=5883.3, ups=0.09, wpb=64814, bsz=128, num_updates=7062, lr=9.99515e-05, gnorm=2.115, loss_scale=2, train_wall=11, gb_free=2.8, wall=81097
2021-06-19 17:10:34 | INFO | train_inner | epoch 003: 1105 / 3002 loss=2.604, ppl=6.08, wps=5848.6, ups=0.09, wpb=64812, bsz=128, num_updates=7063, lr=9.99515e-05, gnorm=2.157, loss_scale=2, train_wall=11, gb_free=2.8, wall=81108
2021-06-19 17:10:45 | INFO | train_inner | epoch 003: 1106 / 3002 loss=2.542, ppl=5.82, wps=5820.8, ups=0.09, wpb=64845, bsz=128, num_updates=7064, lr=9.99515e-05, gnorm=2.21, loss_scale=2, train_wall=11, gb_free=2.8, wall=81119
2021-06-19 17:10:56 | INFO | train_inner | epoch 003: 1107 / 3002 loss=2.818, ppl=7.05, wps=5956.1, ups=0.09, wpb=64883, bsz=128, num_updates=7065, lr=9.99515e-05, gnorm=3.097, loss_scale=2, train_wall=10, gb_free=2.8, wall=81130
2021-06-19 17:11:07 | INFO | train_inner | epoch 003: 1108 / 3002 loss=2.643, ppl=6.25, wps=5903.1, ups=0.09, wpb=64803, bsz=128, num_updates=7066, lr=9.99515e-05, gnorm=2.197, loss_scale=2, train_wall=11, gb_free=2.8, wall=81141
2021-06-19 17:11:17 | INFO | train_inner | epoch 003: 1109 / 3002 loss=2.635, ppl=6.21, wps=5981.4, ups=0.09, wpb=64832, bsz=128, num_updates=7067, lr=9.99515e-05, gnorm=2.355, loss_scale=2, train_wall=10, gb_free=2.8, wall=81152
2021-06-19 17:11:28 | INFO | train_inner | epoch 003: 1110 / 3002 loss=2.738, ppl=6.67, wps=5862.9, ups=0.09, wpb=64853, bsz=128, num_updates=7068, lr=9.99515e-05, gnorm=2.435, loss_scale=2, train_wall=11, gb_free=2.8, wall=81163
2021-06-19 17:11:39 | INFO | train_inner | epoch 003: 1111 / 3002 loss=2.71, ppl=6.54, wps=5924.8, ups=0.09, wpb=64879, bsz=128, num_updates=7069, lr=9.99514e-05, gnorm=2.425, loss_scale=2, train_wall=10, gb_free=2.8, wall=81174
2021-06-19 17:11:50 | INFO | train_inner | epoch 003: 1112 / 3002 loss=2.637, ppl=6.22, wps=5868.8, ups=0.09, wpb=64873, bsz=128, num_updates=7070, lr=9.99514e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=81185
2021-06-19 17:12:02 | INFO | train_inner | epoch 003: 1113 / 3002 loss=2.635, ppl=6.21, wps=5770, ups=0.09, wpb=64821, bsz=128, num_updates=7071, lr=9.99514e-05, gnorm=3.114, loss_scale=2, train_wall=11, gb_free=2.8, wall=81196
2021-06-19 17:12:13 | INFO | train_inner | epoch 003: 1114 / 3002 loss=2.651, ppl=6.28, wps=5812, ups=0.09, wpb=64811, bsz=128, num_updates=7072, lr=9.99514e-05, gnorm=2.202, loss_scale=2, train_wall=11, gb_free=2.8, wall=81207
2021-06-19 17:12:24 | INFO | train_inner | epoch 003: 1115 / 3002 loss=2.751, ppl=6.73, wps=5983.1, ups=0.09, wpb=64900, bsz=128, num_updates=7073, lr=9.99514e-05, gnorm=2.493, loss_scale=2, train_wall=10, gb_free=2.8, wall=81218
2021-06-19 17:12:35 | INFO | train_inner | epoch 003: 1116 / 3002 loss=2.624, ppl=6.16, wps=5813.6, ups=0.09, wpb=64764, bsz=128, num_updates=7074, lr=9.99514e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=81229
2021-06-19 17:12:46 | INFO | train_inner | epoch 003: 1117 / 3002 loss=2.744, ppl=6.7, wps=5786.6, ups=0.09, wpb=64812, bsz=128, num_updates=7075, lr=9.99514e-05, gnorm=6.106, loss_scale=2, train_wall=11, gb_free=2.8, wall=81240
2021-06-19 17:12:57 | INFO | train_inner | epoch 003: 1118 / 3002 loss=2.748, ppl=6.72, wps=5893, ups=0.09, wpb=64835, bsz=128, num_updates=7076, lr=9.99514e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=81251
2021-06-19 17:13:08 | INFO | train_inner | epoch 003: 1119 / 3002 loss=2.735, ppl=6.66, wps=5939.3, ups=0.09, wpb=64897, bsz=128, num_updates=7077, lr=9.99514e-05, gnorm=2.204, loss_scale=2, train_wall=10, gb_free=2.8, wall=81262
2021-06-19 17:13:19 | INFO | train_inner | epoch 003: 1120 / 3002 loss=2.791, ppl=6.92, wps=5811.4, ups=0.09, wpb=64838, bsz=128, num_updates=7078, lr=9.99514e-05, gnorm=3.496, loss_scale=2, train_wall=11, gb_free=2.8, wall=81273
2021-06-19 17:13:30 | INFO | train_inner | epoch 003: 1121 / 3002 loss=2.607, ppl=6.09, wps=5728.1, ups=0.09, wpb=64845, bsz=128, num_updates=7079, lr=9.99514e-05, gnorm=2.351, loss_scale=2, train_wall=11, gb_free=2.8, wall=81285
2021-06-19 17:13:42 | INFO | train_inner | epoch 003: 1122 / 3002 loss=2.541, ppl=5.82, wps=5807.7, ups=0.09, wpb=64743, bsz=128, num_updates=7080, lr=9.99514e-05, gnorm=2.537, loss_scale=2, train_wall=11, gb_free=2.8, wall=81296
2021-06-19 17:13:53 | INFO | train_inner | epoch 003: 1123 / 3002 loss=2.595, ppl=6.04, wps=5823.2, ups=0.09, wpb=64756, bsz=128, num_updates=7081, lr=9.99513e-05, gnorm=2.172, loss_scale=2, train_wall=11, gb_free=2.8, wall=81307
2021-06-19 17:14:04 | INFO | train_inner | epoch 003: 1124 / 3002 loss=2.659, ppl=6.32, wps=5657.7, ups=0.09, wpb=64832, bsz=128, num_updates=7082, lr=9.99513e-05, gnorm=2.543, loss_scale=2, train_wall=11, gb_free=2.8, wall=81319
2021-06-19 17:14:15 | INFO | train_inner | epoch 003: 1125 / 3002 loss=2.783, ppl=6.88, wps=5956.6, ups=0.09, wpb=64818, bsz=128, num_updates=7083, lr=9.99513e-05, gnorm=3.05, loss_scale=2, train_wall=10, gb_free=2.8, wall=81329
2021-06-19 17:14:26 | INFO | train_inner | epoch 003: 1126 / 3002 loss=2.455, ppl=5.48, wps=5799.9, ups=0.09, wpb=64850, bsz=128, num_updates=7084, lr=9.99513e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=81341
2021-06-19 17:14:37 | INFO | train_inner | epoch 003: 1127 / 3002 loss=2.619, ppl=6.14, wps=5767.3, ups=0.09, wpb=64778, bsz=128, num_updates=7085, lr=9.99513e-05, gnorm=2.367, loss_scale=2, train_wall=11, gb_free=2.8, wall=81352
2021-06-19 17:14:48 | INFO | train_inner | epoch 003: 1128 / 3002 loss=2.643, ppl=6.25, wps=5900.4, ups=0.09, wpb=64817, bsz=128, num_updates=7086, lr=9.99513e-05, gnorm=2.143, loss_scale=2, train_wall=11, gb_free=2.8, wall=81363
2021-06-19 17:15:00 | INFO | train_inner | epoch 003: 1129 / 3002 loss=2.68, ppl=6.41, wps=5786.3, ups=0.09, wpb=64929, bsz=128, num_updates=7087, lr=9.99513e-05, gnorm=2.251, loss_scale=2, train_wall=11, gb_free=2.8, wall=81374
2021-06-19 17:15:11 | INFO | train_inner | epoch 003: 1130 / 3002 loss=2.634, ppl=6.21, wps=5769.1, ups=0.09, wpb=64900, bsz=128, num_updates=7088, lr=9.99513e-05, gnorm=2.383, loss_scale=2, train_wall=11, gb_free=2.8, wall=81385
2021-06-19 17:15:22 | INFO | train_inner | epoch 003: 1131 / 3002 loss=2.599, ppl=6.06, wps=5794, ups=0.09, wpb=64825, bsz=128, num_updates=7089, lr=9.99513e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=81396
2021-06-19 17:15:33 | INFO | train_inner | epoch 003: 1132 / 3002 loss=2.627, ppl=6.18, wps=5917.7, ups=0.09, wpb=64862, bsz=128, num_updates=7090, lr=9.99513e-05, gnorm=2.642, loss_scale=2, train_wall=10, gb_free=2.8, wall=81407
2021-06-19 17:15:44 | INFO | train_inner | epoch 003: 1133 / 3002 loss=2.575, ppl=5.96, wps=5764.3, ups=0.09, wpb=64768, bsz=128, num_updates=7091, lr=9.99513e-05, gnorm=2.065, loss_scale=2, train_wall=11, gb_free=2.8, wall=81419
2021-06-19 17:15:55 | INFO | train_inner | epoch 003: 1134 / 3002 loss=2.814, ppl=7.03, wps=5837.1, ups=0.09, wpb=64798, bsz=128, num_updates=7092, lr=9.99513e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=81430
2021-06-19 17:16:06 | INFO | train_inner | epoch 003: 1135 / 3002 loss=2.636, ppl=6.21, wps=5887.2, ups=0.09, wpb=64848, bsz=128, num_updates=7093, lr=9.99513e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=81441
2021-06-19 17:16:18 | INFO | train_inner | epoch 003: 1136 / 3002 loss=2.644, ppl=6.25, wps=5812.8, ups=0.09, wpb=64839, bsz=128, num_updates=7094, lr=9.99512e-05, gnorm=2.288, loss_scale=2, train_wall=11, gb_free=2.8, wall=81452
2021-06-19 17:16:29 | INFO | train_inner | epoch 003: 1137 / 3002 loss=2.742, ppl=6.69, wps=5910, ups=0.09, wpb=64810, bsz=128, num_updates=7095, lr=9.99512e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=81463
2021-06-19 17:16:40 | INFO | train_inner | epoch 003: 1138 / 3002 loss=2.691, ppl=6.46, wps=5801.8, ups=0.09, wpb=64836, bsz=128, num_updates=7096, lr=9.99512e-05, gnorm=2.222, loss_scale=2, train_wall=11, gb_free=2.8, wall=81474
2021-06-19 17:16:51 | INFO | train_inner | epoch 003: 1139 / 3002 loss=2.672, ppl=6.38, wps=5945.6, ups=0.09, wpb=64898, bsz=128, num_updates=7097, lr=9.99512e-05, gnorm=2.398, loss_scale=2, train_wall=10, gb_free=2.8, wall=81485
2021-06-19 17:17:02 | INFO | train_inner | epoch 003: 1140 / 3002 loss=2.651, ppl=6.28, wps=5874.2, ups=0.09, wpb=64774, bsz=128, num_updates=7098, lr=9.99512e-05, gnorm=2.289, loss_scale=2, train_wall=11, gb_free=2.8, wall=81496
2021-06-19 17:17:13 | INFO | train_inner | epoch 003: 1141 / 3002 loss=2.719, ppl=6.58, wps=5898.4, ups=0.09, wpb=64890, bsz=128, num_updates=7099, lr=9.99512e-05, gnorm=2.984, loss_scale=2, train_wall=11, gb_free=2.8, wall=81507
2021-06-19 17:17:24 | INFO | train_inner | epoch 003: 1142 / 3002 loss=2.741, ppl=6.68, wps=5781.4, ups=0.09, wpb=64748, bsz=128, num_updates=7100, lr=9.99512e-05, gnorm=2.168, loss_scale=2, train_wall=11, gb_free=2.8, wall=81518
2021-06-19 17:17:35 | INFO | train_inner | epoch 003: 1143 / 3002 loss=2.818, ppl=7.05, wps=5866.5, ups=0.09, wpb=64803, bsz=128, num_updates=7101, lr=9.99512e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=81529
2021-06-19 17:17:46 | INFO | train_inner | epoch 003: 1144 / 3002 loss=2.518, ppl=5.73, wps=5693.1, ups=0.09, wpb=64750, bsz=128, num_updates=7102, lr=9.99512e-05, gnorm=2.729, loss_scale=2, train_wall=11, gb_free=2.8, wall=81541
2021-06-19 17:17:57 | INFO | train_inner | epoch 003: 1145 / 3002 loss=2.678, ppl=6.4, wps=5824, ups=0.09, wpb=64796, bsz=128, num_updates=7103, lr=9.99512e-05, gnorm=2.256, loss_scale=2, train_wall=11, gb_free=2.8, wall=81552
2021-06-19 17:18:09 | INFO | train_inner | epoch 003: 1146 / 3002 loss=2.644, ppl=6.25, wps=5805.7, ups=0.09, wpb=64725, bsz=128, num_updates=7104, lr=9.99512e-05, gnorm=2.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=81563
2021-06-19 17:18:20 | INFO | train_inner | epoch 003: 1147 / 3002 loss=2.562, ppl=5.9, wps=5754, ups=0.09, wpb=64855, bsz=128, num_updates=7105, lr=9.99512e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=81574
2021-06-19 17:18:31 | INFO | train_inner | epoch 003: 1148 / 3002 loss=2.628, ppl=6.18, wps=5949.6, ups=0.09, wpb=64808, bsz=128, num_updates=7106, lr=9.99511e-05, gnorm=2.295, loss_scale=2, train_wall=10, gb_free=2.8, wall=81585
2021-06-19 17:18:42 | INFO | train_inner | epoch 003: 1149 / 3002 loss=2.689, ppl=6.45, wps=5832.2, ups=0.09, wpb=64771, bsz=128, num_updates=7107, lr=9.99511e-05, gnorm=2.123, loss_scale=2, train_wall=11, gb_free=2.8, wall=81596
2021-06-19 17:18:53 | INFO | train_inner | epoch 003: 1150 / 3002 loss=2.523, ppl=5.75, wps=5798.1, ups=0.09, wpb=64760, bsz=128, num_updates=7108, lr=9.99511e-05, gnorm=2.103, loss_scale=2, train_wall=11, gb_free=2.8, wall=81607
2021-06-19 17:19:04 | INFO | train_inner | epoch 003: 1151 / 3002 loss=2.616, ppl=6.13, wps=5724.9, ups=0.09, wpb=64809, bsz=128, num_updates=7109, lr=9.99511e-05, gnorm=3.054, loss_scale=2, train_wall=11, gb_free=2.8, wall=81619
2021-06-19 17:19:16 | INFO | train_inner | epoch 003: 1152 / 3002 loss=2.527, ppl=5.76, wps=5793.2, ups=0.09, wpb=64884, bsz=128, num_updates=7110, lr=9.99511e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=81630
2021-06-19 17:19:27 | INFO | train_inner | epoch 003: 1153 / 3002 loss=2.593, ppl=6.03, wps=5713.2, ups=0.09, wpb=64833, bsz=128, num_updates=7111, lr=9.99511e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=81641
2021-06-19 17:19:38 | INFO | train_inner | epoch 003: 1154 / 3002 loss=2.652, ppl=6.29, wps=5863.1, ups=0.09, wpb=64728, bsz=128, num_updates=7112, lr=9.99511e-05, gnorm=2.53, loss_scale=2, train_wall=11, gb_free=2.8, wall=81652
2021-06-19 17:19:49 | INFO | train_inner | epoch 003: 1155 / 3002 loss=2.705, ppl=6.52, wps=5811, ups=0.09, wpb=64793, bsz=128, num_updates=7113, lr=9.99511e-05, gnorm=2.588, loss_scale=2, train_wall=11, gb_free=2.8, wall=81663
2021-06-19 17:20:00 | INFO | train_inner | epoch 003: 1156 / 3002 loss=2.602, ppl=6.07, wps=5765.7, ups=0.09, wpb=64803, bsz=128, num_updates=7114, lr=9.99511e-05, gnorm=2.121, loss_scale=2, train_wall=11, gb_free=2.8, wall=81675
2021-06-19 17:20:11 | INFO | train_inner | epoch 003: 1157 / 3002 loss=2.647, ppl=6.26, wps=5959, ups=0.09, wpb=64804, bsz=128, num_updates=7115, lr=9.99511e-05, gnorm=2.275, loss_scale=2, train_wall=10, gb_free=2.8, wall=81686
2021-06-19 17:20:22 | INFO | train_inner | epoch 003: 1158 / 3002 loss=2.511, ppl=5.7, wps=5910.5, ups=0.09, wpb=64847, bsz=128, num_updates=7116, lr=9.99511e-05, gnorm=2.56, loss_scale=2, train_wall=10, gb_free=2.8, wall=81696
2021-06-19 17:20:33 | INFO | train_inner | epoch 003: 1159 / 3002 loss=2.695, ppl=6.48, wps=5821.5, ups=0.09, wpb=64896, bsz=128, num_updates=7117, lr=9.99511e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=81708
2021-06-19 17:20:44 | INFO | train_inner | epoch 003: 1160 / 3002 loss=2.699, ppl=6.49, wps=5976.5, ups=0.09, wpb=64894, bsz=128, num_updates=7118, lr=9.99511e-05, gnorm=2.111, loss_scale=2, train_wall=10, gb_free=2.8, wall=81718
2021-06-19 17:20:55 | INFO | train_inner | epoch 003: 1161 / 3002 loss=2.754, ppl=6.75, wps=5825.1, ups=0.09, wpb=64826, bsz=128, num_updates=7119, lr=9.9951e-05, gnorm=13.478, loss_scale=2, train_wall=11, gb_free=2.8, wall=81730
2021-06-19 17:21:06 | INFO | train_inner | epoch 003: 1162 / 3002 loss=2.793, ppl=6.93, wps=5804.8, ups=0.09, wpb=64822, bsz=128, num_updates=7120, lr=9.9951e-05, gnorm=2.17, loss_scale=2, train_wall=11, gb_free=2.8, wall=81741
2021-06-19 17:21:17 | INFO | train_inner | epoch 003: 1163 / 3002 loss=2.71, ppl=6.54, wps=6007.8, ups=0.09, wpb=64863, bsz=128, num_updates=7121, lr=9.9951e-05, gnorm=2.081, loss_scale=2, train_wall=10, gb_free=2.8, wall=81752
2021-06-19 17:21:28 | INFO | train_inner | epoch 003: 1164 / 3002 loss=2.698, ppl=6.49, wps=5785.2, ups=0.09, wpb=64827, bsz=128, num_updates=7122, lr=9.9951e-05, gnorm=2.16, loss_scale=2, train_wall=11, gb_free=2.8, wall=81763
2021-06-19 17:21:40 | INFO | train_inner | epoch 003: 1165 / 3002 loss=2.7, ppl=6.5, wps=5846.8, ups=0.09, wpb=64859, bsz=128, num_updates=7123, lr=9.9951e-05, gnorm=4.11, loss_scale=2, train_wall=11, gb_free=2.8, wall=81774
2021-06-19 17:21:50 | INFO | train_inner | epoch 003: 1166 / 3002 loss=2.613, ppl=6.12, wps=5935.2, ups=0.09, wpb=64805, bsz=128, num_updates=7124, lr=9.9951e-05, gnorm=2.143, loss_scale=2, train_wall=10, gb_free=2.8, wall=81785
2021-06-19 17:22:01 | INFO | train_inner | epoch 003: 1167 / 3002 loss=2.78, ppl=6.87, wps=5967.6, ups=0.09, wpb=64839, bsz=128, num_updates=7125, lr=9.9951e-05, gnorm=2.253, loss_scale=2, train_wall=10, gb_free=2.8, wall=81796
2021-06-19 17:22:13 | INFO | train_inner | epoch 003: 1168 / 3002 loss=2.659, ppl=6.32, wps=5767.5, ups=0.09, wpb=64841, bsz=128, num_updates=7126, lr=9.9951e-05, gnorm=2.353, loss_scale=2, train_wall=11, gb_free=2.8, wall=81807
2021-06-19 17:22:24 | INFO | train_inner | epoch 003: 1169 / 3002 loss=2.624, ppl=6.16, wps=5917.3, ups=0.09, wpb=64915, bsz=128, num_updates=7127, lr=9.9951e-05, gnorm=2.295, loss_scale=2, train_wall=11, gb_free=2.8, wall=81818
2021-06-19 17:22:35 | INFO | train_inner | epoch 003: 1170 / 3002 loss=2.762, ppl=6.78, wps=5823.4, ups=0.09, wpb=64844, bsz=128, num_updates=7128, lr=9.9951e-05, gnorm=2.35, loss_scale=2, train_wall=11, gb_free=2.8, wall=81829
2021-06-19 17:22:46 | INFO | train_inner | epoch 003: 1171 / 3002 loss=2.641, ppl=6.24, wps=5806.1, ups=0.09, wpb=64849, bsz=128, num_updates=7129, lr=9.9951e-05, gnorm=2.7, loss_scale=2, train_wall=11, gb_free=2.8, wall=81840
2021-06-19 17:22:57 | INFO | train_inner | epoch 003: 1172 / 3002 loss=2.664, ppl=6.34, wps=5825.7, ups=0.09, wpb=64796, bsz=128, num_updates=7130, lr=9.9951e-05, gnorm=2.738, loss_scale=2, train_wall=11, gb_free=2.8, wall=81851
2021-06-19 17:23:08 | INFO | train_inner | epoch 003: 1173 / 3002 loss=2.573, ppl=5.95, wps=5790.4, ups=0.09, wpb=64843, bsz=128, num_updates=7131, lr=9.99509e-05, gnorm=2.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=81863
2021-06-19 17:23:19 | INFO | train_inner | epoch 003: 1174 / 3002 loss=2.569, ppl=5.93, wps=5867.7, ups=0.09, wpb=64862, bsz=128, num_updates=7132, lr=9.99509e-05, gnorm=2.211, loss_scale=2, train_wall=11, gb_free=2.8, wall=81874
2021-06-19 17:23:30 | INFO | train_inner | epoch 003: 1175 / 3002 loss=2.728, ppl=6.63, wps=5912.1, ups=0.09, wpb=64826, bsz=128, num_updates=7133, lr=9.99509e-05, gnorm=2.496, loss_scale=2, train_wall=11, gb_free=2.8, wall=81885
2021-06-19 17:23:41 | INFO | train_inner | epoch 003: 1176 / 3002 loss=2.697, ppl=6.49, wps=5806.4, ups=0.09, wpb=64737, bsz=128, num_updates=7134, lr=9.99509e-05, gnorm=6.058, loss_scale=2, train_wall=11, gb_free=2.8, wall=81896
2021-06-19 17:23:52 | INFO | train_inner | epoch 003: 1177 / 3002 loss=2.708, ppl=6.53, wps=5981.2, ups=0.09, wpb=64835, bsz=128, num_updates=7135, lr=9.99509e-05, gnorm=2.463, loss_scale=2, train_wall=10, gb_free=2.8, wall=81907
2021-06-19 17:24:03 | INFO | train_inner | epoch 003: 1178 / 3002 loss=2.66, ppl=6.32, wps=5795.4, ups=0.09, wpb=64818, bsz=128, num_updates=7136, lr=9.99509e-05, gnorm=2.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=81918
2021-06-19 17:24:14 | INFO | train_inner | epoch 003: 1179 / 3002 loss=2.531, ppl=5.78, wps=6035.6, ups=0.09, wpb=64932, bsz=128, num_updates=7137, lr=9.99509e-05, gnorm=2.621, loss_scale=2, train_wall=10, gb_free=2.8, wall=81928
2021-06-19 17:24:25 | INFO | train_inner | epoch 003: 1180 / 3002 loss=2.544, ppl=5.83, wps=5774.3, ups=0.09, wpb=64813, bsz=128, num_updates=7138, lr=9.99509e-05, gnorm=2.062, loss_scale=2, train_wall=11, gb_free=2.8, wall=81940
2021-06-19 17:24:37 | INFO | train_inner | epoch 003: 1181 / 3002 loss=2.58, ppl=5.98, wps=5729.6, ups=0.09, wpb=64855, bsz=128, num_updates=7139, lr=9.99509e-05, gnorm=2.127, loss_scale=2, train_wall=11, gb_free=2.8, wall=81951
2021-06-19 17:24:48 | INFO | train_inner | epoch 003: 1182 / 3002 loss=2.631, ppl=6.19, wps=5945.1, ups=0.09, wpb=64857, bsz=128, num_updates=7140, lr=9.99509e-05, gnorm=2.159, loss_scale=2, train_wall=10, gb_free=2.8, wall=81962
2021-06-19 17:24:59 | INFO | train_inner | epoch 003: 1183 / 3002 loss=2.626, ppl=6.17, wps=5868, ups=0.09, wpb=64794, bsz=128, num_updates=7141, lr=9.99509e-05, gnorm=2.211, loss_scale=2, train_wall=11, gb_free=2.8, wall=81973
2021-06-19 17:25:10 | INFO | train_inner | epoch 003: 1184 / 3002 loss=2.765, ppl=6.8, wps=5831.9, ups=0.09, wpb=64834, bsz=128, num_updates=7142, lr=9.99509e-05, gnorm=2.677, loss_scale=2, train_wall=11, gb_free=2.8, wall=81984
2021-06-19 17:25:21 | INFO | train_inner | epoch 003: 1185 / 3002 loss=2.656, ppl=6.3, wps=5790.8, ups=0.09, wpb=64866, bsz=128, num_updates=7143, lr=9.99509e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=81995
2021-06-19 17:25:32 | INFO | train_inner | epoch 003: 1186 / 3002 loss=2.812, ppl=7.02, wps=5842.9, ups=0.09, wpb=64875, bsz=128, num_updates=7144, lr=9.99508e-05, gnorm=2.26, loss_scale=2, train_wall=11, gb_free=2.8, wall=82006
2021-06-19 17:25:43 | INFO | train_inner | epoch 003: 1187 / 3002 loss=2.637, ppl=6.22, wps=5953.2, ups=0.09, wpb=64823, bsz=128, num_updates=7145, lr=9.99508e-05, gnorm=2.283, loss_scale=2, train_wall=10, gb_free=2.8, wall=82017
2021-06-19 17:25:54 | INFO | train_inner | epoch 003: 1188 / 3002 loss=2.639, ppl=6.23, wps=5924.5, ups=0.09, wpb=64865, bsz=128, num_updates=7146, lr=9.99508e-05, gnorm=3.032, loss_scale=2, train_wall=10, gb_free=2.8, wall=82028
2021-06-19 17:26:05 | INFO | train_inner | epoch 003: 1189 / 3002 loss=2.513, ppl=5.71, wps=5918.8, ups=0.09, wpb=64856, bsz=128, num_updates=7147, lr=9.99508e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=82039
2021-06-19 17:26:16 | INFO | train_inner | epoch 003: 1190 / 3002 loss=2.625, ppl=6.17, wps=5802.4, ups=0.09, wpb=64780, bsz=128, num_updates=7148, lr=9.99508e-05, gnorm=2.218, loss_scale=2, train_wall=11, gb_free=2.8, wall=82050
2021-06-19 17:26:27 | INFO | train_inner | epoch 003: 1191 / 3002 loss=2.66, ppl=6.32, wps=5789.9, ups=0.09, wpb=64802, bsz=128, num_updates=7149, lr=9.99508e-05, gnorm=2.262, loss_scale=2, train_wall=11, gb_free=2.8, wall=82062
2021-06-19 17:26:38 | INFO | train_inner | epoch 003: 1192 / 3002 loss=2.694, ppl=6.47, wps=5968.1, ups=0.09, wpb=64898, bsz=128, num_updates=7150, lr=9.99508e-05, gnorm=2.714, loss_scale=2, train_wall=10, gb_free=2.8, wall=82072
2021-06-19 17:26:49 | INFO | train_inner | epoch 003: 1193 / 3002 loss=2.64, ppl=6.23, wps=5873.3, ups=0.09, wpb=64823, bsz=128, num_updates=7151, lr=9.99508e-05, gnorm=2.273, loss_scale=2, train_wall=11, gb_free=2.8, wall=82083
2021-06-19 17:27:00 | INFO | train_inner | epoch 003: 1194 / 3002 loss=2.667, ppl=6.35, wps=5785.1, ups=0.09, wpb=64840, bsz=128, num_updates=7152, lr=9.99508e-05, gnorm=2.912, loss_scale=2, train_wall=11, gb_free=2.8, wall=82095
2021-06-19 17:27:12 | INFO | train_inner | epoch 003: 1195 / 3002 loss=2.648, ppl=6.27, wps=5756.9, ups=0.09, wpb=64864, bsz=128, num_updates=7153, lr=9.99508e-05, gnorm=2.354, loss_scale=2, train_wall=11, gb_free=2.8, wall=82106
2021-06-19 17:27:23 | INFO | train_inner | epoch 003: 1196 / 3002 loss=2.463, ppl=5.51, wps=5887.4, ups=0.09, wpb=64896, bsz=128, num_updates=7154, lr=9.99508e-05, gnorm=2.198, loss_scale=2, train_wall=11, gb_free=2.8, wall=82117
2021-06-19 17:27:34 | INFO | train_inner | epoch 003: 1197 / 3002 loss=2.808, ppl=7, wps=5919.8, ups=0.09, wpb=64843, bsz=128, num_updates=7155, lr=9.99508e-05, gnorm=2.115, loss_scale=2, train_wall=10, gb_free=2.8, wall=82128
2021-06-19 17:27:45 | INFO | train_inner | epoch 003: 1198 / 3002 loss=2.617, ppl=6.14, wps=5918.9, ups=0.09, wpb=64839, bsz=128, num_updates=7156, lr=9.99507e-05, gnorm=9.55, loss_scale=2, train_wall=11, gb_free=2.8, wall=82139
2021-06-19 17:27:56 | INFO | train_inner | epoch 003: 1199 / 3002 loss=2.614, ppl=6.12, wps=5823.9, ups=0.09, wpb=64773, bsz=128, num_updates=7157, lr=9.99507e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=82150
2021-06-19 17:28:07 | INFO | train_inner | epoch 003: 1200 / 3002 loss=2.606, ppl=6.09, wps=5897.6, ups=0.09, wpb=64807, bsz=128, num_updates=7158, lr=9.99507e-05, gnorm=2.118, loss_scale=2, train_wall=10, gb_free=2.8, wall=82161
2021-06-19 17:28:18 | INFO | train_inner | epoch 003: 1201 / 3002 loss=2.586, ppl=6, wps=5795.3, ups=0.09, wpb=64902, bsz=128, num_updates=7159, lr=9.99507e-05, gnorm=2.147, loss_scale=2, train_wall=11, gb_free=2.8, wall=82172
2021-06-19 17:28:29 | INFO | train_inner | epoch 003: 1202 / 3002 loss=2.736, ppl=6.66, wps=5918, ups=0.09, wpb=64759, bsz=128, num_updates=7160, lr=9.99507e-05, gnorm=2.239, loss_scale=2, train_wall=10, gb_free=2.8, wall=82183
2021-06-19 17:28:40 | INFO | train_inner | epoch 003: 1203 / 3002 loss=2.662, ppl=6.33, wps=5858.8, ups=0.09, wpb=64911, bsz=128, num_updates=7161, lr=9.99507e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=82194
2021-06-19 17:28:51 | INFO | train_inner | epoch 003: 1204 / 3002 loss=2.527, ppl=5.76, wps=5830.2, ups=0.09, wpb=64886, bsz=128, num_updates=7162, lr=9.99507e-05, gnorm=2.3, loss_scale=2, train_wall=11, gb_free=2.8, wall=82205
2021-06-19 17:29:02 | INFO | train_inner | epoch 003: 1205 / 3002 loss=2.713, ppl=6.55, wps=5897.4, ups=0.09, wpb=64874, bsz=128, num_updates=7163, lr=9.99507e-05, gnorm=2.238, loss_scale=4, train_wall=11, gb_free=2.8, wall=82216
2021-06-19 17:29:13 | INFO | train_inner | epoch 003: 1206 / 3002 loss=2.546, ppl=5.84, wps=5784.5, ups=0.09, wpb=64782, bsz=128, num_updates=7164, lr=9.99507e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=82228
2021-06-19 17:29:24 | INFO | train_inner | epoch 003: 1207 / 3002 loss=2.594, ppl=6.04, wps=5819.7, ups=0.09, wpb=64904, bsz=128, num_updates=7165, lr=9.99507e-05, gnorm=2.104, loss_scale=4, train_wall=11, gb_free=2.8, wall=82239
2021-06-19 17:29:35 | INFO | train_inner | epoch 003: 1208 / 3002 loss=2.636, ppl=6.22, wps=5865.6, ups=0.09, wpb=64837, bsz=128, num_updates=7166, lr=9.99507e-05, gnorm=2.421, loss_scale=4, train_wall=11, gb_free=2.8, wall=82250
2021-06-19 17:29:46 | INFO | train_inner | epoch 003: 1209 / 3002 loss=2.593, ppl=6.03, wps=5916.1, ups=0.09, wpb=64877, bsz=128, num_updates=7167, lr=9.99507e-05, gnorm=2.216, loss_scale=4, train_wall=10, gb_free=2.8, wall=82261
2021-06-19 17:29:57 | INFO | train_inner | epoch 003: 1210 / 3002 loss=2.555, ppl=5.88, wps=5902.7, ups=0.09, wpb=64947, bsz=128, num_updates=7168, lr=9.99507e-05, gnorm=2.131, loss_scale=4, train_wall=11, gb_free=2.8, wall=82272
2021-06-19 17:30:08 | INFO | train_inner | epoch 003: 1211 / 3002 loss=2.603, ppl=6.07, wps=5888.7, ups=0.09, wpb=64872, bsz=128, num_updates=7169, lr=9.99506e-05, gnorm=2.427, loss_scale=4, train_wall=11, gb_free=2.8, wall=82283
2021-06-19 17:30:19 | INFO | train_inner | epoch 003: 1212 / 3002 loss=2.582, ppl=5.99, wps=6015.6, ups=0.09, wpb=64938, bsz=128, num_updates=7170, lr=9.99506e-05, gnorm=2.103, loss_scale=4, train_wall=10, gb_free=2.8, wall=82294
2021-06-19 17:30:30 | INFO | train_inner | epoch 003: 1213 / 3002 loss=2.736, ppl=6.66, wps=5919.4, ups=0.09, wpb=64758, bsz=128, num_updates=7171, lr=9.99506e-05, gnorm=2.327, loss_scale=4, train_wall=10, gb_free=2.8, wall=82304
2021-06-19 17:30:41 | INFO | train_inner | epoch 003: 1214 / 3002 loss=2.606, ppl=6.09, wps=5765.2, ups=0.09, wpb=64828, bsz=128, num_updates=7172, lr=9.99506e-05, gnorm=2.184, loss_scale=4, train_wall=11, gb_free=2.8, wall=82316
2021-06-19 17:30:52 | INFO | train_inner | epoch 003: 1215 / 3002 loss=2.684, ppl=6.43, wps=5898.8, ups=0.09, wpb=64818, bsz=128, num_updates=7173, lr=9.99506e-05, gnorm=3.331, loss_scale=4, train_wall=11, gb_free=2.8, wall=82327
2021-06-19 17:31:03 | INFO | train_inner | epoch 003: 1216 / 3002 loss=2.643, ppl=6.24, wps=5810.7, ups=0.09, wpb=64787, bsz=128, num_updates=7174, lr=9.99506e-05, gnorm=2.352, loss_scale=4, train_wall=11, gb_free=2.8, wall=82338
2021-06-19 17:31:15 | INFO | train_inner | epoch 003: 1217 / 3002 loss=2.579, ppl=5.97, wps=5754.8, ups=0.09, wpb=64807, bsz=128, num_updates=7175, lr=9.99506e-05, gnorm=2.253, loss_scale=4, train_wall=11, gb_free=2.8, wall=82349
2021-06-19 17:31:26 | INFO | train_inner | epoch 003: 1218 / 3002 loss=2.607, ppl=6.09, wps=5886, ups=0.09, wpb=64864, bsz=128, num_updates=7176, lr=9.99506e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=82360
2021-06-19 17:31:37 | INFO | train_inner | epoch 003: 1219 / 3002 loss=2.717, ppl=6.58, wps=5780.3, ups=0.09, wpb=64758, bsz=128, num_updates=7177, lr=9.99506e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=82371
2021-06-19 17:31:48 | INFO | train_inner | epoch 003: 1220 / 3002 loss=2.775, ppl=6.85, wps=5765.9, ups=0.09, wpb=64915, bsz=128, num_updates=7178, lr=9.99506e-05, gnorm=2.454, loss_scale=4, train_wall=11, gb_free=2.8, wall=82383
2021-06-19 17:31:59 | INFO | train_inner | epoch 003: 1221 / 3002 loss=2.655, ppl=6.3, wps=5939.1, ups=0.09, wpb=64891, bsz=128, num_updates=7179, lr=9.99506e-05, gnorm=2.152, loss_scale=4, train_wall=10, gb_free=2.8, wall=82394
2021-06-19 17:32:10 | INFO | train_inner | epoch 003: 1222 / 3002 loss=2.639, ppl=6.23, wps=5849.9, ups=0.09, wpb=64817, bsz=128, num_updates=7180, lr=9.99506e-05, gnorm=2.128, loss_scale=4, train_wall=11, gb_free=2.8, wall=82405
2021-06-19 17:32:21 | INFO | train_inner | epoch 003: 1223 / 3002 loss=2.7, ppl=6.5, wps=5893.5, ups=0.09, wpb=64829, bsz=128, num_updates=7181, lr=9.99505e-05, gnorm=10.323, loss_scale=4, train_wall=11, gb_free=2.8, wall=82416
2021-06-19 17:32:32 | INFO | train_inner | epoch 003: 1224 / 3002 loss=2.696, ppl=6.48, wps=5935.2, ups=0.09, wpb=64835, bsz=128, num_updates=7182, lr=9.99505e-05, gnorm=2.207, loss_scale=4, train_wall=10, gb_free=2.8, wall=82427
2021-06-19 17:32:43 | INFO | train_inner | epoch 003: 1225 / 3002 loss=2.712, ppl=6.55, wps=5853, ups=0.09, wpb=64793, bsz=128, num_updates=7183, lr=9.99505e-05, gnorm=2.135, loss_scale=4, train_wall=11, gb_free=2.8, wall=82438
2021-06-19 17:32:54 | INFO | train_inner | epoch 003: 1226 / 3002 loss=2.616, ppl=6.13, wps=5837.8, ups=0.09, wpb=64857, bsz=128, num_updates=7184, lr=9.99505e-05, gnorm=2.291, loss_scale=4, train_wall=11, gb_free=2.8, wall=82449
2021-06-19 17:33:06 | INFO | train_inner | epoch 003: 1227 / 3002 loss=2.576, ppl=5.96, wps=5775.4, ups=0.09, wpb=64897, bsz=128, num_updates=7185, lr=9.99505e-05, gnorm=2.139, loss_scale=4, train_wall=11, gb_free=2.8, wall=82460
2021-06-19 17:33:17 | INFO | train_inner | epoch 003: 1228 / 3002 loss=2.721, ppl=6.59, wps=5917.8, ups=0.09, wpb=64853, bsz=128, num_updates=7186, lr=9.99505e-05, gnorm=2.599, loss_scale=4, train_wall=10, gb_free=2.8, wall=82471
2021-06-19 17:33:28 | INFO | train_inner | epoch 003: 1229 / 3002 loss=2.492, ppl=5.63, wps=5860.1, ups=0.09, wpb=64760, bsz=128, num_updates=7187, lr=9.99505e-05, gnorm=2.299, loss_scale=4, train_wall=11, gb_free=2.8, wall=82482
2021-06-19 17:33:39 | INFO | train_inner | epoch 003: 1230 / 3002 loss=2.677, ppl=6.4, wps=5745.9, ups=0.09, wpb=64820, bsz=128, num_updates=7188, lr=9.99505e-05, gnorm=5.694, loss_scale=4, train_wall=11, gb_free=2.8, wall=82493
2021-06-19 17:33:50 | INFO | train_inner | epoch 003: 1231 / 3002 loss=2.657, ppl=6.31, wps=5727.8, ups=0.09, wpb=64740, bsz=128, num_updates=7189, lr=9.99505e-05, gnorm=2.637, loss_scale=4, train_wall=11, gb_free=2.8, wall=82505
2021-06-19 17:34:01 | INFO | train_inner | epoch 003: 1232 / 3002 loss=2.791, ppl=6.92, wps=5866.1, ups=0.09, wpb=64912, bsz=128, num_updates=7190, lr=9.99505e-05, gnorm=2.256, loss_scale=4, train_wall=11, gb_free=2.8, wall=82516
2021-06-19 17:34:12 | INFO | train_inner | epoch 003: 1233 / 3002 loss=2.704, ppl=6.52, wps=5768.7, ups=0.09, wpb=64850, bsz=128, num_updates=7191, lr=9.99505e-05, gnorm=2.338, loss_scale=4, train_wall=11, gb_free=2.8, wall=82527
2021-06-19 17:34:24 | INFO | train_inner | epoch 003: 1234 / 3002 loss=2.69, ppl=6.45, wps=5858.6, ups=0.09, wpb=64856, bsz=128, num_updates=7192, lr=9.99505e-05, gnorm=2.124, loss_scale=4, train_wall=11, gb_free=2.8, wall=82538
2021-06-19 17:34:35 | INFO | train_inner | epoch 003: 1235 / 3002 loss=2.68, ppl=6.41, wps=5801, ups=0.09, wpb=64800, bsz=128, num_updates=7193, lr=9.99505e-05, gnorm=2.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=82549
2021-06-19 17:34:46 | INFO | train_inner | epoch 003: 1236 / 3002 loss=2.524, ppl=5.75, wps=5882.1, ups=0.09, wpb=64822, bsz=128, num_updates=7194, lr=9.99504e-05, gnorm=3.36, loss_scale=4, train_wall=11, gb_free=2.8, wall=82560
2021-06-19 17:34:57 | INFO | train_inner | epoch 003: 1237 / 3002 loss=2.473, ppl=5.55, wps=5904.9, ups=0.09, wpb=64824, bsz=128, num_updates=7195, lr=9.99504e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=82571
2021-06-19 17:35:08 | INFO | train_inner | epoch 003: 1238 / 3002 loss=2.696, ppl=6.48, wps=5793.2, ups=0.09, wpb=64832, bsz=128, num_updates=7196, lr=9.99504e-05, gnorm=2.155, loss_scale=4, train_wall=11, gb_free=2.8, wall=82582
2021-06-19 17:35:19 | INFO | train_inner | epoch 003: 1239 / 3002 loss=2.547, ppl=5.84, wps=5833.1, ups=0.09, wpb=64821, bsz=128, num_updates=7197, lr=9.99504e-05, gnorm=2.132, loss_scale=4, train_wall=11, gb_free=2.8, wall=82593
2021-06-19 17:35:30 | INFO | train_inner | epoch 003: 1240 / 3002 loss=2.587, ppl=6.01, wps=5780.1, ups=0.09, wpb=64740, bsz=128, num_updates=7198, lr=9.99504e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=82605
2021-06-19 17:35:41 | INFO | train_inner | epoch 003: 1241 / 3002 loss=2.762, ppl=6.78, wps=5890.5, ups=0.09, wpb=64863, bsz=128, num_updates=7199, lr=9.99504e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=82616
2021-06-19 17:35:52 | INFO | train_inner | epoch 003: 1242 / 3002 loss=2.761, ppl=6.78, wps=5777, ups=0.09, wpb=64734, bsz=128, num_updates=7200, lr=9.99504e-05, gnorm=2.229, loss_scale=4, train_wall=11, gb_free=2.8, wall=82627
2021-06-19 17:36:04 | INFO | train_inner | epoch 003: 1243 / 3002 loss=2.674, ppl=6.38, wps=5793.4, ups=0.09, wpb=64782, bsz=128, num_updates=7201, lr=9.99504e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=82638
2021-06-19 17:36:15 | INFO | train_inner | epoch 003: 1244 / 3002 loss=2.771, ppl=6.82, wps=5724.6, ups=0.09, wpb=64806, bsz=128, num_updates=7202, lr=9.99504e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=82649
2021-06-19 17:36:26 | INFO | train_inner | epoch 003: 1245 / 3002 loss=2.618, ppl=6.14, wps=5972.5, ups=0.09, wpb=64735, bsz=128, num_updates=7203, lr=9.99504e-05, gnorm=2.15, loss_scale=4, train_wall=10, gb_free=2.8, wall=82660
2021-06-19 17:36:37 | INFO | train_inner | epoch 003: 1246 / 3002 loss=2.78, ppl=6.87, wps=5832.6, ups=0.09, wpb=64845, bsz=128, num_updates=7204, lr=9.99504e-05, gnorm=2.854, loss_scale=4, train_wall=11, gb_free=2.8, wall=82671
2021-06-19 17:36:48 | INFO | train_inner | epoch 003: 1247 / 3002 loss=2.698, ppl=6.49, wps=5993.6, ups=0.09, wpb=64797, bsz=128, num_updates=7205, lr=9.99504e-05, gnorm=2.432, loss_scale=4, train_wall=10, gb_free=2.8, wall=82682
2021-06-19 17:36:59 | INFO | train_inner | epoch 003: 1248 / 3002 loss=2.603, ppl=6.08, wps=5917.6, ups=0.09, wpb=64886, bsz=128, num_updates=7206, lr=9.99503e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=82693
2021-06-19 17:37:10 | INFO | train_inner | epoch 003: 1249 / 3002 loss=2.775, ppl=6.85, wps=5783.6, ups=0.09, wpb=64865, bsz=128, num_updates=7207, lr=9.99503e-05, gnorm=2.163, loss_scale=4, train_wall=11, gb_free=2.8, wall=82704
2021-06-19 17:37:21 | INFO | train_inner | epoch 003: 1250 / 3002 loss=2.683, ppl=6.42, wps=5822.4, ups=0.09, wpb=64854, bsz=128, num_updates=7208, lr=9.99503e-05, gnorm=2.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=82715
2021-06-19 17:37:32 | INFO | train_inner | epoch 003: 1251 / 3002 loss=2.599, ppl=6.06, wps=5793.7, ups=0.09, wpb=64839, bsz=128, num_updates=7209, lr=9.99503e-05, gnorm=2.267, loss_scale=4, train_wall=11, gb_free=2.8, wall=82727
2021-06-19 17:37:43 | INFO | train_inner | epoch 003: 1252 / 3002 loss=2.517, ppl=5.73, wps=5818.5, ups=0.09, wpb=64814, bsz=128, num_updates=7210, lr=9.99503e-05, gnorm=2.772, loss_scale=4, train_wall=11, gb_free=2.8, wall=82738
2021-06-19 17:37:55 | INFO | train_inner | epoch 003: 1253 / 3002 loss=2.705, ppl=6.52, wps=5812.3, ups=0.09, wpb=64832, bsz=128, num_updates=7211, lr=9.99503e-05, gnorm=2.669, loss_scale=4, train_wall=11, gb_free=2.8, wall=82749
2021-06-19 17:38:06 | INFO | train_inner | epoch 003: 1254 / 3002 loss=2.545, ppl=5.84, wps=5867.4, ups=0.09, wpb=64856, bsz=128, num_updates=7212, lr=9.99503e-05, gnorm=2.5, loss_scale=4, train_wall=11, gb_free=2.8, wall=82760
2021-06-19 17:38:17 | INFO | train_inner | epoch 003: 1255 / 3002 loss=2.621, ppl=6.15, wps=5811, ups=0.09, wpb=64816, bsz=128, num_updates=7213, lr=9.99503e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=82771
2021-06-19 17:38:28 | INFO | train_inner | epoch 003: 1256 / 3002 loss=2.609, ppl=6.1, wps=5859.8, ups=0.09, wpb=64756, bsz=128, num_updates=7214, lr=9.99503e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=82782
2021-06-19 17:38:39 | INFO | train_inner | epoch 003: 1257 / 3002 loss=2.638, ppl=6.22, wps=5797.4, ups=0.09, wpb=64783, bsz=128, num_updates=7215, lr=9.99503e-05, gnorm=2.115, loss_scale=4, train_wall=11, gb_free=2.8, wall=82793
2021-06-19 17:38:50 | INFO | train_inner | epoch 003: 1258 / 3002 loss=2.669, ppl=6.36, wps=5822.7, ups=0.09, wpb=64862, bsz=128, num_updates=7216, lr=9.99503e-05, gnorm=2.164, loss_scale=4, train_wall=11, gb_free=2.8, wall=82804
2021-06-19 17:39:01 | INFO | train_inner | epoch 003: 1259 / 3002 loss=2.667, ppl=6.35, wps=5853.4, ups=0.09, wpb=64783, bsz=128, num_updates=7217, lr=9.99503e-05, gnorm=2.221, loss_scale=4, train_wall=11, gb_free=2.8, wall=82816
2021-06-19 17:39:12 | INFO | train_inner | epoch 003: 1260 / 3002 loss=2.745, ppl=6.7, wps=5937.6, ups=0.09, wpb=64869, bsz=128, num_updates=7218, lr=9.99503e-05, gnorm=2.232, loss_scale=4, train_wall=10, gb_free=2.8, wall=82826
2021-06-19 17:39:23 | INFO | train_inner | epoch 003: 1261 / 3002 loss=2.619, ppl=6.14, wps=5840.8, ups=0.09, wpb=64900, bsz=128, num_updates=7219, lr=9.99502e-05, gnorm=2.133, loss_scale=4, train_wall=11, gb_free=2.8, wall=82838
2021-06-19 17:39:34 | INFO | train_inner | epoch 003: 1262 / 3002 loss=2.627, ppl=6.18, wps=5999.1, ups=0.09, wpb=64832, bsz=128, num_updates=7220, lr=9.99502e-05, gnorm=2.258, loss_scale=4, train_wall=10, gb_free=2.8, wall=82848
2021-06-19 17:39:45 | INFO | train_inner | epoch 003: 1263 / 3002 loss=2.705, ppl=6.52, wps=5775.5, ups=0.09, wpb=64827, bsz=128, num_updates=7221, lr=9.99502e-05, gnorm=2.374, loss_scale=4, train_wall=11, gb_free=2.8, wall=82860
2021-06-19 17:39:56 | INFO | train_inner | epoch 003: 1264 / 3002 loss=2.624, ppl=6.17, wps=5835.3, ups=0.09, wpb=64866, bsz=128, num_updates=7222, lr=9.99502e-05, gnorm=2.289, loss_scale=4, train_wall=11, gb_free=2.8, wall=82871
2021-06-19 17:40:07 | INFO | train_inner | epoch 003: 1265 / 3002 loss=2.709, ppl=6.54, wps=5830.4, ups=0.09, wpb=64875, bsz=128, num_updates=7223, lr=9.99502e-05, gnorm=2.344, loss_scale=4, train_wall=11, gb_free=2.8, wall=82882
2021-06-19 17:40:19 | INFO | train_inner | epoch 003: 1266 / 3002 loss=2.823, ppl=7.08, wps=5784.7, ups=0.09, wpb=64783, bsz=128, num_updates=7224, lr=9.99502e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=82893
2021-06-19 17:40:30 | INFO | train_inner | epoch 003: 1267 / 3002 loss=2.69, ppl=6.45, wps=5843.5, ups=0.09, wpb=64846, bsz=128, num_updates=7225, lr=9.99502e-05, gnorm=2.439, loss_scale=4, train_wall=11, gb_free=2.8, wall=82904
2021-06-19 17:40:41 | INFO | train_inner | epoch 003: 1268 / 3002 loss=2.541, ppl=5.82, wps=5817.3, ups=0.09, wpb=64769, bsz=128, num_updates=7226, lr=9.99502e-05, gnorm=2.177, loss_scale=4, train_wall=11, gb_free=2.8, wall=82915
2021-06-19 17:40:52 | INFO | train_inner | epoch 003: 1269 / 3002 loss=2.804, ppl=6.99, wps=5851.3, ups=0.09, wpb=64838, bsz=128, num_updates=7227, lr=9.99502e-05, gnorm=2.177, loss_scale=4, train_wall=11, gb_free=2.8, wall=82926
2021-06-19 17:41:03 | INFO | train_inner | epoch 003: 1270 / 3002 loss=2.735, ppl=6.66, wps=5732.8, ups=0.09, wpb=64781, bsz=128, num_updates=7228, lr=9.99502e-05, gnorm=2.312, loss_scale=4, train_wall=11, gb_free=2.8, wall=82938
2021-06-19 17:41:14 | INFO | train_inner | epoch 003: 1271 / 3002 loss=2.721, ppl=6.59, wps=5814.8, ups=0.09, wpb=64803, bsz=128, num_updates=7229, lr=9.99502e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=82949
2021-06-19 17:41:26 | INFO | train_inner | epoch 003: 1272 / 3002 loss=2.769, ppl=6.82, wps=5849.6, ups=0.09, wpb=64786, bsz=128, num_updates=7230, lr=9.99502e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=82960
2021-06-19 17:41:36 | INFO | train_inner | epoch 003: 1273 / 3002 loss=2.601, ppl=6.07, wps=5964.9, ups=0.09, wpb=64782, bsz=128, num_updates=7231, lr=9.99501e-05, gnorm=2.446, loss_scale=4, train_wall=10, gb_free=2.8, wall=82971
2021-06-19 17:41:48 | INFO | train_inner | epoch 003: 1274 / 3002 loss=2.722, ppl=6.6, wps=5780.3, ups=0.09, wpb=64809, bsz=128, num_updates=7232, lr=9.99501e-05, gnorm=2.269, loss_scale=4, train_wall=11, gb_free=2.8, wall=82982
2021-06-19 17:41:59 | INFO | train_inner | epoch 003: 1275 / 3002 loss=2.688, ppl=6.44, wps=5858, ups=0.09, wpb=64794, bsz=128, num_updates=7233, lr=9.99501e-05, gnorm=2.184, loss_scale=4, train_wall=11, gb_free=2.8, wall=82993
2021-06-19 17:42:10 | INFO | train_inner | epoch 003: 1276 / 3002 loss=2.552, ppl=5.86, wps=5838.3, ups=0.09, wpb=64821, bsz=128, num_updates=7234, lr=9.99501e-05, gnorm=2.153, loss_scale=4, train_wall=11, gb_free=2.8, wall=83004
2021-06-19 17:42:21 | INFO | train_inner | epoch 003: 1277 / 3002 loss=2.635, ppl=6.21, wps=5929, ups=0.09, wpb=64860, bsz=128, num_updates=7235, lr=9.99501e-05, gnorm=2.235, loss_scale=4, train_wall=10, gb_free=2.8, wall=83015
2021-06-19 17:42:32 | INFO | train_inner | epoch 003: 1278 / 3002 loss=2.619, ppl=6.14, wps=5919.2, ups=0.09, wpb=64780, bsz=128, num_updates=7236, lr=9.99501e-05, gnorm=2.174, loss_scale=4, train_wall=10, gb_free=2.8, wall=83026
2021-06-19 17:42:43 | INFO | train_inner | epoch 003: 1279 / 3002 loss=2.725, ppl=6.61, wps=5889.7, ups=0.09, wpb=64865, bsz=128, num_updates=7237, lr=9.99501e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=83037
2021-06-19 17:42:54 | INFO | train_inner | epoch 003: 1280 / 3002 loss=2.74, ppl=6.68, wps=5732.2, ups=0.09, wpb=64820, bsz=128, num_updates=7238, lr=9.99501e-05, gnorm=2.09, loss_scale=4, train_wall=11, gb_free=2.8, wall=83048
2021-06-19 17:43:05 | INFO | train_inner | epoch 003: 1281 / 3002 loss=2.589, ppl=6.02, wps=5710.4, ups=0.09, wpb=64827, bsz=128, num_updates=7239, lr=9.99501e-05, gnorm=2.16, loss_scale=4, train_wall=11, gb_free=2.8, wall=83060
2021-06-19 17:43:16 | INFO | train_inner | epoch 003: 1282 / 3002 loss=2.536, ppl=5.8, wps=5884.3, ups=0.09, wpb=64788, bsz=128, num_updates=7240, lr=9.99501e-05, gnorm=4.549, loss_scale=4, train_wall=11, gb_free=2.8, wall=83071
2021-06-19 17:43:28 | INFO | train_inner | epoch 003: 1283 / 3002 loss=2.669, ppl=6.36, wps=5745.3, ups=0.09, wpb=64801, bsz=128, num_updates=7241, lr=9.99501e-05, gnorm=2.144, loss_scale=4, train_wall=11, gb_free=2.8, wall=83082
2021-06-19 17:43:39 | INFO | train_inner | epoch 003: 1284 / 3002 loss=2.818, ppl=7.05, wps=5926.6, ups=0.09, wpb=64792, bsz=128, num_updates=7242, lr=9.99501e-05, gnorm=2.288, loss_scale=4, train_wall=10, gb_free=2.8, wall=83093
2021-06-19 17:43:50 | INFO | train_inner | epoch 003: 1285 / 3002 loss=2.844, ppl=7.18, wps=5843.8, ups=0.09, wpb=64817, bsz=128, num_updates=7243, lr=9.99501e-05, gnorm=2.302, loss_scale=4, train_wall=11, gb_free=2.8, wall=83104
2021-06-19 17:44:01 | INFO | train_inner | epoch 003: 1286 / 3002 loss=2.582, ppl=5.99, wps=5875.3, ups=0.09, wpb=64825, bsz=128, num_updates=7244, lr=9.995e-05, gnorm=2.718, loss_scale=4, train_wall=11, gb_free=2.8, wall=83115
2021-06-19 17:44:12 | INFO | train_inner | epoch 003: 1287 / 3002 loss=2.833, ppl=7.13, wps=5853.8, ups=0.09, wpb=64805, bsz=128, num_updates=7245, lr=9.995e-05, gnorm=2.277, loss_scale=4, train_wall=11, gb_free=2.8, wall=83126
2021-06-19 17:44:23 | INFO | train_inner | epoch 003: 1288 / 3002 loss=2.706, ppl=6.52, wps=5770.4, ups=0.09, wpb=64758, bsz=128, num_updates=7246, lr=9.995e-05, gnorm=2.2, loss_scale=4, train_wall=11, gb_free=2.8, wall=83137
2021-06-19 17:44:34 | INFO | train_inner | epoch 003: 1289 / 3002 loss=2.59, ppl=6.02, wps=5766.9, ups=0.09, wpb=64835, bsz=128, num_updates=7247, lr=9.995e-05, gnorm=2.169, loss_scale=4, train_wall=11, gb_free=2.8, wall=83149
2021-06-19 17:44:45 | INFO | train_inner | epoch 003: 1290 / 3002 loss=2.655, ppl=6.3, wps=5850.9, ups=0.09, wpb=64794, bsz=128, num_updates=7248, lr=9.995e-05, gnorm=2.1, loss_scale=4, train_wall=11, gb_free=2.8, wall=83160
2021-06-19 17:44:56 | INFO | train_inner | epoch 003: 1291 / 3002 loss=2.645, ppl=6.25, wps=5922.1, ups=0.09, wpb=64836, bsz=128, num_updates=7249, lr=9.995e-05, gnorm=2.158, loss_scale=4, train_wall=10, gb_free=2.8, wall=83171
2021-06-19 17:45:07 | INFO | train_inner | epoch 003: 1292 / 3002 loss=2.598, ppl=6.06, wps=5831.1, ups=0.09, wpb=64820, bsz=128, num_updates=7250, lr=9.995e-05, gnorm=3.574, loss_scale=4, train_wall=11, gb_free=2.8, wall=83182
2021-06-19 17:45:18 | INFO | train_inner | epoch 003: 1293 / 3002 loss=2.578, ppl=5.97, wps=5961.3, ups=0.09, wpb=64828, bsz=128, num_updates=7251, lr=9.995e-05, gnorm=2.248, loss_scale=4, train_wall=10, gb_free=2.8, wall=83193
2021-06-19 17:45:29 | INFO | train_inner | epoch 003: 1294 / 3002 loss=2.655, ppl=6.3, wps=5877.8, ups=0.09, wpb=64820, bsz=128, num_updates=7252, lr=9.995e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=83204
2021-06-19 17:45:40 | INFO | train_inner | epoch 003: 1295 / 3002 loss=2.695, ppl=6.47, wps=5815.5, ups=0.09, wpb=64690, bsz=128, num_updates=7253, lr=9.995e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=83215
2021-06-19 17:45:52 | INFO | train_inner | epoch 003: 1296 / 3002 loss=2.738, ppl=6.67, wps=5777.5, ups=0.09, wpb=64702, bsz=128, num_updates=7254, lr=9.995e-05, gnorm=2.28, loss_scale=4, train_wall=11, gb_free=2.8, wall=83226
2021-06-19 17:46:03 | INFO | train_inner | epoch 003: 1297 / 3002 loss=2.598, ppl=6.05, wps=5838.7, ups=0.09, wpb=64766, bsz=128, num_updates=7255, lr=9.995e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=83237
2021-06-19 17:46:14 | INFO | train_inner | epoch 003: 1298 / 3002 loss=2.524, ppl=5.75, wps=5870, ups=0.09, wpb=64934, bsz=128, num_updates=7256, lr=9.99499e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=83248
2021-06-19 17:46:25 | INFO | train_inner | epoch 003: 1299 / 3002 loss=2.66, ppl=6.32, wps=5725.3, ups=0.09, wpb=64842, bsz=128, num_updates=7257, lr=9.99499e-05, gnorm=3.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=83259
2021-06-19 17:46:36 | INFO | train_inner | epoch 003: 1300 / 3002 loss=2.648, ppl=6.27, wps=5807.4, ups=0.09, wpb=64811, bsz=128, num_updates=7258, lr=9.99499e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=83271
2021-06-19 17:46:47 | INFO | train_inner | epoch 003: 1301 / 3002 loss=2.658, ppl=6.31, wps=5742.4, ups=0.09, wpb=64771, bsz=128, num_updates=7259, lr=9.99499e-05, gnorm=7.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=83282
2021-06-19 17:46:59 | INFO | train_inner | epoch 003: 1302 / 3002 loss=2.611, ppl=6.11, wps=5865, ups=0.09, wpb=64831, bsz=128, num_updates=7260, lr=9.99499e-05, gnorm=2.22, loss_scale=4, train_wall=11, gb_free=2.8, wall=83293
2021-06-19 17:47:10 | INFO | train_inner | epoch 003: 1303 / 3002 loss=2.627, ppl=6.18, wps=5789.7, ups=0.09, wpb=64823, bsz=128, num_updates=7261, lr=9.99499e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=83304
2021-06-19 17:47:21 | INFO | train_inner | epoch 003: 1304 / 3002 loss=2.579, ppl=5.98, wps=5937.7, ups=0.09, wpb=64917, bsz=128, num_updates=7262, lr=9.99499e-05, gnorm=2.201, loss_scale=4, train_wall=10, gb_free=2.8, wall=83315
2021-06-19 17:47:32 | INFO | train_inner | epoch 003: 1305 / 3002 loss=2.638, ppl=6.22, wps=5704.5, ups=0.09, wpb=64835, bsz=128, num_updates=7263, lr=9.99499e-05, gnorm=2.191, loss_scale=4, train_wall=11, gb_free=2.8, wall=83326
2021-06-19 17:47:43 | INFO | train_inner | epoch 003: 1306 / 3002 loss=2.566, ppl=5.92, wps=5684.2, ups=0.09, wpb=64784, bsz=128, num_updates=7264, lr=9.99499e-05, gnorm=5.61, loss_scale=4, train_wall=11, gb_free=2.8, wall=83338
2021-06-19 17:47:55 | INFO | train_inner | epoch 003: 1307 / 3002 loss=2.592, ppl=6.03, wps=5831.5, ups=0.09, wpb=64853, bsz=128, num_updates=7265, lr=9.99499e-05, gnorm=2.264, loss_scale=4, train_wall=11, gb_free=2.8, wall=83349
2021-06-19 17:48:06 | INFO | train_inner | epoch 003: 1308 / 3002 loss=2.789, ppl=6.91, wps=5813.1, ups=0.09, wpb=64778, bsz=128, num_updates=7266, lr=9.99499e-05, gnorm=2.364, loss_scale=4, train_wall=11, gb_free=2.8, wall=83360
2021-06-19 17:48:17 | INFO | train_inner | epoch 003: 1309 / 3002 loss=2.653, ppl=6.29, wps=5903.1, ups=0.09, wpb=64851, bsz=128, num_updates=7267, lr=9.99499e-05, gnorm=2.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=83371
2021-06-19 17:48:28 | INFO | train_inner | epoch 003: 1310 / 3002 loss=2.547, ppl=5.84, wps=5890.8, ups=0.09, wpb=64899, bsz=128, num_updates=7268, lr=9.99499e-05, gnorm=3.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=83382
2021-06-19 17:48:39 | INFO | train_inner | epoch 003: 1311 / 3002 loss=2.676, ppl=6.39, wps=5833, ups=0.09, wpb=64894, bsz=128, num_updates=7269, lr=9.99498e-05, gnorm=2.838, loss_scale=4, train_wall=11, gb_free=2.8, wall=83393
2021-06-19 17:48:50 | INFO | train_inner | epoch 003: 1312 / 3002 loss=2.653, ppl=6.29, wps=5848.9, ups=0.09, wpb=64823, bsz=128, num_updates=7270, lr=9.99498e-05, gnorm=2.557, loss_scale=4, train_wall=11, gb_free=2.8, wall=83404
2021-06-19 17:49:01 | INFO | train_inner | epoch 003: 1313 / 3002 loss=2.61, ppl=6.1, wps=5771.1, ups=0.09, wpb=64889, bsz=128, num_updates=7271, lr=9.99498e-05, gnorm=2.655, loss_scale=4, train_wall=11, gb_free=2.8, wall=83415
2021-06-19 17:49:12 | INFO | train_inner | epoch 003: 1314 / 3002 loss=2.563, ppl=5.91, wps=5777.4, ups=0.09, wpb=64812, bsz=128, num_updates=7272, lr=9.99498e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=83427
2021-06-19 17:49:23 | INFO | train_inner | epoch 003: 1315 / 3002 loss=2.499, ppl=5.65, wps=5853.8, ups=0.09, wpb=64845, bsz=128, num_updates=7273, lr=9.99498e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=83438
2021-06-19 17:49:35 | INFO | train_inner | epoch 003: 1316 / 3002 loss=2.664, ppl=6.34, wps=5798.4, ups=0.09, wpb=64782, bsz=128, num_updates=7274, lr=9.99498e-05, gnorm=2.359, loss_scale=4, train_wall=11, gb_free=2.8, wall=83449
2021-06-19 17:49:46 | INFO | train_inner | epoch 003: 1317 / 3002 loss=2.666, ppl=6.35, wps=5793, ups=0.09, wpb=64792, bsz=128, num_updates=7275, lr=9.99498e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=83460
2021-06-19 17:49:57 | INFO | train_inner | epoch 003: 1318 / 3002 loss=2.617, ppl=6.13, wps=5807, ups=0.09, wpb=64835, bsz=128, num_updates=7276, lr=9.99498e-05, gnorm=2.235, loss_scale=4, train_wall=11, gb_free=2.8, wall=83471
2021-06-19 17:50:08 | INFO | train_inner | epoch 003: 1319 / 3002 loss=2.722, ppl=6.6, wps=5873.9, ups=0.09, wpb=64842, bsz=128, num_updates=7277, lr=9.99498e-05, gnorm=2.247, loss_scale=4, train_wall=11, gb_free=2.8, wall=83482
2021-06-19 17:50:19 | INFO | train_inner | epoch 003: 1320 / 3002 loss=2.528, ppl=5.77, wps=5778.4, ups=0.09, wpb=64879, bsz=128, num_updates=7278, lr=9.99498e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=83494
2021-06-19 17:50:30 | INFO | train_inner | epoch 003: 1321 / 3002 loss=2.755, ppl=6.75, wps=5850.3, ups=0.09, wpb=64782, bsz=128, num_updates=7279, lr=9.99498e-05, gnorm=5.651, loss_scale=4, train_wall=11, gb_free=2.8, wall=83505
2021-06-19 17:50:42 | INFO | train_inner | epoch 003: 1322 / 3002 loss=2.562, ppl=5.91, wps=5737.9, ups=0.09, wpb=64818, bsz=128, num_updates=7280, lr=9.99498e-05, gnorm=2.393, loss_scale=4, train_wall=11, gb_free=2.8, wall=83516
2021-06-19 17:50:53 | INFO | train_inner | epoch 003: 1323 / 3002 loss=2.539, ppl=5.81, wps=5899.2, ups=0.09, wpb=64797, bsz=128, num_updates=7281, lr=9.99497e-05, gnorm=2.496, loss_scale=4, train_wall=11, gb_free=2.8, wall=83527
2021-06-19 17:51:03 | INFO | train_inner | epoch 003: 1324 / 3002 loss=2.546, ppl=5.84, wps=6012.6, ups=0.09, wpb=64854, bsz=128, num_updates=7282, lr=9.99497e-05, gnorm=2.19, loss_scale=4, train_wall=10, gb_free=2.8, wall=83538
2021-06-19 17:51:15 | INFO | train_inner | epoch 003: 1325 / 3002 loss=2.688, ppl=6.44, wps=5767.5, ups=0.09, wpb=64698, bsz=128, num_updates=7283, lr=9.99497e-05, gnorm=6.349, loss_scale=4, train_wall=11, gb_free=2.8, wall=83549
2021-06-19 17:51:26 | INFO | train_inner | epoch 003: 1326 / 3002 loss=2.66, ppl=6.32, wps=5925.1, ups=0.09, wpb=64855, bsz=128, num_updates=7284, lr=9.99497e-05, gnorm=2.24, loss_scale=4, train_wall=10, gb_free=2.8, wall=83560
2021-06-19 17:51:37 | INFO | train_inner | epoch 003: 1327 / 3002 loss=2.785, ppl=6.89, wps=5822.1, ups=0.09, wpb=64780, bsz=128, num_updates=7285, lr=9.99497e-05, gnorm=7.818, loss_scale=4, train_wall=11, gb_free=2.8, wall=83571
2021-06-19 17:51:48 | INFO | train_inner | epoch 003: 1328 / 3002 loss=2.613, ppl=6.12, wps=5901.4, ups=0.09, wpb=64836, bsz=128, num_updates=7286, lr=9.99497e-05, gnorm=2.601, loss_scale=4, train_wall=11, gb_free=2.8, wall=83582
2021-06-19 17:51:59 | INFO | train_inner | epoch 003: 1329 / 3002 loss=2.731, ppl=6.64, wps=5832, ups=0.09, wpb=64771, bsz=128, num_updates=7287, lr=9.99497e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=83593
2021-06-19 17:52:10 | INFO | train_inner | epoch 003: 1330 / 3002 loss=2.602, ppl=6.07, wps=5923.6, ups=0.09, wpb=64877, bsz=128, num_updates=7288, lr=9.99497e-05, gnorm=2.339, loss_scale=4, train_wall=11, gb_free=2.8, wall=83604
2021-06-19 17:52:21 | INFO | train_inner | epoch 003: 1331 / 3002 loss=2.645, ppl=6.25, wps=5867.7, ups=0.09, wpb=64852, bsz=128, num_updates=7289, lr=9.99497e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=83615
2021-06-19 17:52:32 | INFO | train_inner | epoch 003: 1332 / 3002 loss=2.545, ppl=5.84, wps=5837.3, ups=0.09, wpb=64842, bsz=128, num_updates=7290, lr=9.99497e-05, gnorm=2.325, loss_scale=4, train_wall=11, gb_free=2.8, wall=83626
2021-06-19 17:52:43 | INFO | train_inner | epoch 003: 1333 / 3002 loss=2.64, ppl=6.23, wps=5967.4, ups=0.09, wpb=64872, bsz=128, num_updates=7291, lr=9.99497e-05, gnorm=2.306, loss_scale=8, train_wall=10, gb_free=2.8, wall=83637
2021-06-19 17:52:54 | INFO | train_inner | epoch 003: 1334 / 3002 loss=2.482, ppl=5.59, wps=5845.8, ups=0.09, wpb=64886, bsz=128, num_updates=7292, lr=9.99497e-05, gnorm=6.727, loss_scale=8, train_wall=11, gb_free=2.8, wall=83648
2021-06-19 17:53:05 | INFO | train_inner | epoch 003: 1335 / 3002 loss=2.716, ppl=6.57, wps=5776.5, ups=0.09, wpb=64829, bsz=128, num_updates=7293, lr=9.99497e-05, gnorm=2.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=83659
2021-06-19 17:53:16 | INFO | train_inner | epoch 003: 1336 / 3002 loss=2.621, ppl=6.15, wps=5886.1, ups=0.09, wpb=64791, bsz=128, num_updates=7294, lr=9.99496e-05, gnorm=2.344, loss_scale=8, train_wall=11, gb_free=2.8, wall=83670
2021-06-19 17:53:27 | INFO | train_inner | epoch 003: 1337 / 3002 loss=2.661, ppl=6.32, wps=5837.7, ups=0.09, wpb=64821, bsz=128, num_updates=7295, lr=9.99496e-05, gnorm=2.233, loss_scale=8, train_wall=11, gb_free=2.8, wall=83682
2021-06-19 17:53:38 | INFO | train_inner | epoch 003: 1338 / 3002 loss=2.673, ppl=6.38, wps=5746, ups=0.09, wpb=64868, bsz=128, num_updates=7296, lr=9.99496e-05, gnorm=2.427, loss_scale=8, train_wall=11, gb_free=2.8, wall=83693
2021-06-19 17:53:50 | INFO | train_inner | epoch 003: 1339 / 3002 loss=2.64, ppl=6.23, wps=5784.4, ups=0.09, wpb=64752, bsz=128, num_updates=7297, lr=9.99496e-05, gnorm=2.559, loss_scale=8, train_wall=11, gb_free=2.8, wall=83704
2021-06-19 17:54:01 | INFO | train_inner | epoch 003: 1340 / 3002 loss=2.781, ppl=6.87, wps=5798.6, ups=0.09, wpb=64870, bsz=128, num_updates=7298, lr=9.99496e-05, gnorm=2.385, loss_scale=8, train_wall=11, gb_free=2.8, wall=83715
2021-06-19 17:54:12 | INFO | train_inner | epoch 003: 1341 / 3002 loss=2.676, ppl=6.39, wps=5873.6, ups=0.09, wpb=64912, bsz=128, num_updates=7299, lr=9.99496e-05, gnorm=3.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=83726
2021-06-19 17:54:23 | INFO | train_inner | epoch 003: 1342 / 3002 loss=2.607, ppl=6.09, wps=5847.7, ups=0.09, wpb=64831, bsz=128, num_updates=7300, lr=9.99496e-05, gnorm=15.518, loss_scale=8, train_wall=11, gb_free=2.8, wall=83737
2021-06-19 17:54:34 | INFO | train_inner | epoch 003: 1343 / 3002 loss=2.508, ppl=5.69, wps=5912, ups=0.09, wpb=64897, bsz=128, num_updates=7301, lr=9.99496e-05, gnorm=4.662, loss_scale=8, train_wall=11, gb_free=2.8, wall=83748
2021-06-19 17:54:45 | INFO | train_inner | epoch 003: 1344 / 3002 loss=2.77, ppl=6.82, wps=5824.9, ups=0.09, wpb=64828, bsz=128, num_updates=7302, lr=9.99496e-05, gnorm=2.33, loss_scale=8, train_wall=11, gb_free=2.8, wall=83759
2021-06-19 17:54:56 | INFO | train_inner | epoch 003: 1345 / 3002 loss=2.657, ppl=6.31, wps=5830.9, ups=0.09, wpb=64806, bsz=128, num_updates=7303, lr=9.99496e-05, gnorm=2.266, loss_scale=8, train_wall=11, gb_free=2.8, wall=83771
2021-06-19 17:55:07 | INFO | train_inner | epoch 003: 1346 / 3002 loss=2.721, ppl=6.59, wps=5874, ups=0.09, wpb=64805, bsz=128, num_updates=7304, lr=9.99496e-05, gnorm=2.359, loss_scale=8, train_wall=11, gb_free=2.8, wall=83782
2021-06-19 17:55:18 | INFO | train_inner | epoch 003: 1347 / 3002 loss=2.727, ppl=6.62, wps=5834.7, ups=0.09, wpb=64811, bsz=128, num_updates=7305, lr=9.99496e-05, gnorm=2.511, loss_scale=8, train_wall=11, gb_free=2.8, wall=83793
2021-06-19 17:55:29 | INFO | train_inner | epoch 003: 1348 / 3002 loss=2.843, ppl=7.18, wps=5889.7, ups=0.09, wpb=64859, bsz=128, num_updates=7306, lr=9.99495e-05, gnorm=2.21, loss_scale=8, train_wall=11, gb_free=2.8, wall=83804
2021-06-19 17:55:40 | INFO | train_inner | epoch 003: 1349 / 3002 loss=2.494, ppl=5.63, wps=5981.5, ups=0.09, wpb=64831, bsz=128, num_updates=7307, lr=9.99495e-05, gnorm=2.449, loss_scale=8, train_wall=10, gb_free=2.8, wall=83815
2021-06-19 17:55:51 | INFO | train_inner | epoch 003: 1350 / 3002 loss=2.86, ppl=7.26, wps=5761.8, ups=0.09, wpb=64659, bsz=128, num_updates=7308, lr=9.99495e-05, gnorm=2.392, loss_scale=8, train_wall=11, gb_free=2.8, wall=83826
2021-06-19 17:56:02 | INFO | train_inner | epoch 003: 1351 / 3002 loss=2.721, ppl=6.59, wps=5896.4, ups=0.09, wpb=64729, bsz=128, num_updates=7309, lr=9.99495e-05, gnorm=3.01, loss_scale=8, train_wall=10, gb_free=2.8, wall=83837
2021-06-19 17:56:14 | INFO | train_inner | epoch 003: 1352 / 3002 loss=2.711, ppl=6.55, wps=5837.8, ups=0.09, wpb=64808, bsz=128, num_updates=7310, lr=9.99495e-05, gnorm=2.28, loss_scale=8, train_wall=11, gb_free=2.8, wall=83848
2021-06-19 17:56:25 | INFO | train_inner | epoch 003: 1353 / 3002 loss=2.691, ppl=6.46, wps=5828.5, ups=0.09, wpb=64831, bsz=128, num_updates=7311, lr=9.99495e-05, gnorm=2.268, loss_scale=8, train_wall=11, gb_free=2.8, wall=83859
2021-06-19 17:56:36 | INFO | train_inner | epoch 003: 1354 / 3002 loss=2.958, ppl=7.77, wps=5868.4, ups=0.09, wpb=64846, bsz=128, num_updates=7312, lr=9.99495e-05, gnorm=2.231, loss_scale=8, train_wall=11, gb_free=2.8, wall=83870
2021-06-19 17:56:47 | INFO | train_inner | epoch 003: 1355 / 3002 loss=2.771, ppl=6.83, wps=5919.9, ups=0.09, wpb=64900, bsz=128, num_updates=7313, lr=9.99495e-05, gnorm=2.256, loss_scale=8, train_wall=10, gb_free=2.8, wall=83881
2021-06-19 17:56:58 | INFO | train_inner | epoch 003: 1356 / 3002 loss=2.779, ppl=6.86, wps=5674.9, ups=0.09, wpb=64792, bsz=128, num_updates=7314, lr=9.99495e-05, gnorm=3.736, loss_scale=8, train_wall=11, gb_free=2.8, wall=83892
2021-06-19 17:57:09 | INFO | train_inner | epoch 003: 1357 / 3002 loss=2.751, ppl=6.73, wps=5833.6, ups=0.09, wpb=64901, bsz=128, num_updates=7315, lr=9.99495e-05, gnorm=2.636, loss_scale=8, train_wall=11, gb_free=2.8, wall=83904
2021-06-19 17:57:20 | INFO | train_inner | epoch 003: 1358 / 3002 loss=2.639, ppl=6.23, wps=5849.2, ups=0.09, wpb=64836, bsz=128, num_updates=7316, lr=9.99495e-05, gnorm=2.346, loss_scale=8, train_wall=11, gb_free=2.8, wall=83915
2021-06-19 17:57:31 | INFO | train_inner | epoch 003: 1359 / 3002 loss=2.658, ppl=6.31, wps=5833.6, ups=0.09, wpb=64875, bsz=128, num_updates=7317, lr=9.99495e-05, gnorm=2.137, loss_scale=8, train_wall=11, gb_free=2.8, wall=83926
2021-06-19 17:57:42 | INFO | train_inner | epoch 003: 1360 / 3002 loss=2.866, ppl=7.29, wps=5957.8, ups=0.09, wpb=64843, bsz=128, num_updates=7318, lr=9.99495e-05, gnorm=2.296, loss_scale=8, train_wall=10, gb_free=2.8, wall=83937
2021-06-19 17:57:53 | INFO | train_inner | epoch 003: 1361 / 3002 loss=2.745, ppl=6.71, wps=6002.2, ups=0.09, wpb=64809, bsz=128, num_updates=7319, lr=9.99494e-05, gnorm=3.244, loss_scale=8, train_wall=10, gb_free=2.8, wall=83947
2021-06-19 17:58:04 | INFO | train_inner | epoch 003: 1362 / 3002 loss=2.772, ppl=6.83, wps=5712.3, ups=0.09, wpb=64879, bsz=128, num_updates=7320, lr=9.99494e-05, gnorm=2.399, loss_scale=8, train_wall=11, gb_free=2.8, wall=83959
2021-06-19 17:58:16 | INFO | train_inner | epoch 003: 1363 / 3002 loss=2.433, ppl=5.4, wps=5835.4, ups=0.09, wpb=64872, bsz=128, num_updates=7321, lr=9.99494e-05, gnorm=2.253, loss_scale=8, train_wall=11, gb_free=2.8, wall=83970
2021-06-19 17:58:27 | INFO | train_inner | epoch 003: 1364 / 3002 loss=2.658, ppl=6.31, wps=5910.9, ups=0.09, wpb=64748, bsz=128, num_updates=7322, lr=9.99494e-05, gnorm=2.187, loss_scale=8, train_wall=10, gb_free=2.8, wall=83981
2021-06-19 17:58:38 | INFO | train_inner | epoch 003: 1365 / 3002 loss=2.776, ppl=6.85, wps=5856.6, ups=0.09, wpb=64888, bsz=128, num_updates=7323, lr=9.99494e-05, gnorm=2.79, loss_scale=8, train_wall=11, gb_free=2.8, wall=83992
2021-06-19 17:58:49 | INFO | train_inner | epoch 003: 1366 / 3002 loss=2.746, ppl=6.71, wps=5897.7, ups=0.09, wpb=64813, bsz=128, num_updates=7324, lr=9.99494e-05, gnorm=2.237, loss_scale=8, train_wall=11, gb_free=2.8, wall=84003
2021-06-19 17:59:00 | INFO | train_inner | epoch 003: 1367 / 3002 loss=2.778, ppl=6.86, wps=5804.9, ups=0.09, wpb=64754, bsz=128, num_updates=7325, lr=9.99494e-05, gnorm=2.288, loss_scale=8, train_wall=11, gb_free=2.8, wall=84014
2021-06-19 17:59:11 | INFO | train_inner | epoch 003: 1368 / 3002 loss=2.585, ppl=6, wps=5844.6, ups=0.09, wpb=64787, bsz=128, num_updates=7326, lr=9.99494e-05, gnorm=2.239, loss_scale=8, train_wall=11, gb_free=2.8, wall=84025
2021-06-19 17:59:22 | INFO | train_inner | epoch 003: 1369 / 3002 loss=2.644, ppl=6.25, wps=5861, ups=0.09, wpb=64889, bsz=128, num_updates=7327, lr=9.99494e-05, gnorm=2.291, loss_scale=8, train_wall=11, gb_free=2.8, wall=84036
2021-06-19 17:59:33 | INFO | train_inner | epoch 003: 1370 / 3002 loss=2.772, ppl=6.83, wps=5775.6, ups=0.09, wpb=64843, bsz=128, num_updates=7328, lr=9.99494e-05, gnorm=2.196, loss_scale=8, train_wall=11, gb_free=2.8, wall=84047
2021-06-19 17:59:44 | INFO | train_inner | epoch 003: 1371 / 3002 loss=2.682, ppl=6.42, wps=5955.3, ups=0.09, wpb=64864, bsz=128, num_updates=7329, lr=9.99494e-05, gnorm=2.168, loss_scale=8, train_wall=10, gb_free=2.8, wall=84058
2021-06-19 17:59:55 | INFO | train_inner | epoch 003: 1372 / 3002 loss=2.649, ppl=6.27, wps=5896.9, ups=0.09, wpb=64869, bsz=128, num_updates=7330, lr=9.99494e-05, gnorm=2.166, loss_scale=8, train_wall=11, gb_free=2.8, wall=84069
2021-06-19 18:00:06 | INFO | train_inner | epoch 003: 1373 / 3002 loss=2.742, ppl=6.69, wps=5755.9, ups=0.09, wpb=64781, bsz=128, num_updates=7331, lr=9.99493e-05, gnorm=2.37, loss_scale=8, train_wall=11, gb_free=2.8, wall=84081
2021-06-19 18:00:17 | INFO | train_inner | epoch 003: 1374 / 3002 loss=2.723, ppl=6.6, wps=5828.2, ups=0.09, wpb=64843, bsz=128, num_updates=7332, lr=9.99493e-05, gnorm=2.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=84092
2021-06-19 18:00:29 | INFO | train_inner | epoch 003: 1375 / 3002 loss=2.616, ppl=6.13, wps=5794.2, ups=0.09, wpb=64793, bsz=128, num_updates=7333, lr=9.99493e-05, gnorm=2.418, loss_scale=8, train_wall=11, gb_free=2.8, wall=84103
2021-06-19 18:00:40 | INFO | train_inner | epoch 003: 1376 / 3002 loss=2.575, ppl=5.96, wps=5852.6, ups=0.09, wpb=64766, bsz=128, num_updates=7334, lr=9.99493e-05, gnorm=2.231, loss_scale=8, train_wall=11, gb_free=2.8, wall=84114
2021-06-19 18:00:51 | INFO | train_inner | epoch 003: 1377 / 3002 loss=2.692, ppl=6.46, wps=5743.7, ups=0.09, wpb=64860, bsz=128, num_updates=7335, lr=9.99493e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=84125
2021-06-19 18:01:02 | INFO | train_inner | epoch 003: 1378 / 3002 loss=2.717, ppl=6.58, wps=5851.2, ups=0.09, wpb=64833, bsz=128, num_updates=7336, lr=9.99493e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=84136
2021-06-19 18:01:13 | INFO | train_inner | epoch 003: 1379 / 3002 loss=2.591, ppl=6.02, wps=5859.4, ups=0.09, wpb=64797, bsz=128, num_updates=7337, lr=9.99493e-05, gnorm=2.371, loss_scale=8, train_wall=11, gb_free=2.8, wall=84147
2021-06-19 18:01:24 | INFO | train_inner | epoch 003: 1380 / 3002 loss=2.754, ppl=6.75, wps=5917.6, ups=0.09, wpb=64874, bsz=128, num_updates=7338, lr=9.99493e-05, gnorm=2.365, loss_scale=8, train_wall=10, gb_free=2.8, wall=84158
2021-06-19 18:01:35 | INFO | train_inner | epoch 003: 1381 / 3002 loss=2.576, ppl=5.96, wps=5882.1, ups=0.09, wpb=64903, bsz=128, num_updates=7339, lr=9.99493e-05, gnorm=2.618, loss_scale=8, train_wall=11, gb_free=2.8, wall=84169
2021-06-19 18:01:46 | INFO | train_inner | epoch 003: 1382 / 3002 loss=2.706, ppl=6.52, wps=5849.7, ups=0.09, wpb=64927, bsz=128, num_updates=7340, lr=9.99493e-05, gnorm=2.223, loss_scale=8, train_wall=11, gb_free=2.8, wall=84181
2021-06-19 18:01:57 | INFO | train_inner | epoch 003: 1383 / 3002 loss=2.793, ppl=6.93, wps=5829.9, ups=0.09, wpb=64899, bsz=128, num_updates=7341, lr=9.99493e-05, gnorm=2.246, loss_scale=8, train_wall=11, gb_free=2.8, wall=84192
2021-06-19 18:02:08 | INFO | train_inner | epoch 003: 1384 / 3002 loss=2.671, ppl=6.37, wps=5846.1, ups=0.09, wpb=64743, bsz=128, num_updates=7342, lr=9.99493e-05, gnorm=2.138, loss_scale=8, train_wall=11, gb_free=2.8, wall=84203
2021-06-19 18:02:19 | INFO | train_inner | epoch 003: 1385 / 3002 loss=2.737, ppl=6.67, wps=6018.2, ups=0.09, wpb=64783, bsz=128, num_updates=7343, lr=9.99493e-05, gnorm=2.309, loss_scale=8, train_wall=10, gb_free=2.8, wall=84213
2021-06-19 18:02:30 | INFO | train_inner | epoch 003: 1386 / 3002 loss=2.68, ppl=6.41, wps=5931.2, ups=0.09, wpb=64777, bsz=128, num_updates=7344, lr=9.99492e-05, gnorm=2.159, loss_scale=8, train_wall=10, gb_free=2.8, wall=84224
2021-06-19 18:02:41 | INFO | train_inner | epoch 003: 1387 / 3002 loss=2.807, ppl=7, wps=5907.9, ups=0.09, wpb=64770, bsz=128, num_updates=7345, lr=9.99492e-05, gnorm=5.546, loss_scale=8, train_wall=10, gb_free=2.8, wall=84235
2021-06-19 18:02:52 | INFO | train_inner | epoch 003: 1388 / 3002 loss=2.526, ppl=5.76, wps=5748.7, ups=0.09, wpb=64862, bsz=128, num_updates=7346, lr=9.99492e-05, gnorm=2.123, loss_scale=8, train_wall=11, gb_free=2.8, wall=84247
2021-06-19 18:03:04 | INFO | train_inner | epoch 003: 1389 / 3002 loss=2.496, ppl=5.64, wps=5757.1, ups=0.09, wpb=64913, bsz=128, num_updates=7347, lr=9.99492e-05, gnorm=3.524, loss_scale=8, train_wall=11, gb_free=2.8, wall=84258
2021-06-19 18:03:15 | INFO | train_inner | epoch 003: 1390 / 3002 loss=2.777, ppl=6.85, wps=5711.5, ups=0.09, wpb=64792, bsz=128, num_updates=7348, lr=9.99492e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=84269
2021-06-19 18:03:26 | INFO | train_inner | epoch 003: 1391 / 3002 loss=2.737, ppl=6.67, wps=5995.9, ups=0.09, wpb=64752, bsz=128, num_updates=7349, lr=9.99492e-05, gnorm=2.127, loss_scale=8, train_wall=10, gb_free=2.8, wall=84280
2021-06-19 18:03:37 | INFO | train_inner | epoch 003: 1392 / 3002 loss=2.525, ppl=5.75, wps=5847.4, ups=0.09, wpb=64888, bsz=128, num_updates=7350, lr=9.99492e-05, gnorm=2.427, loss_scale=8, train_wall=11, gb_free=2.8, wall=84291
2021-06-19 18:03:48 | INFO | train_inner | epoch 003: 1393 / 3002 loss=2.687, ppl=6.44, wps=5835.6, ups=0.09, wpb=64879, bsz=128, num_updates=7351, lr=9.99492e-05, gnorm=2.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=84302
2021-06-19 18:03:59 | INFO | train_inner | epoch 003: 1394 / 3002 loss=2.672, ppl=6.37, wps=5927.6, ups=0.09, wpb=64841, bsz=128, num_updates=7352, lr=9.99492e-05, gnorm=2.547, loss_scale=8, train_wall=10, gb_free=2.8, wall=84313
2021-06-19 18:04:10 | INFO | train_inner | epoch 003: 1395 / 3002 loss=2.777, ppl=6.85, wps=5940.1, ups=0.09, wpb=64956, bsz=128, num_updates=7353, lr=9.99492e-05, gnorm=2.393, loss_scale=8, train_wall=10, gb_free=2.8, wall=84324
2021-06-19 18:04:21 | INFO | train_inner | epoch 003: 1396 / 3002 loss=2.652, ppl=6.29, wps=5803.5, ups=0.09, wpb=64864, bsz=128, num_updates=7354, lr=9.99492e-05, gnorm=2.151, loss_scale=8, train_wall=11, gb_free=2.8, wall=84335
2021-06-19 18:04:32 | INFO | train_inner | epoch 003: 1397 / 3002 loss=2.714, ppl=6.56, wps=5810, ups=0.09, wpb=64831, bsz=128, num_updates=7355, lr=9.99492e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=84346
2021-06-19 18:04:43 | INFO | train_inner | epoch 003: 1398 / 3002 loss=2.503, ppl=5.67, wps=5921.6, ups=0.09, wpb=64789, bsz=128, num_updates=7356, lr=9.99491e-05, gnorm=2.382, loss_scale=8, train_wall=11, gb_free=2.8, wall=84357
2021-06-19 18:04:54 | INFO | train_inner | epoch 003: 1399 / 3002 loss=2.703, ppl=6.51, wps=5772.9, ups=0.09, wpb=64820, bsz=128, num_updates=7357, lr=9.99491e-05, gnorm=2.477, loss_scale=8, train_wall=11, gb_free=2.8, wall=84369
2021-06-19 18:05:06 | INFO | train_inner | epoch 003: 1400 / 3002 loss=2.722, ppl=6.6, wps=5796.8, ups=0.09, wpb=64771, bsz=128, num_updates=7358, lr=9.99491e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=84380
2021-06-19 18:05:17 | INFO | train_inner | epoch 003: 1401 / 3002 loss=2.71, ppl=6.54, wps=5791, ups=0.09, wpb=64787, bsz=128, num_updates=7359, lr=9.99491e-05, gnorm=2.112, loss_scale=8, train_wall=11, gb_free=2.8, wall=84391
2021-06-19 18:05:28 | INFO | train_inner | epoch 003: 1402 / 3002 loss=2.72, ppl=6.59, wps=5903.4, ups=0.09, wpb=64913, bsz=128, num_updates=7360, lr=9.99491e-05, gnorm=2.498, loss_scale=8, train_wall=11, gb_free=2.8, wall=84402
2021-06-19 18:05:39 | INFO | train_inner | epoch 003: 1403 / 3002 loss=2.573, ppl=5.95, wps=5915.3, ups=0.09, wpb=64884, bsz=128, num_updates=7361, lr=9.99491e-05, gnorm=2.188, loss_scale=8, train_wall=11, gb_free=2.8, wall=84413
2021-06-19 18:05:50 | INFO | train_inner | epoch 003: 1404 / 3002 loss=2.584, ppl=6, wps=5921, ups=0.09, wpb=64814, bsz=128, num_updates=7362, lr=9.99491e-05, gnorm=2.301, loss_scale=8, train_wall=11, gb_free=2.8, wall=84424
2021-06-19 18:06:00 | INFO | train_inner | epoch 003: 1405 / 3002 loss=2.729, ppl=6.63, wps=5961.6, ups=0.09, wpb=64882, bsz=128, num_updates=7363, lr=9.99491e-05, gnorm=2.106, loss_scale=8, train_wall=10, gb_free=2.8, wall=84435
2021-06-19 18:06:12 | INFO | train_inner | epoch 003: 1406 / 3002 loss=2.577, ppl=5.97, wps=5822.3, ups=0.09, wpb=64812, bsz=128, num_updates=7364, lr=9.99491e-05, gnorm=2.167, loss_scale=8, train_wall=11, gb_free=2.8, wall=84446
2021-06-19 18:06:23 | INFO | train_inner | epoch 003: 1407 / 3002 loss=2.551, ppl=5.86, wps=5906.7, ups=0.09, wpb=64859, bsz=128, num_updates=7365, lr=9.99491e-05, gnorm=2.169, loss_scale=8, train_wall=11, gb_free=2.8, wall=84457
2021-06-19 18:06:34 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-19 18:06:45 | INFO | train_inner | epoch 003: 1409 / 3002 loss=2.654, ppl=6.29, wps=2941.6, ups=0.05, wpb=64775, bsz=128, num_updates=7366, lr=9.99491e-05, gnorm=2.53, loss_scale=4, train_wall=21, gb_free=2.8, wall=84479
2021-06-19 18:06:56 | INFO | train_inner | epoch 003: 1410 / 3002 loss=2.651, ppl=6.28, wps=5902.9, ups=0.09, wpb=64753, bsz=128, num_updates=7367, lr=9.99491e-05, gnorm=9.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=84490
2021-06-19 18:07:07 | INFO | train_inner | epoch 003: 1411 / 3002 loss=2.61, ppl=6.11, wps=5813.7, ups=0.09, wpb=64807, bsz=128, num_updates=7368, lr=9.99491e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=84501
2021-06-19 18:07:18 | INFO | train_inner | epoch 003: 1412 / 3002 loss=2.479, ppl=5.58, wps=5879.9, ups=0.09, wpb=64969, bsz=128, num_updates=7369, lr=9.9949e-05, gnorm=2.113, loss_scale=4, train_wall=11, gb_free=2.8, wall=84512
2021-06-19 18:07:29 | INFO | train_inner | epoch 003: 1413 / 3002 loss=2.374, ppl=5.18, wps=5767, ups=0.09, wpb=64885, bsz=128, num_updates=7370, lr=9.9949e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=84523
2021-06-19 18:07:40 | INFO | train_inner | epoch 003: 1414 / 3002 loss=2.551, ppl=5.86, wps=5785.7, ups=0.09, wpb=64824, bsz=128, num_updates=7371, lr=9.9949e-05, gnorm=2.321, loss_scale=4, train_wall=11, gb_free=2.8, wall=84535
2021-06-19 18:07:51 | INFO | train_inner | epoch 003: 1415 / 3002 loss=2.743, ppl=6.7, wps=5768.1, ups=0.09, wpb=64871, bsz=128, num_updates=7372, lr=9.9949e-05, gnorm=2.458, loss_scale=4, train_wall=11, gb_free=2.8, wall=84546
2021-06-19 18:08:02 | INFO | train_inner | epoch 003: 1416 / 3002 loss=2.754, ppl=6.74, wps=5895.4, ups=0.09, wpb=64801, bsz=128, num_updates=7373, lr=9.9949e-05, gnorm=2.089, loss_scale=4, train_wall=11, gb_free=2.8, wall=84557
2021-06-19 18:08:13 | INFO | train_inner | epoch 003: 1417 / 3002 loss=2.618, ppl=6.14, wps=5898.5, ups=0.09, wpb=64785, bsz=128, num_updates=7374, lr=9.9949e-05, gnorm=2.257, loss_scale=4, train_wall=11, gb_free=2.8, wall=84568
2021-06-19 18:08:24 | INFO | train_inner | epoch 003: 1418 / 3002 loss=2.719, ppl=6.58, wps=5912.2, ups=0.09, wpb=64787, bsz=128, num_updates=7375, lr=9.9949e-05, gnorm=2.361, loss_scale=4, train_wall=11, gb_free=2.8, wall=84579
2021-06-19 18:08:36 | INFO | train_inner | epoch 003: 1419 / 3002 loss=2.602, ppl=6.07, wps=5742.5, ups=0.09, wpb=64873, bsz=128, num_updates=7376, lr=9.9949e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=84590
2021-06-19 18:08:47 | INFO | train_inner | epoch 003: 1420 / 3002 loss=2.803, ppl=6.98, wps=5827.7, ups=0.09, wpb=64740, bsz=128, num_updates=7377, lr=9.9949e-05, gnorm=2.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=84601
2021-06-19 18:08:58 | INFO | train_inner | epoch 003: 1421 / 3002 loss=2.624, ppl=6.17, wps=5795.4, ups=0.09, wpb=64854, bsz=128, num_updates=7378, lr=9.9949e-05, gnorm=2.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=84612
2021-06-19 18:09:09 | INFO | train_inner | epoch 003: 1422 / 3002 loss=2.54, ppl=5.82, wps=5895.4, ups=0.09, wpb=64915, bsz=128, num_updates=7379, lr=9.9949e-05, gnorm=2.127, loss_scale=4, train_wall=11, gb_free=2.8, wall=84623
2021-06-19 18:09:20 | INFO | train_inner | epoch 003: 1423 / 3002 loss=2.583, ppl=5.99, wps=5808.8, ups=0.09, wpb=64795, bsz=128, num_updates=7380, lr=9.9949e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=84635
2021-06-19 18:09:31 | INFO | train_inner | epoch 003: 1424 / 3002 loss=2.723, ppl=6.6, wps=5744.7, ups=0.09, wpb=64783, bsz=128, num_updates=7381, lr=9.99489e-05, gnorm=2.12, loss_scale=4, train_wall=11, gb_free=2.8, wall=84646
2021-06-19 18:09:43 | INFO | train_inner | epoch 003: 1425 / 3002 loss=2.74, ppl=6.68, wps=5782.7, ups=0.09, wpb=64754, bsz=128, num_updates=7382, lr=9.99489e-05, gnorm=2.163, loss_scale=4, train_wall=11, gb_free=2.8, wall=84657
2021-06-19 18:09:54 | INFO | train_inner | epoch 003: 1426 / 3002 loss=2.683, ppl=6.42, wps=5840.4, ups=0.09, wpb=64834, bsz=128, num_updates=7383, lr=9.99489e-05, gnorm=2.216, loss_scale=4, train_wall=11, gb_free=2.8, wall=84668
2021-06-19 18:10:05 | INFO | train_inner | epoch 003: 1427 / 3002 loss=2.65, ppl=6.28, wps=5815.1, ups=0.09, wpb=64836, bsz=128, num_updates=7384, lr=9.99489e-05, gnorm=2.175, loss_scale=4, train_wall=11, gb_free=2.8, wall=84679
2021-06-19 18:10:16 | INFO | train_inner | epoch 003: 1428 / 3002 loss=2.555, ppl=5.88, wps=5827.6, ups=0.09, wpb=64826, bsz=128, num_updates=7385, lr=9.99489e-05, gnorm=2.483, loss_scale=4, train_wall=11, gb_free=2.8, wall=84690
2021-06-19 18:10:27 | INFO | train_inner | epoch 003: 1429 / 3002 loss=2.427, ppl=5.38, wps=5799.9, ups=0.09, wpb=64872, bsz=128, num_updates=7386, lr=9.99489e-05, gnorm=4.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=84702
2021-06-19 18:10:38 | INFO | train_inner | epoch 003: 1430 / 3002 loss=2.682, ppl=6.42, wps=5951.6, ups=0.09, wpb=64860, bsz=128, num_updates=7387, lr=9.99489e-05, gnorm=2.131, loss_scale=4, train_wall=10, gb_free=2.8, wall=84712
2021-06-19 18:10:49 | INFO | train_inner | epoch 003: 1431 / 3002 loss=2.59, ppl=6.02, wps=5866.4, ups=0.09, wpb=64847, bsz=128, num_updates=7388, lr=9.99489e-05, gnorm=2.202, loss_scale=4, train_wall=11, gb_free=2.8, wall=84724
2021-06-19 18:11:00 | INFO | train_inner | epoch 003: 1432 / 3002 loss=2.596, ppl=6.05, wps=5795.6, ups=0.09, wpb=64774, bsz=128, num_updates=7389, lr=9.99489e-05, gnorm=2.239, loss_scale=4, train_wall=11, gb_free=2.8, wall=84735
2021-06-19 18:11:11 | INFO | train_inner | epoch 003: 1433 / 3002 loss=2.66, ppl=6.32, wps=5829.8, ups=0.09, wpb=64813, bsz=128, num_updates=7390, lr=9.99489e-05, gnorm=2.244, loss_scale=4, train_wall=11, gb_free=2.8, wall=84746
2021-06-19 18:11:23 | INFO | train_inner | epoch 003: 1434 / 3002 loss=2.557, ppl=5.89, wps=5865.9, ups=0.09, wpb=64769, bsz=128, num_updates=7391, lr=9.99489e-05, gnorm=2.205, loss_scale=4, train_wall=11, gb_free=2.8, wall=84757
2021-06-19 18:11:34 | INFO | train_inner | epoch 003: 1435 / 3002 loss=2.755, ppl=6.75, wps=5892.8, ups=0.09, wpb=64791, bsz=128, num_updates=7392, lr=9.99489e-05, gnorm=2.141, loss_scale=4, train_wall=11, gb_free=2.8, wall=84768
2021-06-19 18:11:44 | INFO | train_inner | epoch 003: 1436 / 3002 loss=2.696, ppl=6.48, wps=5912.6, ups=0.09, wpb=64861, bsz=128, num_updates=7393, lr=9.99489e-05, gnorm=2.448, loss_scale=4, train_wall=10, gb_free=2.8, wall=84779
2021-06-19 18:11:56 | INFO | train_inner | epoch 003: 1437 / 3002 loss=2.611, ppl=6.11, wps=5745, ups=0.09, wpb=64821, bsz=128, num_updates=7394, lr=9.99488e-05, gnorm=2.108, loss_scale=4, train_wall=11, gb_free=2.8, wall=84790
2021-06-19 18:12:07 | INFO | train_inner | epoch 003: 1438 / 3002 loss=2.559, ppl=5.89, wps=5883.7, ups=0.09, wpb=64806, bsz=128, num_updates=7395, lr=9.99488e-05, gnorm=2.388, loss_scale=4, train_wall=11, gb_free=2.8, wall=84801
2021-06-19 18:12:17 | INFO | train_inner | epoch 003: 1439 / 3002 loss=2.697, ppl=6.49, wps=6057.4, ups=0.09, wpb=64817, bsz=128, num_updates=7396, lr=9.99488e-05, gnorm=2.17, loss_scale=4, train_wall=10, gb_free=2.8, wall=84812
2021-06-19 18:12:29 | INFO | train_inner | epoch 003: 1440 / 3002 loss=2.585, ppl=6, wps=5884.3, ups=0.09, wpb=64848, bsz=128, num_updates=7397, lr=9.99488e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=84823
2021-06-19 18:12:40 | INFO | train_inner | epoch 003: 1441 / 3002 loss=2.607, ppl=6.09, wps=5898.7, ups=0.09, wpb=64887, bsz=128, num_updates=7398, lr=9.99488e-05, gnorm=2.204, loss_scale=4, train_wall=11, gb_free=2.8, wall=84834
2021-06-19 18:12:50 | INFO | train_inner | epoch 003: 1442 / 3002 loss=2.685, ppl=6.43, wps=5959.3, ups=0.09, wpb=64860, bsz=128, num_updates=7399, lr=9.99488e-05, gnorm=3.393, loss_scale=4, train_wall=10, gb_free=2.8, wall=84845
2021-06-19 18:13:02 | INFO | train_inner | epoch 003: 1443 / 3002 loss=2.654, ppl=6.3, wps=5799.7, ups=0.09, wpb=64844, bsz=128, num_updates=7400, lr=9.99488e-05, gnorm=2.333, loss_scale=4, train_wall=11, gb_free=2.8, wall=84856
2021-06-19 18:13:13 | INFO | train_inner | epoch 003: 1444 / 3002 loss=2.493, ppl=5.63, wps=5892.2, ups=0.09, wpb=64771, bsz=128, num_updates=7401, lr=9.99488e-05, gnorm=2.225, loss_scale=4, train_wall=11, gb_free=2.8, wall=84867
2021-06-19 18:13:24 | INFO | train_inner | epoch 003: 1445 / 3002 loss=2.615, ppl=6.13, wps=5827.5, ups=0.09, wpb=64836, bsz=128, num_updates=7402, lr=9.99488e-05, gnorm=2.129, loss_scale=4, train_wall=11, gb_free=2.8, wall=84878
2021-06-19 18:13:35 | INFO | train_inner | epoch 003: 1446 / 3002 loss=2.596, ppl=6.04, wps=5783.8, ups=0.09, wpb=64912, bsz=128, num_updates=7403, lr=9.99488e-05, gnorm=2.149, loss_scale=4, train_wall=11, gb_free=2.8, wall=84889
2021-06-19 18:13:46 | INFO | train_inner | epoch 003: 1447 / 3002 loss=2.662, ppl=6.33, wps=5770.7, ups=0.09, wpb=64832, bsz=128, num_updates=7404, lr=9.99488e-05, gnorm=2.5, loss_scale=4, train_wall=11, gb_free=2.8, wall=84900
2021-06-19 18:13:57 | INFO | train_inner | epoch 003: 1448 / 3002 loss=2.606, ppl=6.09, wps=5869.8, ups=0.09, wpb=64802, bsz=128, num_updates=7405, lr=9.99488e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=84912
2021-06-19 18:14:08 | INFO | train_inner | epoch 003: 1449 / 3002 loss=2.544, ppl=5.83, wps=5840.5, ups=0.09, wpb=64857, bsz=128, num_updates=7406, lr=9.99487e-05, gnorm=2.172, loss_scale=4, train_wall=11, gb_free=2.8, wall=84923
2021-06-19 18:14:19 | INFO | train_inner | epoch 003: 1450 / 3002 loss=2.765, ppl=6.8, wps=6008, ups=0.09, wpb=64906, bsz=128, num_updates=7407, lr=9.99487e-05, gnorm=2.192, loss_scale=4, train_wall=10, gb_free=2.8, wall=84933
2021-06-19 18:14:30 | INFO | train_inner | epoch 003: 1451 / 3002 loss=2.586, ppl=6, wps=5995.1, ups=0.09, wpb=64848, bsz=128, num_updates=7408, lr=9.99487e-05, gnorm=2.145, loss_scale=4, train_wall=10, gb_free=2.8, wall=84944
2021-06-19 18:14:41 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-19 18:14:52 | INFO | train_inner | epoch 003: 1453 / 3002 loss=2.721, ppl=6.59, wps=2907.6, ups=0.04, wpb=64746, bsz=128, num_updates=7409, lr=9.99487e-05, gnorm=2.621, loss_scale=2, train_wall=21, gb_free=2.8, wall=84967
2021-06-19 18:15:03 | INFO | train_inner | epoch 003: 1454 / 3002 loss=2.586, ppl=6, wps=5830.8, ups=0.09, wpb=64787, bsz=128, num_updates=7410, lr=9.99487e-05, gnorm=2.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=84978
2021-06-19 18:15:14 | INFO | train_inner | epoch 003: 1455 / 3002 loss=2.696, ppl=6.48, wps=5826.4, ups=0.09, wpb=64760, bsz=128, num_updates=7411, lr=9.99487e-05, gnorm=2.229, loss_scale=2, train_wall=11, gb_free=2.8, wall=84989
2021-06-19 18:15:25 | INFO | train_inner | epoch 003: 1456 / 3002 loss=2.809, ppl=7.01, wps=5845.8, ups=0.09, wpb=64847, bsz=128, num_updates=7412, lr=9.99487e-05, gnorm=2.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=85000
2021-06-19 18:15:37 | INFO | train_inner | epoch 003: 1457 / 3002 loss=2.647, ppl=6.26, wps=5854.2, ups=0.09, wpb=64812, bsz=128, num_updates=7413, lr=9.99487e-05, gnorm=2.244, loss_scale=2, train_wall=11, gb_free=2.8, wall=85011
2021-06-19 18:15:48 | INFO | train_inner | epoch 003: 1458 / 3002 loss=2.694, ppl=6.47, wps=5884.9, ups=0.09, wpb=64885, bsz=128, num_updates=7414, lr=9.99487e-05, gnorm=2.318, loss_scale=2, train_wall=11, gb_free=2.8, wall=85022
2021-06-19 18:15:58 | INFO | train_inner | epoch 003: 1459 / 3002 loss=2.691, ppl=6.46, wps=6019.9, ups=0.09, wpb=64735, bsz=128, num_updates=7415, lr=9.99487e-05, gnorm=2.145, loss_scale=2, train_wall=10, gb_free=2.8, wall=85033
2021-06-19 18:16:09 | INFO | train_inner | epoch 003: 1460 / 3002 loss=2.773, ppl=6.84, wps=5959.4, ups=0.09, wpb=64864, bsz=128, num_updates=7416, lr=9.99487e-05, gnorm=3.313, loss_scale=2, train_wall=10, gb_free=2.8, wall=85044
2021-06-19 18:16:20 | INFO | train_inner | epoch 003: 1461 / 3002 loss=2.725, ppl=6.61, wps=5848.4, ups=0.09, wpb=64730, bsz=128, num_updates=7417, lr=9.99487e-05, gnorm=2.131, loss_scale=2, train_wall=11, gb_free=2.8, wall=85055
2021-06-19 18:16:31 | INFO | train_inner | epoch 003: 1462 / 3002 loss=2.613, ppl=6.12, wps=5892.6, ups=0.09, wpb=64812, bsz=128, num_updates=7418, lr=9.99487e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=85066
2021-06-19 18:16:42 | INFO | train_inner | epoch 003: 1463 / 3002 loss=2.613, ppl=6.12, wps=5963.1, ups=0.09, wpb=64791, bsz=128, num_updates=7419, lr=9.99486e-05, gnorm=2.187, loss_scale=2, train_wall=10, gb_free=2.8, wall=85077
2021-06-19 18:16:53 | INFO | train_inner | epoch 003: 1464 / 3002 loss=2.659, ppl=6.32, wps=5872.5, ups=0.09, wpb=64909, bsz=128, num_updates=7420, lr=9.99486e-05, gnorm=2.415, loss_scale=2, train_wall=11, gb_free=2.8, wall=85088
2021-06-19 18:17:04 | INFO | train_inner | epoch 003: 1465 / 3002 loss=2.825, ppl=7.09, wps=5883.5, ups=0.09, wpb=64831, bsz=128, num_updates=7421, lr=9.99486e-05, gnorm=2.136, loss_scale=2, train_wall=11, gb_free=2.8, wall=85099
2021-06-19 18:17:15 | INFO | train_inner | epoch 003: 1466 / 3002 loss=2.632, ppl=6.2, wps=5903, ups=0.09, wpb=64796, bsz=128, num_updates=7422, lr=9.99486e-05, gnorm=2.163, loss_scale=2, train_wall=11, gb_free=2.8, wall=85110
2021-06-19 18:17:26 | INFO | train_inner | epoch 003: 1467 / 3002 loss=2.714, ppl=6.56, wps=5762.9, ups=0.09, wpb=64829, bsz=128, num_updates=7423, lr=9.99486e-05, gnorm=2.189, loss_scale=2, train_wall=11, gb_free=2.8, wall=85121
2021-06-19 18:17:38 | INFO | train_inner | epoch 003: 1468 / 3002 loss=2.754, ppl=6.74, wps=5846.5, ups=0.09, wpb=64823, bsz=128, num_updates=7424, lr=9.99486e-05, gnorm=2.07, loss_scale=2, train_wall=11, gb_free=2.8, wall=85132
2021-06-19 18:17:49 | INFO | train_inner | epoch 003: 1469 / 3002 loss=2.591, ppl=6.03, wps=5835.8, ups=0.09, wpb=64892, bsz=128, num_updates=7425, lr=9.99486e-05, gnorm=2.056, loss_scale=2, train_wall=11, gb_free=2.8, wall=85143
2021-06-19 18:18:00 | INFO | train_inner | epoch 003: 1470 / 3002 loss=2.556, ppl=5.88, wps=5940.7, ups=0.09, wpb=64819, bsz=128, num_updates=7426, lr=9.99486e-05, gnorm=2.138, loss_scale=2, train_wall=10, gb_free=2.8, wall=85154
2021-06-19 18:18:11 | INFO | train_inner | epoch 003: 1471 / 3002 loss=2.63, ppl=6.19, wps=5834.9, ups=0.09, wpb=64869, bsz=128, num_updates=7427, lr=9.99486e-05, gnorm=2.191, loss_scale=2, train_wall=11, gb_free=2.8, wall=85165
2021-06-19 18:18:22 | INFO | train_inner | epoch 003: 1472 / 3002 loss=2.663, ppl=6.33, wps=5802.3, ups=0.09, wpb=64692, bsz=128, num_updates=7428, lr=9.99486e-05, gnorm=2.17, loss_scale=2, train_wall=11, gb_free=2.8, wall=85176
2021-06-19 18:18:33 | INFO | train_inner | epoch 003: 1473 / 3002 loss=2.522, ppl=5.74, wps=5800.8, ups=0.09, wpb=64814, bsz=128, num_updates=7429, lr=9.99486e-05, gnorm=3.016, loss_scale=2, train_wall=11, gb_free=2.8, wall=85187
2021-06-19 18:18:44 | INFO | train_inner | epoch 003: 1474 / 3002 loss=2.672, ppl=6.37, wps=5827.2, ups=0.09, wpb=64849, bsz=128, num_updates=7430, lr=9.99486e-05, gnorm=2.207, loss_scale=2, train_wall=11, gb_free=2.8, wall=85198
2021-06-19 18:18:55 | INFO | train_inner | epoch 003: 1475 / 3002 loss=2.722, ppl=6.6, wps=5785, ups=0.09, wpb=64863, bsz=128, num_updates=7431, lr=9.99485e-05, gnorm=2.203, loss_scale=2, train_wall=11, gb_free=2.8, wall=85210
2021-06-19 18:19:06 | INFO | train_inner | epoch 003: 1476 / 3002 loss=2.453, ppl=5.48, wps=5859.3, ups=0.09, wpb=64848, bsz=128, num_updates=7432, lr=9.99485e-05, gnorm=2.196, loss_scale=2, train_wall=11, gb_free=2.8, wall=85221
2021-06-19 18:19:18 | INFO | train_inner | epoch 003: 1477 / 3002 loss=2.561, ppl=5.9, wps=5800.7, ups=0.09, wpb=64902, bsz=128, num_updates=7433, lr=9.99485e-05, gnorm=2.046, loss_scale=2, train_wall=11, gb_free=2.8, wall=85232
2021-06-19 18:19:29 | INFO | train_inner | epoch 003: 1478 / 3002 loss=2.594, ppl=6.04, wps=5797.4, ups=0.09, wpb=64729, bsz=128, num_updates=7434, lr=9.99485e-05, gnorm=2.117, loss_scale=2, train_wall=11, gb_free=2.8, wall=85243
2021-06-19 18:19:40 | INFO | train_inner | epoch 003: 1479 / 3002 loss=2.641, ppl=6.24, wps=5827.8, ups=0.09, wpb=64808, bsz=128, num_updates=7435, lr=9.99485e-05, gnorm=2.134, loss_scale=2, train_wall=11, gb_free=2.8, wall=85254
2021-06-19 18:19:51 | INFO | train_inner | epoch 003: 1480 / 3002 loss=2.525, ppl=5.76, wps=5878, ups=0.09, wpb=64790, bsz=128, num_updates=7436, lr=9.99485e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=85265
2021-06-19 18:20:02 | INFO | train_inner | epoch 003: 1481 / 3002 loss=2.521, ppl=5.74, wps=5796.3, ups=0.09, wpb=64903, bsz=128, num_updates=7437, lr=9.99485e-05, gnorm=2.381, loss_scale=2, train_wall=11, gb_free=2.8, wall=85276
2021-06-19 18:20:13 | INFO | train_inner | epoch 003: 1482 / 3002 loss=2.733, ppl=6.65, wps=5863.2, ups=0.09, wpb=64797, bsz=128, num_updates=7438, lr=9.99485e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=85288
2021-06-19 18:20:24 | INFO | train_inner | epoch 003: 1483 / 3002 loss=2.741, ppl=6.69, wps=5959.2, ups=0.09, wpb=64869, bsz=128, num_updates=7439, lr=9.99485e-05, gnorm=2.259, loss_scale=2, train_wall=10, gb_free=2.8, wall=85298
2021-06-19 18:20:35 | INFO | train_inner | epoch 003: 1484 / 3002 loss=2.575, ppl=5.96, wps=5836.5, ups=0.09, wpb=64796, bsz=128, num_updates=7440, lr=9.99485e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=85310
2021-06-19 18:20:46 | INFO | train_inner | epoch 003: 1485 / 3002 loss=2.66, ppl=6.32, wps=5761.7, ups=0.09, wpb=64850, bsz=128, num_updates=7441, lr=9.99485e-05, gnorm=2.157, loss_scale=2, train_wall=11, gb_free=2.8, wall=85321
2021-06-19 18:20:58 | INFO | train_inner | epoch 003: 1486 / 3002 loss=2.599, ppl=6.06, wps=5738.3, ups=0.09, wpb=64788, bsz=128, num_updates=7442, lr=9.99485e-05, gnorm=2.214, loss_scale=2, train_wall=11, gb_free=2.8, wall=85332
2021-06-19 18:21:09 | INFO | train_inner | epoch 003: 1487 / 3002 loss=2.57, ppl=5.94, wps=5874.9, ups=0.09, wpb=64819, bsz=128, num_updates=7443, lr=9.99485e-05, gnorm=2.2, loss_scale=2, train_wall=11, gb_free=2.8, wall=85343
2021-06-19 18:21:20 | INFO | train_inner | epoch 003: 1488 / 3002 loss=2.697, ppl=6.49, wps=5858.6, ups=0.09, wpb=64781, bsz=128, num_updates=7444, lr=9.99484e-05, gnorm=2.427, loss_scale=2, train_wall=11, gb_free=2.8, wall=85354
2021-06-19 18:21:31 | INFO | train_inner | epoch 003: 1489 / 3002 loss=2.568, ppl=5.93, wps=5991.9, ups=0.09, wpb=64741, bsz=128, num_updates=7445, lr=9.99484e-05, gnorm=2.058, loss_scale=2, train_wall=10, gb_free=2.8, wall=85365
2021-06-19 18:21:42 | INFO | train_inner | epoch 003: 1490 / 3002 loss=2.532, ppl=5.78, wps=5894.8, ups=0.09, wpb=64799, bsz=128, num_updates=7446, lr=9.99484e-05, gnorm=2.416, loss_scale=2, train_wall=11, gb_free=2.8, wall=85376
2021-06-19 18:21:53 | INFO | train_inner | epoch 003: 1491 / 3002 loss=2.773, ppl=6.83, wps=5791.4, ups=0.09, wpb=64839, bsz=128, num_updates=7447, lr=9.99484e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=85387
2021-06-19 18:22:04 | INFO | train_inner | epoch 003: 1492 / 3002 loss=2.585, ppl=6, wps=5814, ups=0.09, wpb=64793, bsz=128, num_updates=7448, lr=9.99484e-05, gnorm=2.061, loss_scale=2, train_wall=11, gb_free=2.8, wall=85398
2021-06-19 18:22:15 | INFO | train_inner | epoch 003: 1493 / 3002 loss=2.773, ppl=6.83, wps=5851.4, ups=0.09, wpb=64867, bsz=128, num_updates=7449, lr=9.99484e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=85409
2021-06-19 18:22:26 | INFO | train_inner | epoch 003: 1494 / 3002 loss=2.57, ppl=5.94, wps=5854.3, ups=0.09, wpb=64855, bsz=128, num_updates=7450, lr=9.99484e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=85420
2021-06-19 18:22:37 | INFO | train_inner | epoch 003: 1495 / 3002 loss=2.656, ppl=6.3, wps=5869.9, ups=0.09, wpb=64829, bsz=128, num_updates=7451, lr=9.99484e-05, gnorm=2.153, loss_scale=2, train_wall=11, gb_free=2.8, wall=85431
2021-06-19 18:22:48 | INFO | train_inner | epoch 003: 1496 / 3002 loss=2.703, ppl=6.51, wps=5840.1, ups=0.09, wpb=64876, bsz=128, num_updates=7452, lr=9.99484e-05, gnorm=2.226, loss_scale=2, train_wall=11, gb_free=2.8, wall=85443
2021-06-19 18:22:59 | INFO | train_inner | epoch 003: 1497 / 3002 loss=2.74, ppl=6.68, wps=5794.8, ups=0.09, wpb=64843, bsz=128, num_updates=7453, lr=9.99484e-05, gnorm=2.11, loss_scale=2, train_wall=11, gb_free=2.8, wall=85454
2021-06-19 18:23:11 | INFO | train_inner | epoch 003: 1498 / 3002 loss=2.627, ppl=6.18, wps=5815.5, ups=0.09, wpb=64815, bsz=128, num_updates=7454, lr=9.99484e-05, gnorm=2.133, loss_scale=2, train_wall=11, gb_free=2.8, wall=85465
2021-06-19 18:23:22 | INFO | train_inner | epoch 003: 1499 / 3002 loss=2.742, ppl=6.69, wps=5881.5, ups=0.09, wpb=64923, bsz=128, num_updates=7455, lr=9.99484e-05, gnorm=2.158, loss_scale=2, train_wall=11, gb_free=2.8, wall=85476
2021-06-19 18:23:33 | INFO | train_inner | epoch 003: 1500 / 3002 loss=2.583, ppl=5.99, wps=5837.6, ups=0.09, wpb=64835, bsz=128, num_updates=7456, lr=9.99483e-05, gnorm=2.009, loss_scale=2, train_wall=11, gb_free=2.8, wall=85487
2021-06-19 18:23:44 | INFO | train_inner | epoch 003: 1501 / 3002 loss=2.585, ppl=6, wps=5885.1, ups=0.09, wpb=64891, bsz=128, num_updates=7457, lr=9.99483e-05, gnorm=2.936, loss_scale=2, train_wall=11, gb_free=2.8, wall=85498
2021-06-19 18:23:55 | INFO | train_inner | epoch 003: 1502 / 3002 loss=2.633, ppl=6.2, wps=5872.2, ups=0.09, wpb=64811, bsz=128, num_updates=7458, lr=9.99483e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=85509
2021-06-19 18:24:06 | INFO | train_inner | epoch 003: 1503 / 3002 loss=2.653, ppl=6.29, wps=5834.1, ups=0.09, wpb=64821, bsz=128, num_updates=7459, lr=9.99483e-05, gnorm=2.046, loss_scale=2, train_wall=11, gb_free=2.8, wall=85520
2021-06-19 18:24:17 | INFO | train_inner | epoch 003: 1504 / 3002 loss=2.644, ppl=6.25, wps=5875.6, ups=0.09, wpb=64804, bsz=128, num_updates=7460, lr=9.99483e-05, gnorm=2.548, loss_scale=2, train_wall=11, gb_free=2.8, wall=85531
2021-06-19 18:24:28 | INFO | train_inner | epoch 003: 1505 / 3002 loss=2.64, ppl=6.23, wps=5947.3, ups=0.09, wpb=64816, bsz=128, num_updates=7461, lr=9.99483e-05, gnorm=2.139, loss_scale=2, train_wall=10, gb_free=2.8, wall=85542
2021-06-19 18:24:39 | INFO | train_inner | epoch 003: 1506 / 3002 loss=2.754, ppl=6.75, wps=5919, ups=0.09, wpb=64775, bsz=128, num_updates=7462, lr=9.99483e-05, gnorm=2.104, loss_scale=2, train_wall=11, gb_free=2.8, wall=85553
2021-06-19 18:24:50 | INFO | train_inner | epoch 003: 1507 / 3002 loss=2.705, ppl=6.52, wps=5772.6, ups=0.09, wpb=64811, bsz=128, num_updates=7463, lr=9.99483e-05, gnorm=2.759, loss_scale=2, train_wall=11, gb_free=2.8, wall=85564
2021-06-19 18:25:01 | INFO | train_inner | epoch 003: 1508 / 3002 loss=2.513, ppl=5.71, wps=5818.3, ups=0.09, wpb=64839, bsz=128, num_updates=7464, lr=9.99483e-05, gnorm=2.124, loss_scale=2, train_wall=11, gb_free=2.8, wall=85575
2021-06-19 18:25:12 | INFO | train_inner | epoch 003: 1509 / 3002 loss=2.94, ppl=7.67, wps=5762.5, ups=0.09, wpb=64884, bsz=128, num_updates=7465, lr=9.99483e-05, gnorm=2.188, loss_scale=2, train_wall=11, gb_free=2.8, wall=85587
2021-06-19 18:25:24 | INFO | train_inner | epoch 003: 1510 / 3002 loss=2.595, ppl=6.04, wps=5775.8, ups=0.09, wpb=64811, bsz=128, num_updates=7466, lr=9.99483e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=85598
2021-06-19 18:25:35 | INFO | train_inner | epoch 003: 1511 / 3002 loss=2.448, ppl=5.46, wps=5759.4, ups=0.09, wpb=64782, bsz=128, num_updates=7467, lr=9.99483e-05, gnorm=2.203, loss_scale=2, train_wall=11, gb_free=2.8, wall=85609
2021-06-19 18:25:46 | INFO | train_inner | epoch 003: 1512 / 3002 loss=2.579, ppl=5.97, wps=5893, ups=0.09, wpb=64809, bsz=128, num_updates=7468, lr=9.99483e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=85620
2021-06-19 18:25:57 | INFO | train_inner | epoch 003: 1513 / 3002 loss=2.777, ppl=6.85, wps=5855.6, ups=0.09, wpb=64829, bsz=128, num_updates=7469, lr=9.99482e-05, gnorm=2.242, loss_scale=2, train_wall=11, gb_free=2.8, wall=85631
2021-06-19 18:26:08 | INFO | train_inner | epoch 003: 1514 / 3002 loss=2.665, ppl=6.34, wps=5777.8, ups=0.09, wpb=64670, bsz=128, num_updates=7470, lr=9.99482e-05, gnorm=2.165, loss_scale=2, train_wall=11, gb_free=2.8, wall=85642
2021-06-19 18:26:19 | INFO | train_inner | epoch 003: 1515 / 3002 loss=2.585, ppl=6, wps=5788.1, ups=0.09, wpb=64818, bsz=128, num_updates=7471, lr=9.99482e-05, gnorm=2.193, loss_scale=2, train_wall=11, gb_free=2.8, wall=85654
2021-06-19 18:26:30 | INFO | train_inner | epoch 003: 1516 / 3002 loss=2.726, ppl=6.62, wps=5886.5, ups=0.09, wpb=64858, bsz=128, num_updates=7472, lr=9.99482e-05, gnorm=2.133, loss_scale=2, train_wall=11, gb_free=2.8, wall=85665
2021-06-19 18:26:42 | INFO | train_inner | epoch 003: 1517 / 3002 loss=2.492, ppl=5.62, wps=5795.9, ups=0.09, wpb=64832, bsz=128, num_updates=7473, lr=9.99482e-05, gnorm=2.282, loss_scale=2, train_wall=11, gb_free=2.8, wall=85676
2021-06-19 18:26:53 | INFO | train_inner | epoch 003: 1518 / 3002 loss=2.589, ppl=6.02, wps=5920.5, ups=0.09, wpb=64825, bsz=128, num_updates=7474, lr=9.99482e-05, gnorm=2.054, loss_scale=2, train_wall=10, gb_free=2.8, wall=85687
2021-06-19 18:27:04 | INFO | train_inner | epoch 003: 1519 / 3002 loss=2.543, ppl=5.83, wps=5798.1, ups=0.09, wpb=64848, bsz=128, num_updates=7475, lr=9.99482e-05, gnorm=2.139, loss_scale=2, train_wall=11, gb_free=2.8, wall=85698
2021-06-19 18:27:15 | INFO | train_inner | epoch 003: 1520 / 3002 loss=2.57, ppl=5.94, wps=5948.6, ups=0.09, wpb=64906, bsz=128, num_updates=7476, lr=9.99482e-05, gnorm=2.163, loss_scale=2, train_wall=10, gb_free=2.8, wall=85709
2021-06-19 18:27:25 | INFO | train_inner | epoch 003: 1521 / 3002 loss=2.485, ppl=5.6, wps=5958.7, ups=0.09, wpb=64822, bsz=128, num_updates=7477, lr=9.99482e-05, gnorm=2.982, loss_scale=2, train_wall=10, gb_free=2.8, wall=85720
2021-06-19 18:27:37 | INFO | train_inner | epoch 003: 1522 / 3002 loss=2.562, ppl=5.9, wps=5842.8, ups=0.09, wpb=64876, bsz=128, num_updates=7478, lr=9.99482e-05, gnorm=2.109, loss_scale=2, train_wall=11, gb_free=2.8, wall=85731
2021-06-19 18:27:48 | INFO | train_inner | epoch 003: 1523 / 3002 loss=2.937, ppl=7.66, wps=5915, ups=0.09, wpb=64826, bsz=128, num_updates=7479, lr=9.99482e-05, gnorm=2.172, loss_scale=2, train_wall=10, gb_free=2.8, wall=85742
2021-06-19 18:27:58 | INFO | train_inner | epoch 003: 1524 / 3002 loss=2.529, ppl=5.77, wps=6007.3, ups=0.09, wpb=64886, bsz=128, num_updates=7480, lr=9.99482e-05, gnorm=2.091, loss_scale=2, train_wall=10, gb_free=2.8, wall=85753
2021-06-19 18:28:09 | INFO | train_inner | epoch 003: 1525 / 3002 loss=2.548, ppl=5.85, wps=5903.3, ups=0.09, wpb=64886, bsz=128, num_updates=7481, lr=9.99481e-05, gnorm=2.104, loss_scale=2, train_wall=11, gb_free=2.8, wall=85764
2021-06-19 18:28:20 | INFO | train_inner | epoch 003: 1526 / 3002 loss=2.648, ppl=6.27, wps=5868.6, ups=0.09, wpb=64782, bsz=128, num_updates=7482, lr=9.99481e-05, gnorm=2.046, loss_scale=2, train_wall=11, gb_free=2.8, wall=85775
2021-06-19 18:28:31 | INFO | train_inner | epoch 003: 1527 / 3002 loss=2.591, ppl=6.03, wps=5867.6, ups=0.09, wpb=64799, bsz=128, num_updates=7483, lr=9.99481e-05, gnorm=2.135, loss_scale=2, train_wall=11, gb_free=2.8, wall=85786
2021-06-19 18:28:43 | INFO | train_inner | epoch 003: 1528 / 3002 loss=2.714, ppl=6.56, wps=5846.6, ups=0.09, wpb=64869, bsz=128, num_updates=7484, lr=9.99481e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=85797
2021-06-19 18:28:53 | INFO | train_inner | epoch 003: 1529 / 3002 loss=2.532, ppl=5.78, wps=6012.8, ups=0.09, wpb=64856, bsz=128, num_updates=7485, lr=9.99481e-05, gnorm=2.34, loss_scale=2, train_wall=10, gb_free=2.8, wall=85808
2021-06-19 18:29:04 | INFO | train_inner | epoch 003: 1530 / 3002 loss=2.719, ppl=6.58, wps=5928.2, ups=0.09, wpb=64848, bsz=128, num_updates=7486, lr=9.99481e-05, gnorm=2.116, loss_scale=2, train_wall=10, gb_free=2.8, wall=85819
2021-06-19 18:29:15 | INFO | train_inner | epoch 003: 1531 / 3002 loss=2.531, ppl=5.78, wps=5837.3, ups=0.09, wpb=64755, bsz=128, num_updates=7487, lr=9.99481e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=85830
2021-06-19 18:29:27 | INFO | train_inner | epoch 003: 1532 / 3002 loss=2.587, ppl=6.01, wps=5809.2, ups=0.09, wpb=64849, bsz=128, num_updates=7488, lr=9.99481e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=85841
2021-06-19 18:29:38 | INFO | train_inner | epoch 003: 1533 / 3002 loss=2.501, ppl=5.66, wps=5897.6, ups=0.09, wpb=64876, bsz=128, num_updates=7489, lr=9.99481e-05, gnorm=2.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=85852
2021-06-19 18:29:48 | INFO | train_inner | epoch 003: 1534 / 3002 loss=2.78, ppl=6.87, wps=5953, ups=0.09, wpb=64844, bsz=128, num_updates=7490, lr=9.99481e-05, gnorm=2.147, loss_scale=2, train_wall=10, gb_free=2.8, wall=85863
2021-06-19 18:29:59 | INFO | train_inner | epoch 003: 1535 / 3002 loss=2.525, ppl=5.75, wps=5883.7, ups=0.09, wpb=64868, bsz=128, num_updates=7491, lr=9.99481e-05, gnorm=2.671, loss_scale=2, train_wall=11, gb_free=2.8, wall=85874
2021-06-19 18:30:11 | INFO | train_inner | epoch 003: 1536 / 3002 loss=2.6, ppl=6.06, wps=5760, ups=0.09, wpb=64869, bsz=128, num_updates=7492, lr=9.99481e-05, gnorm=3.217, loss_scale=2, train_wall=11, gb_free=2.8, wall=85885
2021-06-19 18:30:22 | INFO | train_inner | epoch 003: 1537 / 3002 loss=2.758, ppl=6.76, wps=5868.9, ups=0.09, wpb=64822, bsz=128, num_updates=7493, lr=9.99481e-05, gnorm=2.183, loss_scale=2, train_wall=11, gb_free=2.8, wall=85896
2021-06-19 18:30:33 | INFO | train_inner | epoch 003: 1538 / 3002 loss=2.583, ppl=5.99, wps=5853.1, ups=0.09, wpb=64850, bsz=128, num_updates=7494, lr=9.9948e-05, gnorm=2.267, loss_scale=2, train_wall=11, gb_free=2.8, wall=85907
2021-06-19 18:30:44 | INFO | train_inner | epoch 003: 1539 / 3002 loss=2.562, ppl=5.91, wps=5711.8, ups=0.09, wpb=64800, bsz=128, num_updates=7495, lr=9.9948e-05, gnorm=2.249, loss_scale=2, train_wall=11, gb_free=2.8, wall=85918
2021-06-19 18:30:55 | INFO | train_inner | epoch 003: 1540 / 3002 loss=2.736, ppl=6.66, wps=5916.4, ups=0.09, wpb=64877, bsz=128, num_updates=7496, lr=9.9948e-05, gnorm=2.295, loss_scale=2, train_wall=11, gb_free=2.8, wall=85929
2021-06-19 18:31:06 | INFO | train_inner | epoch 003: 1541 / 3002 loss=2.619, ppl=6.14, wps=5833.9, ups=0.09, wpb=64794, bsz=128, num_updates=7497, lr=9.9948e-05, gnorm=2.174, loss_scale=2, train_wall=11, gb_free=2.8, wall=85941
2021-06-19 18:31:17 | INFO | train_inner | epoch 003: 1542 / 3002 loss=2.636, ppl=6.22, wps=5789.9, ups=0.09, wpb=64837, bsz=128, num_updates=7498, lr=9.9948e-05, gnorm=2.208, loss_scale=2, train_wall=11, gb_free=2.8, wall=85952
2021-06-19 18:31:29 | INFO | train_inner | epoch 003: 1543 / 3002 loss=2.647, ppl=6.27, wps=5787, ups=0.09, wpb=64891, bsz=128, num_updates=7499, lr=9.9948e-05, gnorm=2.304, loss_scale=2, train_wall=11, gb_free=2.8, wall=85963
2021-06-19 18:31:40 | INFO | train_inner | epoch 003: 1544 / 3002 loss=2.591, ppl=6.02, wps=5900.5, ups=0.09, wpb=64874, bsz=128, num_updates=7500, lr=9.9948e-05, gnorm=2.746, loss_scale=2, train_wall=11, gb_free=2.8, wall=85974
2021-06-19 18:31:51 | INFO | train_inner | epoch 003: 1545 / 3002 loss=2.445, ppl=5.44, wps=5737.1, ups=0.09, wpb=64779, bsz=128, num_updates=7501, lr=9.9948e-05, gnorm=2.397, loss_scale=2, train_wall=11, gb_free=2.8, wall=85985
2021-06-19 18:32:02 | INFO | train_inner | epoch 003: 1546 / 3002 loss=2.586, ppl=6, wps=5838, ups=0.09, wpb=64749, bsz=128, num_updates=7502, lr=9.9948e-05, gnorm=2.014, loss_scale=2, train_wall=11, gb_free=2.8, wall=85996
2021-06-19 18:32:13 | INFO | train_inner | epoch 003: 1547 / 3002 loss=2.646, ppl=6.26, wps=5830.5, ups=0.09, wpb=64765, bsz=128, num_updates=7503, lr=9.9948e-05, gnorm=2.188, loss_scale=2, train_wall=11, gb_free=2.8, wall=86007
2021-06-19 18:32:24 | INFO | train_inner | epoch 003: 1548 / 3002 loss=2.758, ppl=6.77, wps=5770.8, ups=0.09, wpb=64767, bsz=128, num_updates=7504, lr=9.9948e-05, gnorm=2.174, loss_scale=2, train_wall=11, gb_free=2.8, wall=86019
2021-06-19 18:32:35 | INFO | train_inner | epoch 003: 1549 / 3002 loss=2.74, ppl=6.68, wps=5835.4, ups=0.09, wpb=64863, bsz=128, num_updates=7505, lr=9.9948e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=86030
2021-06-19 18:32:47 | INFO | train_inner | epoch 003: 1550 / 3002 loss=2.579, ppl=5.98, wps=5857.8, ups=0.09, wpb=64816, bsz=128, num_updates=7506, lr=9.99479e-05, gnorm=2.132, loss_scale=2, train_wall=11, gb_free=2.8, wall=86041
2021-06-19 18:32:58 | INFO | train_inner | epoch 003: 1551 / 3002 loss=2.616, ppl=6.13, wps=5807.3, ups=0.09, wpb=64805, bsz=128, num_updates=7507, lr=9.99479e-05, gnorm=2.162, loss_scale=2, train_wall=11, gb_free=2.8, wall=86052
2021-06-19 18:33:09 | INFO | train_inner | epoch 003: 1552 / 3002 loss=2.663, ppl=6.33, wps=5863.7, ups=0.09, wpb=64773, bsz=128, num_updates=7508, lr=9.99479e-05, gnorm=2.132, loss_scale=2, train_wall=11, gb_free=2.8, wall=86063
2021-06-19 18:33:20 | INFO | train_inner | epoch 003: 1553 / 3002 loss=2.588, ppl=6.01, wps=5861, ups=0.09, wpb=64878, bsz=128, num_updates=7509, lr=9.99479e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=86074
2021-06-19 18:33:31 | INFO | train_inner | epoch 003: 1554 / 3002 loss=2.627, ppl=6.18, wps=5825.4, ups=0.09, wpb=64856, bsz=128, num_updates=7510, lr=9.99479e-05, gnorm=2.14, loss_scale=2, train_wall=11, gb_free=2.8, wall=86085
2021-06-19 18:33:42 | INFO | train_inner | epoch 003: 1555 / 3002 loss=2.403, ppl=5.29, wps=5875.9, ups=0.09, wpb=64856, bsz=128, num_updates=7511, lr=9.99479e-05, gnorm=2.157, loss_scale=2, train_wall=11, gb_free=2.8, wall=86096
2021-06-19 18:33:53 | INFO | train_inner | epoch 003: 1556 / 3002 loss=2.804, ppl=6.98, wps=5789.6, ups=0.09, wpb=64839, bsz=128, num_updates=7512, lr=9.99479e-05, gnorm=2.239, loss_scale=2, train_wall=11, gb_free=2.8, wall=86108
2021-06-19 18:34:04 | INFO | train_inner | epoch 003: 1557 / 3002 loss=2.54, ppl=5.82, wps=5838.8, ups=0.09, wpb=64858, bsz=128, num_updates=7513, lr=9.99479e-05, gnorm=19.763, loss_scale=2, train_wall=11, gb_free=2.8, wall=86119
2021-06-19 18:34:15 | INFO | train_inner | epoch 003: 1558 / 3002 loss=2.749, ppl=6.72, wps=5895.1, ups=0.09, wpb=64865, bsz=128, num_updates=7514, lr=9.99479e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=86130
2021-06-19 18:34:26 | INFO | train_inner | epoch 003: 1559 / 3002 loss=2.546, ppl=5.84, wps=5888, ups=0.09, wpb=64823, bsz=128, num_updates=7515, lr=9.99479e-05, gnorm=2.135, loss_scale=2, train_wall=11, gb_free=2.8, wall=86141
2021-06-19 18:34:37 | INFO | train_inner | epoch 003: 1560 / 3002 loss=2.738, ppl=6.67, wps=5787, ups=0.09, wpb=64749, bsz=128, num_updates=7516, lr=9.99479e-05, gnorm=4.29, loss_scale=2, train_wall=11, gb_free=2.8, wall=86152
2021-06-19 18:34:49 | INFO | train_inner | epoch 003: 1561 / 3002 loss=2.727, ppl=6.62, wps=5774, ups=0.09, wpb=64775, bsz=128, num_updates=7517, lr=9.99479e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=86163
2021-06-19 18:35:00 | INFO | train_inner | epoch 003: 1562 / 3002 loss=2.524, ppl=5.75, wps=5903, ups=0.09, wpb=64866, bsz=128, num_updates=7518, lr=9.99479e-05, gnorm=2.887, loss_scale=2, train_wall=11, gb_free=2.8, wall=86174
2021-06-19 18:35:11 | INFO | train_inner | epoch 003: 1563 / 3002 loss=2.666, ppl=6.35, wps=5710.9, ups=0.09, wpb=64834, bsz=128, num_updates=7519, lr=9.99478e-05, gnorm=2.423, loss_scale=2, train_wall=11, gb_free=2.8, wall=86185
2021-06-19 18:35:22 | INFO | train_inner | epoch 003: 1564 / 3002 loss=2.619, ppl=6.14, wps=5894.5, ups=0.09, wpb=64787, bsz=128, num_updates=7520, lr=9.99478e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=86196
2021-06-19 18:35:33 | INFO | train_inner | epoch 003: 1565 / 3002 loss=2.59, ppl=6.02, wps=5872.3, ups=0.09, wpb=64794, bsz=128, num_updates=7521, lr=9.99478e-05, gnorm=2.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=86207
2021-06-19 18:35:44 | INFO | train_inner | epoch 003: 1566 / 3002 loss=2.586, ppl=6, wps=5849.6, ups=0.09, wpb=64847, bsz=128, num_updates=7522, lr=9.99478e-05, gnorm=2.509, loss_scale=2, train_wall=11, gb_free=2.8, wall=86218
2021-06-19 18:35:56 | INFO | train_inner | epoch 003: 1567 / 3002 loss=2.772, ppl=6.83, wps=5690.9, ups=0.09, wpb=64802, bsz=128, num_updates=7523, lr=9.99478e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=86230
2021-06-19 18:36:07 | INFO | train_inner | epoch 003: 1568 / 3002 loss=2.728, ppl=6.62, wps=5755.2, ups=0.09, wpb=64778, bsz=128, num_updates=7524, lr=9.99478e-05, gnorm=2.235, loss_scale=2, train_wall=11, gb_free=2.8, wall=86241
2021-06-19 18:36:18 | INFO | train_inner | epoch 003: 1569 / 3002 loss=2.711, ppl=6.55, wps=5921.3, ups=0.09, wpb=64947, bsz=128, num_updates=7525, lr=9.99478e-05, gnorm=2.179, loss_scale=2, train_wall=10, gb_free=2.8, wall=86252
2021-06-19 18:36:29 | INFO | train_inner | epoch 003: 1570 / 3002 loss=2.601, ppl=6.07, wps=5728.1, ups=0.09, wpb=64875, bsz=128, num_updates=7526, lr=9.99478e-05, gnorm=2.264, loss_scale=2, train_wall=11, gb_free=2.8, wall=86263
2021-06-19 18:36:40 | INFO | train_inner | epoch 003: 1571 / 3002 loss=2.666, ppl=6.35, wps=5966.1, ups=0.09, wpb=64909, bsz=128, num_updates=7527, lr=9.99478e-05, gnorm=2.249, loss_scale=2, train_wall=10, gb_free=2.8, wall=86274
2021-06-19 18:36:51 | INFO | train_inner | epoch 003: 1572 / 3002 loss=2.724, ppl=6.61, wps=5821.7, ups=0.09, wpb=64772, bsz=128, num_updates=7528, lr=9.99478e-05, gnorm=2.218, loss_scale=2, train_wall=11, gb_free=2.8, wall=86285
2021-06-19 18:37:02 | INFO | train_inner | epoch 003: 1573 / 3002 loss=2.522, ppl=5.74, wps=5823.6, ups=0.09, wpb=64854, bsz=128, num_updates=7529, lr=9.99478e-05, gnorm=2.138, loss_scale=2, train_wall=11, gb_free=2.8, wall=86297
2021-06-19 18:37:13 | INFO | train_inner | epoch 003: 1574 / 3002 loss=2.612, ppl=6.11, wps=5809.1, ups=0.09, wpb=64783, bsz=128, num_updates=7530, lr=9.99478e-05, gnorm=3.115, loss_scale=2, train_wall=11, gb_free=2.8, wall=86308
2021-06-19 18:37:25 | INFO | train_inner | epoch 003: 1575 / 3002 loss=2.699, ppl=6.49, wps=5797.7, ups=0.09, wpb=64807, bsz=128, num_updates=7531, lr=9.99477e-05, gnorm=3.136, loss_scale=2, train_wall=11, gb_free=2.8, wall=86319
2021-06-19 18:37:36 | INFO | train_inner | epoch 003: 1576 / 3002 loss=2.647, ppl=6.26, wps=5810.8, ups=0.09, wpb=64874, bsz=128, num_updates=7532, lr=9.99477e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=86330
2021-06-19 18:37:47 | INFO | train_inner | epoch 003: 1577 / 3002 loss=2.607, ppl=6.09, wps=5828.7, ups=0.09, wpb=64840, bsz=128, num_updates=7533, lr=9.99477e-05, gnorm=2.107, loss_scale=2, train_wall=11, gb_free=2.8, wall=86341
2021-06-19 18:37:58 | INFO | train_inner | epoch 003: 1578 / 3002 loss=2.772, ppl=6.83, wps=5881.1, ups=0.09, wpb=64852, bsz=128, num_updates=7534, lr=9.99477e-05, gnorm=2.134, loss_scale=2, train_wall=11, gb_free=2.8, wall=86352
2021-06-19 18:38:09 | INFO | train_inner | epoch 003: 1579 / 3002 loss=2.742, ppl=6.69, wps=5927.2, ups=0.09, wpb=64865, bsz=128, num_updates=7535, lr=9.99477e-05, gnorm=2.263, loss_scale=2, train_wall=10, gb_free=2.8, wall=86363
2021-06-19 18:38:20 | INFO | train_inner | epoch 003: 1580 / 3002 loss=2.547, ppl=5.84, wps=5765.3, ups=0.09, wpb=64836, bsz=128, num_updates=7536, lr=9.99477e-05, gnorm=2.39, loss_scale=4, train_wall=11, gb_free=2.8, wall=86374
2021-06-19 18:38:31 | INFO | train_inner | epoch 003: 1581 / 3002 loss=2.657, ppl=6.31, wps=5905.9, ups=0.09, wpb=64886, bsz=128, num_updates=7537, lr=9.99477e-05, gnorm=2.382, loss_scale=4, train_wall=11, gb_free=2.8, wall=86385
2021-06-19 18:38:42 | INFO | train_inner | epoch 003: 1582 / 3002 loss=2.609, ppl=6.1, wps=5860.1, ups=0.09, wpb=64866, bsz=128, num_updates=7538, lr=9.99477e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=86396
2021-06-19 18:38:53 | INFO | train_inner | epoch 003: 1583 / 3002 loss=2.819, ppl=7.06, wps=5817.1, ups=0.09, wpb=64725, bsz=128, num_updates=7539, lr=9.99477e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=86408
2021-06-19 18:39:04 | INFO | train_inner | epoch 003: 1584 / 3002 loss=2.69, ppl=6.46, wps=5793.5, ups=0.09, wpb=64848, bsz=128, num_updates=7540, lr=9.99477e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=86419
2021-06-19 18:39:15 | INFO | train_inner | epoch 003: 1585 / 3002 loss=2.711, ppl=6.55, wps=5870.9, ups=0.09, wpb=64722, bsz=128, num_updates=7541, lr=9.99477e-05, gnorm=2.552, loss_scale=4, train_wall=11, gb_free=2.8, wall=86430
2021-06-19 18:39:27 | INFO | train_inner | epoch 003: 1586 / 3002 loss=2.555, ppl=5.88, wps=5847.2, ups=0.09, wpb=64787, bsz=128, num_updates=7542, lr=9.99477e-05, gnorm=2.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=86441
2021-06-19 18:39:38 | INFO | train_inner | epoch 003: 1587 / 3002 loss=2.579, ppl=5.98, wps=5751.9, ups=0.09, wpb=64875, bsz=128, num_updates=7543, lr=9.99477e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=86452
2021-06-19 18:39:49 | INFO | train_inner | epoch 003: 1588 / 3002 loss=2.801, ppl=6.97, wps=5903.9, ups=0.09, wpb=64805, bsz=128, num_updates=7544, lr=9.99476e-05, gnorm=2.128, loss_scale=4, train_wall=11, gb_free=2.8, wall=86463
2021-06-19 18:40:00 | INFO | train_inner | epoch 003: 1589 / 3002 loss=2.636, ppl=6.22, wps=5979.7, ups=0.09, wpb=64858, bsz=128, num_updates=7545, lr=9.99476e-05, gnorm=3.784, loss_scale=4, train_wall=10, gb_free=2.8, wall=86474
2021-06-19 18:40:11 | INFO | train_inner | epoch 003: 1590 / 3002 loss=2.604, ppl=6.08, wps=5793.2, ups=0.09, wpb=64875, bsz=128, num_updates=7546, lr=9.99476e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=86485
2021-06-19 18:40:22 | INFO | train_inner | epoch 003: 1591 / 3002 loss=2.529, ppl=5.77, wps=5887.8, ups=0.09, wpb=64825, bsz=128, num_updates=7547, lr=9.99476e-05, gnorm=2.128, loss_scale=4, train_wall=11, gb_free=2.8, wall=86496
2021-06-19 18:40:33 | INFO | train_inner | epoch 003: 1592 / 3002 loss=2.583, ppl=5.99, wps=5923.6, ups=0.09, wpb=64788, bsz=128, num_updates=7548, lr=9.99476e-05, gnorm=2.258, loss_scale=4, train_wall=10, gb_free=2.8, wall=86507
2021-06-19 18:40:44 | INFO | train_inner | epoch 003: 1593 / 3002 loss=2.639, ppl=6.23, wps=5746.7, ups=0.09, wpb=64748, bsz=128, num_updates=7549, lr=9.99476e-05, gnorm=2.545, loss_scale=4, train_wall=11, gb_free=2.8, wall=86518
2021-06-19 18:40:55 | INFO | train_inner | epoch 003: 1594 / 3002 loss=2.626, ppl=6.17, wps=5829.4, ups=0.09, wpb=64841, bsz=128, num_updates=7550, lr=9.99476e-05, gnorm=2.157, loss_scale=4, train_wall=11, gb_free=2.8, wall=86530
2021-06-19 18:41:07 | INFO | train_inner | epoch 003: 1595 / 3002 loss=2.437, ppl=5.42, wps=5725.2, ups=0.09, wpb=64815, bsz=128, num_updates=7551, lr=9.99476e-05, gnorm=2.152, loss_scale=4, train_wall=11, gb_free=2.8, wall=86541
2021-06-19 18:41:18 | INFO | train_inner | epoch 003: 1596 / 3002 loss=2.764, ppl=6.79, wps=5840.7, ups=0.09, wpb=64740, bsz=128, num_updates=7552, lr=9.99476e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=86552
2021-06-19 18:41:29 | INFO | train_inner | epoch 003: 1597 / 3002 loss=2.657, ppl=6.31, wps=5780.6, ups=0.09, wpb=64789, bsz=128, num_updates=7553, lr=9.99476e-05, gnorm=2.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=86563
2021-06-19 18:41:40 | INFO | train_inner | epoch 003: 1598 / 3002 loss=2.721, ppl=6.59, wps=5854.5, ups=0.09, wpb=64803, bsz=128, num_updates=7554, lr=9.99476e-05, gnorm=2.303, loss_scale=4, train_wall=11, gb_free=2.8, wall=86574
2021-06-19 18:41:51 | INFO | train_inner | epoch 003: 1599 / 3002 loss=2.555, ppl=5.88, wps=5856.6, ups=0.09, wpb=64877, bsz=128, num_updates=7555, lr=9.99476e-05, gnorm=2.19, loss_scale=4, train_wall=11, gb_free=2.8, wall=86585
2021-06-19 18:42:02 | INFO | train_inner | epoch 003: 1600 / 3002 loss=2.583, ppl=5.99, wps=5765.5, ups=0.09, wpb=64821, bsz=128, num_updates=7556, lr=9.99475e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=86597
2021-06-19 18:42:13 | INFO | train_inner | epoch 003: 1601 / 3002 loss=2.647, ppl=6.27, wps=5839, ups=0.09, wpb=64853, bsz=128, num_updates=7557, lr=9.99475e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=86608
2021-06-19 18:42:24 | INFO | train_inner | epoch 003: 1602 / 3002 loss=2.685, ppl=6.43, wps=5891.1, ups=0.09, wpb=64846, bsz=128, num_updates=7558, lr=9.99475e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=86619
2021-06-19 18:42:36 | INFO | train_inner | epoch 003: 1603 / 3002 loss=2.656, ppl=6.3, wps=5779.1, ups=0.09, wpb=64824, bsz=128, num_updates=7559, lr=9.99475e-05, gnorm=2.101, loss_scale=4, train_wall=11, gb_free=2.8, wall=86630
2021-06-19 18:42:47 | INFO | train_inner | epoch 003: 1604 / 3002 loss=2.608, ppl=6.1, wps=5811.4, ups=0.09, wpb=64864, bsz=128, num_updates=7560, lr=9.99475e-05, gnorm=2.747, loss_scale=4, train_wall=11, gb_free=2.8, wall=86641
2021-06-19 18:42:58 | INFO | train_inner | epoch 003: 1605 / 3002 loss=2.664, ppl=6.34, wps=5719.6, ups=0.09, wpb=64765, bsz=128, num_updates=7561, lr=9.99475e-05, gnorm=2.103, loss_scale=4, train_wall=11, gb_free=2.8, wall=86652
2021-06-19 18:43:09 | INFO | train_inner | epoch 003: 1606 / 3002 loss=2.655, ppl=6.3, wps=5875.6, ups=0.09, wpb=64910, bsz=128, num_updates=7562, lr=9.99475e-05, gnorm=2.28, loss_scale=4, train_wall=11, gb_free=2.8, wall=86663
2021-06-19 18:43:20 | INFO | train_inner | epoch 003: 1607 / 3002 loss=2.62, ppl=6.15, wps=5926.2, ups=0.09, wpb=64912, bsz=128, num_updates=7563, lr=9.99475e-05, gnorm=2.117, loss_scale=4, train_wall=11, gb_free=2.8, wall=86674
2021-06-19 18:43:31 | INFO | train_inner | epoch 003: 1608 / 3002 loss=2.501, ppl=5.66, wps=5909.2, ups=0.09, wpb=64849, bsz=128, num_updates=7564, lr=9.99475e-05, gnorm=2.129, loss_scale=4, train_wall=11, gb_free=2.8, wall=86685
2021-06-19 18:43:42 | INFO | train_inner | epoch 003: 1609 / 3002 loss=2.416, ppl=5.34, wps=5750, ups=0.09, wpb=64849, bsz=128, num_updates=7565, lr=9.99475e-05, gnorm=2.471, loss_scale=4, train_wall=11, gb_free=2.8, wall=86697
2021-06-19 18:43:53 | INFO | train_inner | epoch 003: 1610 / 3002 loss=2.705, ppl=6.52, wps=5777.4, ups=0.09, wpb=64820, bsz=128, num_updates=7566, lr=9.99475e-05, gnorm=2.087, loss_scale=4, train_wall=11, gb_free=2.8, wall=86708
2021-06-19 18:44:05 | INFO | train_inner | epoch 003: 1611 / 3002 loss=2.767, ppl=6.8, wps=5757.4, ups=0.09, wpb=64760, bsz=128, num_updates=7567, lr=9.99475e-05, gnorm=2.291, loss_scale=4, train_wall=11, gb_free=2.8, wall=86719
2021-06-19 18:44:16 | INFO | train_inner | epoch 003: 1612 / 3002 loss=2.663, ppl=6.34, wps=5931.2, ups=0.09, wpb=64842, bsz=128, num_updates=7568, lr=9.99475e-05, gnorm=2.152, loss_scale=4, train_wall=10, gb_free=2.8, wall=86730
2021-06-19 18:44:27 | INFO | train_inner | epoch 003: 1613 / 3002 loss=2.628, ppl=6.18, wps=5848.2, ups=0.09, wpb=64791, bsz=128, num_updates=7569, lr=9.99474e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=86741
2021-06-19 18:44:38 | INFO | train_inner | epoch 003: 1614 / 3002 loss=2.461, ppl=5.5, wps=5930.8, ups=0.09, wpb=64922, bsz=128, num_updates=7570, lr=9.99474e-05, gnorm=2.038, loss_scale=4, train_wall=10, gb_free=2.8, wall=86752
2021-06-19 18:44:49 | INFO | train_inner | epoch 003: 1615 / 3002 loss=2.626, ppl=6.17, wps=5808.7, ups=0.09, wpb=64818, bsz=128, num_updates=7571, lr=9.99474e-05, gnorm=2.099, loss_scale=4, train_wall=11, gb_free=2.8, wall=86763
2021-06-19 18:44:59 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-19 18:45:11 | INFO | train_inner | epoch 003: 1617 / 3002 loss=2.664, ppl=6.34, wps=2968, ups=0.05, wpb=64773, bsz=128, num_updates=7572, lr=9.99474e-05, gnorm=2.323, loss_scale=2, train_wall=21, gb_free=2.8, wall=86785
2021-06-19 18:45:22 | INFO | train_inner | epoch 003: 1618 / 3002 loss=2.629, ppl=6.18, wps=5808.6, ups=0.09, wpb=64803, bsz=128, num_updates=7573, lr=9.99474e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=86796
2021-06-19 18:45:33 | INFO | train_inner | epoch 003: 1619 / 3002 loss=2.639, ppl=6.23, wps=5659.8, ups=0.09, wpb=64861, bsz=128, num_updates=7574, lr=9.99474e-05, gnorm=2.19, loss_scale=2, train_wall=11, gb_free=2.8, wall=86808
2021-06-19 18:45:44 | INFO | train_inner | epoch 003: 1620 / 3002 loss=2.642, ppl=6.24, wps=5856.8, ups=0.09, wpb=64842, bsz=128, num_updates=7575, lr=9.99474e-05, gnorm=2.253, loss_scale=2, train_wall=11, gb_free=2.8, wall=86819
2021-06-19 18:45:55 | INFO | train_inner | epoch 003: 1621 / 3002 loss=2.555, ppl=5.88, wps=5832.6, ups=0.09, wpb=64870, bsz=128, num_updates=7576, lr=9.99474e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=86830
2021-06-19 18:46:06 | INFO | train_inner | epoch 003: 1622 / 3002 loss=2.758, ppl=6.77, wps=5981.3, ups=0.09, wpb=64835, bsz=128, num_updates=7577, lr=9.99474e-05, gnorm=2.245, loss_scale=2, train_wall=10, gb_free=2.8, wall=86841
2021-06-19 18:46:18 | INFO | train_inner | epoch 003: 1623 / 3002 loss=2.567, ppl=5.93, wps=5797.3, ups=0.09, wpb=64847, bsz=128, num_updates=7578, lr=9.99474e-05, gnorm=2.082, loss_scale=2, train_wall=11, gb_free=2.8, wall=86852
2021-06-19 18:46:28 | INFO | train_inner | epoch 003: 1624 / 3002 loss=2.53, ppl=5.78, wps=6070.7, ups=0.09, wpb=64894, bsz=128, num_updates=7579, lr=9.99474e-05, gnorm=2.113, loss_scale=2, train_wall=10, gb_free=2.8, wall=86863
2021-06-19 18:46:39 | INFO | train_inner | epoch 003: 1625 / 3002 loss=2.767, ppl=6.81, wps=5843.4, ups=0.09, wpb=64788, bsz=128, num_updates=7580, lr=9.99474e-05, gnorm=2.131, loss_scale=2, train_wall=11, gb_free=2.8, wall=86874
2021-06-19 18:46:50 | INFO | train_inner | epoch 003: 1626 / 3002 loss=2.686, ppl=6.44, wps=5810.5, ups=0.09, wpb=64786, bsz=128, num_updates=7581, lr=9.99473e-05, gnorm=2.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=86885
2021-06-19 18:47:02 | INFO | train_inner | epoch 003: 1627 / 3002 loss=2.513, ppl=5.71, wps=5824.3, ups=0.09, wpb=64787, bsz=128, num_updates=7582, lr=9.99473e-05, gnorm=2.137, loss_scale=2, train_wall=11, gb_free=2.8, wall=86896
2021-06-19 18:47:13 | INFO | train_inner | epoch 003: 1628 / 3002 loss=2.785, ppl=6.89, wps=5828, ups=0.09, wpb=64854, bsz=128, num_updates=7583, lr=9.99473e-05, gnorm=3.169, loss_scale=2, train_wall=11, gb_free=2.8, wall=86907
2021-06-19 18:47:24 | INFO | train_inner | epoch 003: 1629 / 3002 loss=2.601, ppl=6.07, wps=5810, ups=0.09, wpb=64888, bsz=128, num_updates=7584, lr=9.99473e-05, gnorm=2.098, loss_scale=2, train_wall=11, gb_free=2.8, wall=86918
2021-06-19 18:47:35 | INFO | train_inner | epoch 003: 1630 / 3002 loss=2.714, ppl=6.56, wps=5800.9, ups=0.09, wpb=64698, bsz=128, num_updates=7585, lr=9.99473e-05, gnorm=2.593, loss_scale=2, train_wall=11, gb_free=2.8, wall=86929
2021-06-19 18:47:46 | INFO | train_inner | epoch 003: 1631 / 3002 loss=2.69, ppl=6.45, wps=5829, ups=0.09, wpb=64918, bsz=128, num_updates=7586, lr=9.99473e-05, gnorm=2.131, loss_scale=2, train_wall=11, gb_free=2.8, wall=86940
2021-06-19 18:47:57 | INFO | train_inner | epoch 003: 1632 / 3002 loss=2.66, ppl=6.32, wps=5849.8, ups=0.09, wpb=64947, bsz=128, num_updates=7587, lr=9.99473e-05, gnorm=2.208, loss_scale=2, train_wall=11, gb_free=2.8, wall=86952
2021-06-19 18:48:08 | INFO | train_inner | epoch 003: 1633 / 3002 loss=2.644, ppl=6.25, wps=5845.5, ups=0.09, wpb=64809, bsz=128, num_updates=7588, lr=9.99473e-05, gnorm=2.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=86963
2021-06-19 18:48:19 | INFO | train_inner | epoch 003: 1634 / 3002 loss=2.708, ppl=6.53, wps=5896.6, ups=0.09, wpb=64794, bsz=128, num_updates=7589, lr=9.99473e-05, gnorm=2.115, loss_scale=2, train_wall=11, gb_free=2.8, wall=86974
2021-06-19 18:48:30 | INFO | train_inner | epoch 003: 1635 / 3002 loss=2.648, ppl=6.27, wps=5847.3, ups=0.09, wpb=64836, bsz=128, num_updates=7590, lr=9.99473e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=86985
2021-06-19 18:48:42 | INFO | train_inner | epoch 003: 1636 / 3002 loss=2.56, ppl=5.9, wps=5812.9, ups=0.09, wpb=64844, bsz=128, num_updates=7591, lr=9.99473e-05, gnorm=2.261, loss_scale=2, train_wall=11, gb_free=2.8, wall=86996
2021-06-19 18:48:53 | INFO | train_inner | epoch 003: 1637 / 3002 loss=2.646, ppl=6.26, wps=5811.1, ups=0.09, wpb=64809, bsz=128, num_updates=7592, lr=9.99473e-05, gnorm=2.932, loss_scale=2, train_wall=11, gb_free=2.8, wall=87007
2021-06-19 18:49:04 | INFO | train_inner | epoch 003: 1638 / 3002 loss=2.751, ppl=6.73, wps=5817.3, ups=0.09, wpb=64824, bsz=128, num_updates=7593, lr=9.99473e-05, gnorm=2.235, loss_scale=2, train_wall=11, gb_free=2.8, wall=87018
2021-06-19 18:49:15 | INFO | train_inner | epoch 003: 1639 / 3002 loss=2.779, ppl=6.86, wps=5806.6, ups=0.09, wpb=64783, bsz=128, num_updates=7594, lr=9.99472e-05, gnorm=2.168, loss_scale=2, train_wall=11, gb_free=2.8, wall=87029
2021-06-19 18:49:26 | INFO | train_inner | epoch 003: 1640 / 3002 loss=2.877, ppl=7.34, wps=5870.9, ups=0.09, wpb=64821, bsz=128, num_updates=7595, lr=9.99472e-05, gnorm=2.921, loss_scale=2, train_wall=11, gb_free=2.8, wall=87040
2021-06-19 18:49:37 | INFO | train_inner | epoch 003: 1641 / 3002 loss=2.624, ppl=6.16, wps=5829.9, ups=0.09, wpb=64922, bsz=128, num_updates=7596, lr=9.99472e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=87052
2021-06-19 18:49:48 | INFO | train_inner | epoch 003: 1642 / 3002 loss=2.627, ppl=6.18, wps=5781.9, ups=0.09, wpb=64862, bsz=128, num_updates=7597, lr=9.99472e-05, gnorm=2.502, loss_scale=2, train_wall=11, gb_free=2.8, wall=87063
2021-06-19 18:49:59 | INFO | train_inner | epoch 003: 1643 / 3002 loss=2.702, ppl=6.5, wps=5898, ups=0.09, wpb=64853, bsz=128, num_updates=7598, lr=9.99472e-05, gnorm=2.825, loss_scale=2, train_wall=11, gb_free=2.8, wall=87074
2021-06-19 18:50:10 | INFO | train_inner | epoch 003: 1644 / 3002 loss=2.624, ppl=6.16, wps=5860, ups=0.09, wpb=64831, bsz=128, num_updates=7599, lr=9.99472e-05, gnorm=3.547, loss_scale=2, train_wall=11, gb_free=2.8, wall=87085
2021-06-19 18:50:21 | INFO | train_inner | epoch 003: 1645 / 3002 loss=2.664, ppl=6.34, wps=5894.4, ups=0.09, wpb=64833, bsz=128, num_updates=7600, lr=9.99472e-05, gnorm=8.556, loss_scale=2, train_wall=11, gb_free=2.8, wall=87096
2021-06-19 18:50:33 | INFO | train_inner | epoch 003: 1646 / 3002 loss=2.527, ppl=5.76, wps=5829.8, ups=0.09, wpb=64775, bsz=128, num_updates=7601, lr=9.99472e-05, gnorm=2.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=87107
2021-06-19 18:50:44 | INFO | train_inner | epoch 003: 1647 / 3002 loss=2.708, ppl=6.54, wps=5819.7, ups=0.09, wpb=64866, bsz=128, num_updates=7602, lr=9.99472e-05, gnorm=2.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=87118
2021-06-19 18:50:55 | INFO | train_inner | epoch 003: 1648 / 3002 loss=2.637, ppl=6.22, wps=5909.3, ups=0.09, wpb=64784, bsz=128, num_updates=7603, lr=9.99472e-05, gnorm=2.108, loss_scale=2, train_wall=11, gb_free=2.8, wall=87129
2021-06-19 18:51:06 | INFO | train_inner | epoch 003: 1649 / 3002 loss=2.65, ppl=6.28, wps=5825.2, ups=0.09, wpb=64830, bsz=128, num_updates=7604, lr=9.99472e-05, gnorm=2.419, loss_scale=2, train_wall=11, gb_free=2.8, wall=87140
2021-06-19 18:51:17 | INFO | train_inner | epoch 003: 1650 / 3002 loss=2.623, ppl=6.16, wps=5811.4, ups=0.09, wpb=64857, bsz=128, num_updates=7605, lr=9.99472e-05, gnorm=2.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=87151
2021-06-19 18:51:28 | INFO | train_inner | epoch 003: 1651 / 3002 loss=2.624, ppl=6.16, wps=5833.8, ups=0.09, wpb=64905, bsz=128, num_updates=7606, lr=9.99471e-05, gnorm=4.276, loss_scale=2, train_wall=11, gb_free=2.8, wall=87162
2021-06-19 18:51:39 | INFO | train_inner | epoch 003: 1652 / 3002 loss=2.71, ppl=6.54, wps=5757.6, ups=0.09, wpb=64788, bsz=128, num_updates=7607, lr=9.99471e-05, gnorm=2.198, loss_scale=2, train_wall=11, gb_free=2.8, wall=87174
2021-06-19 18:51:51 | INFO | train_inner | epoch 003: 1653 / 3002 loss=2.688, ppl=6.45, wps=5789.3, ups=0.09, wpb=64791, bsz=128, num_updates=7608, lr=9.99471e-05, gnorm=2.681, loss_scale=2, train_wall=11, gb_free=2.8, wall=87185
2021-06-19 18:52:02 | INFO | train_inner | epoch 003: 1654 / 3002 loss=2.584, ppl=6, wps=5829.3, ups=0.09, wpb=64786, bsz=128, num_updates=7609, lr=9.99471e-05, gnorm=2.846, loss_scale=2, train_wall=11, gb_free=2.8, wall=87196
2021-06-19 18:52:13 | INFO | train_inner | epoch 003: 1655 / 3002 loss=2.713, ppl=6.56, wps=5908.2, ups=0.09, wpb=64845, bsz=128, num_updates=7610, lr=9.99471e-05, gnorm=2.157, loss_scale=2, train_wall=10, gb_free=2.8, wall=87207
2021-06-19 18:52:24 | INFO | train_inner | epoch 003: 1656 / 3002 loss=2.701, ppl=6.5, wps=5844.4, ups=0.09, wpb=64891, bsz=128, num_updates=7611, lr=9.99471e-05, gnorm=8.015, loss_scale=2, train_wall=11, gb_free=2.8, wall=87218
2021-06-19 18:52:35 | INFO | train_inner | epoch 003: 1657 / 3002 loss=2.534, ppl=5.79, wps=5810.8, ups=0.09, wpb=64769, bsz=128, num_updates=7612, lr=9.99471e-05, gnorm=2.301, loss_scale=2, train_wall=11, gb_free=2.8, wall=87229
2021-06-19 18:52:46 | INFO | train_inner | epoch 003: 1658 / 3002 loss=2.518, ppl=5.73, wps=5786.6, ups=0.09, wpb=64897, bsz=128, num_updates=7613, lr=9.99471e-05, gnorm=4.319, loss_scale=2, train_wall=11, gb_free=2.8, wall=87240
2021-06-19 18:52:57 | INFO | train_inner | epoch 003: 1659 / 3002 loss=2.652, ppl=6.29, wps=5861, ups=0.09, wpb=64838, bsz=128, num_updates=7614, lr=9.99471e-05, gnorm=2.149, loss_scale=2, train_wall=11, gb_free=2.8, wall=87252
2021-06-19 18:53:09 | INFO | train_inner | epoch 003: 1660 / 3002 loss=2.628, ppl=6.18, wps=5699.4, ups=0.09, wpb=64851, bsz=128, num_updates=7615, lr=9.99471e-05, gnorm=2.171, loss_scale=2, train_wall=11, gb_free=2.8, wall=87263
2021-06-19 18:53:20 | INFO | train_inner | epoch 003: 1661 / 3002 loss=2.566, ppl=5.92, wps=5843.1, ups=0.09, wpb=64836, bsz=128, num_updates=7616, lr=9.99471e-05, gnorm=2.449, loss_scale=2, train_wall=11, gb_free=2.8, wall=87274
2021-06-19 18:53:31 | INFO | train_inner | epoch 003: 1662 / 3002 loss=2.571, ppl=5.94, wps=5903.3, ups=0.09, wpb=64898, bsz=128, num_updates=7617, lr=9.99471e-05, gnorm=18.783, loss_scale=2, train_wall=11, gb_free=2.8, wall=87285
2021-06-19 18:53:42 | INFO | train_inner | epoch 003: 1663 / 3002 loss=2.628, ppl=6.18, wps=5734.5, ups=0.09, wpb=64799, bsz=128, num_updates=7618, lr=9.99471e-05, gnorm=2.285, loss_scale=2, train_wall=11, gb_free=2.8, wall=87296
2021-06-19 18:53:53 | INFO | train_inner | epoch 003: 1664 / 3002 loss=2.597, ppl=6.05, wps=5835, ups=0.09, wpb=64847, bsz=128, num_updates=7619, lr=9.9947e-05, gnorm=2.841, loss_scale=2, train_wall=11, gb_free=2.8, wall=87307
2021-06-19 18:54:04 | INFO | train_inner | epoch 003: 1665 / 3002 loss=2.619, ppl=6.14, wps=5847.3, ups=0.09, wpb=64881, bsz=128, num_updates=7620, lr=9.9947e-05, gnorm=2.271, loss_scale=2, train_wall=11, gb_free=2.8, wall=87319
2021-06-19 18:54:15 | INFO | train_inner | epoch 003: 1666 / 3002 loss=2.667, ppl=6.35, wps=6002.2, ups=0.09, wpb=64844, bsz=128, num_updates=7621, lr=9.9947e-05, gnorm=2.487, loss_scale=2, train_wall=10, gb_free=2.8, wall=87329
2021-06-19 18:54:26 | INFO | train_inner | epoch 003: 1667 / 3002 loss=2.487, ppl=5.61, wps=5925.7, ups=0.09, wpb=64890, bsz=128, num_updates=7622, lr=9.9947e-05, gnorm=2.466, loss_scale=2, train_wall=10, gb_free=2.8, wall=87340
2021-06-19 18:54:37 | INFO | train_inner | epoch 003: 1668 / 3002 loss=2.631, ppl=6.19, wps=5969.8, ups=0.09, wpb=64795, bsz=128, num_updates=7623, lr=9.9947e-05, gnorm=2.179, loss_scale=2, train_wall=10, gb_free=2.8, wall=87351
2021-06-19 18:54:48 | INFO | train_inner | epoch 003: 1669 / 3002 loss=2.692, ppl=6.46, wps=5948.3, ups=0.09, wpb=64794, bsz=128, num_updates=7624, lr=9.9947e-05, gnorm=2.266, loss_scale=2, train_wall=10, gb_free=2.8, wall=87362
2021-06-19 18:54:59 | INFO | train_inner | epoch 003: 1670 / 3002 loss=2.612, ppl=6.11, wps=5850.9, ups=0.09, wpb=64826, bsz=128, num_updates=7625, lr=9.9947e-05, gnorm=2.129, loss_scale=2, train_wall=11, gb_free=2.8, wall=87373
2021-06-19 18:55:10 | INFO | train_inner | epoch 003: 1671 / 3002 loss=2.867, ppl=7.3, wps=5717.7, ups=0.09, wpb=64838, bsz=128, num_updates=7626, lr=9.9947e-05, gnorm=2.212, loss_scale=2, train_wall=11, gb_free=2.8, wall=87384
2021-06-19 18:55:21 | INFO | train_inner | epoch 003: 1672 / 3002 loss=2.683, ppl=6.42, wps=5874, ups=0.09, wpb=64819, bsz=128, num_updates=7627, lr=9.9947e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=87395
2021-06-19 18:55:32 | INFO | train_inner | epoch 003: 1673 / 3002 loss=2.694, ppl=6.47, wps=5892.7, ups=0.09, wpb=64897, bsz=128, num_updates=7628, lr=9.9947e-05, gnorm=3.795, loss_scale=2, train_wall=11, gb_free=2.8, wall=87406
2021-06-19 18:55:43 | INFO | train_inner | epoch 003: 1674 / 3002 loss=2.682, ppl=6.42, wps=5839.5, ups=0.09, wpb=64807, bsz=128, num_updates=7629, lr=9.9947e-05, gnorm=2.271, loss_scale=2, train_wall=11, gb_free=2.8, wall=87418
2021-06-19 18:55:54 | INFO | train_inner | epoch 003: 1675 / 3002 loss=2.742, ppl=6.69, wps=5898.6, ups=0.09, wpb=64851, bsz=128, num_updates=7630, lr=9.9947e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=87429
2021-06-19 18:56:05 | INFO | train_inner | epoch 003: 1676 / 3002 loss=2.678, ppl=6.4, wps=5805.2, ups=0.09, wpb=64813, bsz=128, num_updates=7631, lr=9.99469e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=87440
2021-06-19 18:56:16 | INFO | train_inner | epoch 003: 1677 / 3002 loss=2.715, ppl=6.57, wps=5848.2, ups=0.09, wpb=64706, bsz=128, num_updates=7632, lr=9.99469e-05, gnorm=2.786, loss_scale=2, train_wall=11, gb_free=2.8, wall=87451
2021-06-19 18:56:28 | INFO | train_inner | epoch 003: 1678 / 3002 loss=2.671, ppl=6.37, wps=5838, ups=0.09, wpb=64880, bsz=128, num_updates=7633, lr=9.99469e-05, gnorm=2.381, loss_scale=2, train_wall=11, gb_free=2.8, wall=87462
2021-06-19 18:56:39 | INFO | train_inner | epoch 003: 1679 / 3002 loss=2.589, ppl=6.02, wps=5918.3, ups=0.09, wpb=64805, bsz=128, num_updates=7634, lr=9.99469e-05, gnorm=2.242, loss_scale=2, train_wall=10, gb_free=2.8, wall=87473
2021-06-19 18:56:50 | INFO | train_inner | epoch 003: 1680 / 3002 loss=2.778, ppl=6.86, wps=5828.9, ups=0.09, wpb=64777, bsz=128, num_updates=7635, lr=9.99469e-05, gnorm=2.437, loss_scale=2, train_wall=11, gb_free=2.8, wall=87484
2021-06-19 18:57:01 | INFO | train_inner | epoch 003: 1681 / 3002 loss=2.587, ppl=6.01, wps=5812.6, ups=0.09, wpb=64822, bsz=128, num_updates=7636, lr=9.99469e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=87495
2021-06-19 18:57:12 | INFO | train_inner | epoch 003: 1682 / 3002 loss=2.692, ppl=6.46, wps=5903.6, ups=0.09, wpb=64923, bsz=128, num_updates=7637, lr=9.99469e-05, gnorm=2.2, loss_scale=2, train_wall=11, gb_free=2.8, wall=87506
2021-06-19 18:57:23 | INFO | train_inner | epoch 003: 1683 / 3002 loss=2.533, ppl=5.79, wps=5899.9, ups=0.09, wpb=64909, bsz=128, num_updates=7638, lr=9.99469e-05, gnorm=3.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=87517
2021-06-19 18:57:34 | INFO | train_inner | epoch 003: 1684 / 3002 loss=2.732, ppl=6.64, wps=5821.5, ups=0.09, wpb=64745, bsz=128, num_updates=7639, lr=9.99469e-05, gnorm=2.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=87528
2021-06-19 18:57:45 | INFO | train_inner | epoch 003: 1685 / 3002 loss=2.549, ppl=5.85, wps=5856.4, ups=0.09, wpb=64793, bsz=128, num_updates=7640, lr=9.99469e-05, gnorm=2.103, loss_scale=2, train_wall=11, gb_free=2.8, wall=87539
2021-06-19 18:57:56 | INFO | train_inner | epoch 003: 1686 / 3002 loss=2.598, ppl=6.06, wps=5851.3, ups=0.09, wpb=64883, bsz=128, num_updates=7641, lr=9.99469e-05, gnorm=2.467, loss_scale=2, train_wall=11, gb_free=2.8, wall=87550
2021-06-19 18:58:07 | INFO | train_inner | epoch 003: 1687 / 3002 loss=2.678, ppl=6.4, wps=5873.2, ups=0.09, wpb=64714, bsz=128, num_updates=7642, lr=9.99469e-05, gnorm=2.217, loss_scale=2, train_wall=11, gb_free=2.8, wall=87561
2021-06-19 18:58:18 | INFO | train_inner | epoch 003: 1688 / 3002 loss=2.655, ppl=6.3, wps=5784.1, ups=0.09, wpb=64801, bsz=128, num_updates=7643, lr=9.99469e-05, gnorm=7.026, loss_scale=2, train_wall=11, gb_free=2.8, wall=87573
2021-06-19 18:58:29 | INFO | train_inner | epoch 003: 1689 / 3002 loss=2.538, ppl=5.81, wps=5843.6, ups=0.09, wpb=64865, bsz=128, num_updates=7644, lr=9.99468e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=87584
2021-06-19 18:58:40 | INFO | train_inner | epoch 003: 1690 / 3002 loss=2.654, ppl=6.29, wps=5885.2, ups=0.09, wpb=64923, bsz=128, num_updates=7645, lr=9.99468e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=87595
2021-06-19 18:58:52 | INFO | train_inner | epoch 003: 1691 / 3002 loss=2.747, ppl=6.71, wps=5744.6, ups=0.09, wpb=64761, bsz=128, num_updates=7646, lr=9.99468e-05, gnorm=5.714, loss_scale=2, train_wall=11, gb_free=2.8, wall=87606
2021-06-19 18:59:03 | INFO | train_inner | epoch 003: 1692 / 3002 loss=2.785, ppl=6.89, wps=5820.8, ups=0.09, wpb=64828, bsz=128, num_updates=7647, lr=9.99468e-05, gnorm=2.078, loss_scale=2, train_wall=11, gb_free=2.8, wall=87617
2021-06-19 18:59:14 | INFO | train_inner | epoch 003: 1693 / 3002 loss=2.674, ppl=6.38, wps=5888.4, ups=0.09, wpb=64862, bsz=128, num_updates=7648, lr=9.99468e-05, gnorm=3.773, loss_scale=2, train_wall=11, gb_free=2.8, wall=87628
2021-06-19 18:59:25 | INFO | train_inner | epoch 003: 1694 / 3002 loss=2.615, ppl=6.13, wps=5885.9, ups=0.09, wpb=64880, bsz=128, num_updates=7649, lr=9.99468e-05, gnorm=2.309, loss_scale=2, train_wall=11, gb_free=2.8, wall=87639
2021-06-19 18:59:36 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-19 18:59:47 | INFO | train_inner | epoch 003: 1696 / 3002 loss=2.618, ppl=6.14, wps=2896.6, ups=0.04, wpb=64723, bsz=128, num_updates=7650, lr=9.99468e-05, gnorm=6.973, loss_scale=1, train_wall=21, gb_free=2.8, wall=87662
2021-06-19 18:59:58 | INFO | train_inner | epoch 003: 1697 / 3002 loss=2.637, ppl=6.22, wps=5836.4, ups=0.09, wpb=64872, bsz=128, num_updates=7651, lr=9.99468e-05, gnorm=2.387, loss_scale=1, train_wall=11, gb_free=2.8, wall=87673
2021-06-19 19:00:09 | INFO | train_inner | epoch 003: 1698 / 3002 loss=2.693, ppl=6.47, wps=5935.3, ups=0.09, wpb=64799, bsz=128, num_updates=7652, lr=9.99468e-05, gnorm=2.801, loss_scale=1, train_wall=10, gb_free=2.8, wall=87684
2021-06-19 19:00:20 | INFO | train_inner | epoch 003: 1699 / 3002 loss=2.537, ppl=5.81, wps=5876.3, ups=0.09, wpb=64822, bsz=128, num_updates=7653, lr=9.99468e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=87695
2021-06-19 19:00:31 | INFO | train_inner | epoch 003: 1700 / 3002 loss=2.681, ppl=6.41, wps=5914.7, ups=0.09, wpb=64875, bsz=128, num_updates=7654, lr=9.99468e-05, gnorm=2.253, loss_scale=1, train_wall=11, gb_free=2.8, wall=87706
2021-06-19 19:00:42 | INFO | train_inner | epoch 003: 1701 / 3002 loss=2.598, ppl=6.06, wps=5829.9, ups=0.09, wpb=64812, bsz=128, num_updates=7655, lr=9.99468e-05, gnorm=2.201, loss_scale=1, train_wall=11, gb_free=2.8, wall=87717
2021-06-19 19:00:54 | INFO | train_inner | epoch 003: 1702 / 3002 loss=2.771, ppl=6.82, wps=5789.9, ups=0.09, wpb=64761, bsz=128, num_updates=7656, lr=9.99467e-05, gnorm=2.39, loss_scale=1, train_wall=11, gb_free=2.8, wall=87728
2021-06-19 19:01:05 | INFO | train_inner | epoch 003: 1703 / 3002 loss=2.694, ppl=6.47, wps=5830, ups=0.09, wpb=64922, bsz=128, num_updates=7657, lr=9.99467e-05, gnorm=2.448, loss_scale=1, train_wall=11, gb_free=2.8, wall=87739
2021-06-19 19:01:16 | INFO | train_inner | epoch 003: 1704 / 3002 loss=2.717, ppl=6.57, wps=5768.9, ups=0.09, wpb=64847, bsz=128, num_updates=7658, lr=9.99467e-05, gnorm=3.489, loss_scale=1, train_wall=11, gb_free=2.8, wall=87750
2021-06-19 19:01:27 | INFO | train_inner | epoch 003: 1705 / 3002 loss=2.611, ppl=6.11, wps=5668.9, ups=0.09, wpb=64767, bsz=128, num_updates=7659, lr=9.99467e-05, gnorm=2.304, loss_scale=1, train_wall=11, gb_free=2.8, wall=87762
2021-06-19 19:01:38 | INFO | train_inner | epoch 003: 1706 / 3002 loss=2.666, ppl=6.35, wps=5891, ups=0.09, wpb=64874, bsz=128, num_updates=7660, lr=9.99467e-05, gnorm=3.019, loss_scale=1, train_wall=11, gb_free=2.8, wall=87773
2021-06-19 19:01:49 | INFO | train_inner | epoch 003: 1707 / 3002 loss=2.808, ppl=7, wps=5898.1, ups=0.09, wpb=64885, bsz=128, num_updates=7661, lr=9.99467e-05, gnorm=2.551, loss_scale=1, train_wall=11, gb_free=2.8, wall=87784
2021-06-19 19:02:00 | INFO | train_inner | epoch 003: 1708 / 3002 loss=2.808, ppl=7, wps=5984.1, ups=0.09, wpb=64877, bsz=128, num_updates=7662, lr=9.99467e-05, gnorm=2.319, loss_scale=1, train_wall=10, gb_free=2.8, wall=87795
2021-06-19 19:02:11 | INFO | train_inner | epoch 003: 1709 / 3002 loss=2.535, ppl=5.79, wps=5859.2, ups=0.09, wpb=64814, bsz=128, num_updates=7663, lr=9.99467e-05, gnorm=2.238, loss_scale=1, train_wall=11, gb_free=2.8, wall=87806
2021-06-19 19:02:22 | INFO | train_inner | epoch 003: 1710 / 3002 loss=2.64, ppl=6.23, wps=5821.1, ups=0.09, wpb=64814, bsz=128, num_updates=7664, lr=9.99467e-05, gnorm=2.184, loss_scale=1, train_wall=11, gb_free=2.8, wall=87817
2021-06-19 19:02:34 | INFO | train_inner | epoch 003: 1711 / 3002 loss=2.711, ppl=6.55, wps=5761, ups=0.09, wpb=64880, bsz=128, num_updates=7665, lr=9.99467e-05, gnorm=3.185, loss_scale=1, train_wall=11, gb_free=2.8, wall=87828
2021-06-19 19:02:45 | INFO | train_inner | epoch 003: 1712 / 3002 loss=2.668, ppl=6.36, wps=5847.9, ups=0.09, wpb=64958, bsz=128, num_updates=7666, lr=9.99467e-05, gnorm=2.471, loss_scale=1, train_wall=11, gb_free=2.8, wall=87839
2021-06-19 19:02:56 | INFO | train_inner | epoch 003: 1713 / 3002 loss=2.501, ppl=5.66, wps=6005.5, ups=0.09, wpb=64851, bsz=128, num_updates=7667, lr=9.99467e-05, gnorm=2.541, loss_scale=1, train_wall=10, gb_free=2.8, wall=87850
2021-06-19 19:03:06 | INFO | train_inner | epoch 003: 1714 / 3002 loss=2.603, ppl=6.07, wps=6011.9, ups=0.09, wpb=64726, bsz=128, num_updates=7668, lr=9.99467e-05, gnorm=3.316, loss_scale=1, train_wall=10, gb_free=2.8, wall=87861
2021-06-19 19:03:17 | INFO | train_inner | epoch 003: 1715 / 3002 loss=2.54, ppl=5.82, wps=5855.9, ups=0.09, wpb=64805, bsz=128, num_updates=7669, lr=9.99466e-05, gnorm=2.213, loss_scale=1, train_wall=11, gb_free=2.8, wall=87872
2021-06-19 19:03:28 | INFO | train_inner | epoch 003: 1716 / 3002 loss=2.631, ppl=6.19, wps=5989.7, ups=0.09, wpb=64948, bsz=128, num_updates=7670, lr=9.99466e-05, gnorm=2.181, loss_scale=1, train_wall=10, gb_free=2.8, wall=87883
2021-06-19 19:03:39 | INFO | train_inner | epoch 003: 1717 / 3002 loss=2.648, ppl=6.27, wps=5861.8, ups=0.09, wpb=64797, bsz=128, num_updates=7671, lr=9.99466e-05, gnorm=2.47, loss_scale=1, train_wall=11, gb_free=2.8, wall=87894
2021-06-19 19:03:50 | INFO | train_inner | epoch 003: 1718 / 3002 loss=2.522, ppl=5.74, wps=5874.4, ups=0.09, wpb=64872, bsz=128, num_updates=7672, lr=9.99466e-05, gnorm=2.213, loss_scale=1, train_wall=11, gb_free=2.8, wall=87905
2021-06-19 19:04:02 | INFO | train_inner | epoch 003: 1719 / 3002 loss=2.673, ppl=6.38, wps=5803.6, ups=0.09, wpb=64927, bsz=128, num_updates=7673, lr=9.99466e-05, gnorm=2.196, loss_scale=1, train_wall=11, gb_free=2.8, wall=87916
2021-06-19 19:04:12 | INFO | train_inner | epoch 003: 1720 / 3002 loss=2.715, ppl=6.57, wps=5922.5, ups=0.09, wpb=64817, bsz=128, num_updates=7674, lr=9.99466e-05, gnorm=3.086, loss_scale=1, train_wall=10, gb_free=2.8, wall=87927
2021-06-19 19:04:24 | INFO | train_inner | epoch 003: 1721 / 3002 loss=2.68, ppl=6.41, wps=5786.2, ups=0.09, wpb=64840, bsz=128, num_updates=7675, lr=9.99466e-05, gnorm=2.646, loss_scale=1, train_wall=11, gb_free=2.8, wall=87938
2021-06-19 19:04:35 | INFO | train_inner | epoch 003: 1722 / 3002 loss=2.727, ppl=6.62, wps=5776.2, ups=0.09, wpb=64760, bsz=128, num_updates=7676, lr=9.99466e-05, gnorm=2.404, loss_scale=1, train_wall=11, gb_free=2.8, wall=87949
2021-06-19 19:04:46 | INFO | train_inner | epoch 003: 1723 / 3002 loss=2.588, ppl=6.01, wps=5759.7, ups=0.09, wpb=64867, bsz=128, num_updates=7677, lr=9.99466e-05, gnorm=2.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=87961
2021-06-19 19:04:57 | INFO | train_inner | epoch 003: 1724 / 3002 loss=2.815, ppl=7.04, wps=5792, ups=0.09, wpb=64828, bsz=128, num_updates=7678, lr=9.99466e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=87972
2021-06-19 19:05:08 | INFO | train_inner | epoch 003: 1725 / 3002 loss=2.616, ppl=6.13, wps=5984.3, ups=0.09, wpb=64856, bsz=128, num_updates=7679, lr=9.99466e-05, gnorm=3.459, loss_scale=1, train_wall=10, gb_free=2.8, wall=87983
2021-06-19 19:05:19 | INFO | train_inner | epoch 003: 1726 / 3002 loss=2.588, ppl=6.01, wps=5884.4, ups=0.09, wpb=64889, bsz=128, num_updates=7680, lr=9.99466e-05, gnorm=2.157, loss_scale=1, train_wall=11, gb_free=2.8, wall=87994
2021-06-19 19:05:30 | INFO | train_inner | epoch 003: 1727 / 3002 loss=2.695, ppl=6.48, wps=6072.4, ups=0.09, wpb=64938, bsz=128, num_updates=7681, lr=9.99465e-05, gnorm=2.18, loss_scale=1, train_wall=10, gb_free=2.8, wall=88004
2021-06-19 19:05:41 | INFO | train_inner | epoch 003: 1728 / 3002 loss=2.577, ppl=5.97, wps=5899.1, ups=0.09, wpb=64812, bsz=128, num_updates=7682, lr=9.99465e-05, gnorm=2.136, loss_scale=1, train_wall=11, gb_free=2.8, wall=88015
2021-06-19 19:05:52 | INFO | train_inner | epoch 003: 1729 / 3002 loss=2.714, ppl=6.56, wps=5885.3, ups=0.09, wpb=64889, bsz=128, num_updates=7683, lr=9.99465e-05, gnorm=2.229, loss_scale=1, train_wall=11, gb_free=2.8, wall=88026
2021-06-19 19:06:03 | INFO | train_inner | epoch 003: 1730 / 3002 loss=2.744, ppl=6.7, wps=5810.9, ups=0.09, wpb=64827, bsz=128, num_updates=7684, lr=9.99465e-05, gnorm=5.538, loss_scale=1, train_wall=11, gb_free=2.8, wall=88037
2021-06-19 19:06:14 | INFO | train_inner | epoch 003: 1731 / 3002 loss=2.776, ppl=6.85, wps=5929.5, ups=0.09, wpb=64746, bsz=128, num_updates=7685, lr=9.99465e-05, gnorm=2.105, loss_scale=1, train_wall=10, gb_free=2.8, wall=88048
2021-06-19 19:06:25 | INFO | train_inner | epoch 003: 1732 / 3002 loss=2.663, ppl=6.34, wps=5771.4, ups=0.09, wpb=64799, bsz=128, num_updates=7686, lr=9.99465e-05, gnorm=2.145, loss_scale=1, train_wall=11, gb_free=2.8, wall=88060
2021-06-19 19:06:36 | INFO | train_inner | epoch 003: 1733 / 3002 loss=2.579, ppl=5.98, wps=5858.7, ups=0.09, wpb=64882, bsz=128, num_updates=7687, lr=9.99465e-05, gnorm=2.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=88071
2021-06-19 19:06:47 | INFO | train_inner | epoch 003: 1734 / 3002 loss=2.833, ppl=7.13, wps=5889, ups=0.09, wpb=64800, bsz=128, num_updates=7688, lr=9.99465e-05, gnorm=2.224, loss_scale=1, train_wall=11, gb_free=2.8, wall=88082
2021-06-19 19:06:58 | INFO | train_inner | epoch 003: 1735 / 3002 loss=2.63, ppl=6.19, wps=5846.9, ups=0.09, wpb=64798, bsz=128, num_updates=7689, lr=9.99465e-05, gnorm=2.285, loss_scale=1, train_wall=11, gb_free=2.8, wall=88093
2021-06-19 19:07:09 | INFO | train_inner | epoch 003: 1736 / 3002 loss=2.552, ppl=5.86, wps=5980.2, ups=0.09, wpb=64764, bsz=128, num_updates=7690, lr=9.99465e-05, gnorm=2.314, loss_scale=1, train_wall=10, gb_free=2.8, wall=88104
2021-06-19 19:07:20 | INFO | train_inner | epoch 003: 1737 / 3002 loss=2.672, ppl=6.38, wps=5803.4, ups=0.09, wpb=64893, bsz=128, num_updates=7691, lr=9.99465e-05, gnorm=4.061, loss_scale=1, train_wall=11, gb_free=2.8, wall=88115
2021-06-19 19:07:32 | INFO | train_inner | epoch 003: 1738 / 3002 loss=2.744, ppl=6.7, wps=5830.6, ups=0.09, wpb=64786, bsz=128, num_updates=7692, lr=9.99465e-05, gnorm=2.156, loss_scale=1, train_wall=11, gb_free=2.8, wall=88126
2021-06-19 19:07:43 | INFO | train_inner | epoch 003: 1739 / 3002 loss=2.53, ppl=5.77, wps=5839.5, ups=0.09, wpb=64771, bsz=128, num_updates=7693, lr=9.99465e-05, gnorm=2.16, loss_scale=1, train_wall=11, gb_free=2.8, wall=88137
2021-06-19 19:07:54 | INFO | train_inner | epoch 003: 1740 / 3002 loss=2.548, ppl=5.85, wps=5688.3, ups=0.09, wpb=64825, bsz=128, num_updates=7694, lr=9.99464e-05, gnorm=2.19, loss_scale=1, train_wall=11, gb_free=2.8, wall=88148
2021-06-19 19:08:05 | INFO | train_inner | epoch 003: 1741 / 3002 loss=2.753, ppl=6.74, wps=5938.8, ups=0.09, wpb=64763, bsz=128, num_updates=7695, lr=9.99464e-05, gnorm=2.141, loss_scale=1, train_wall=10, gb_free=2.8, wall=88159
2021-06-19 19:08:16 | INFO | train_inner | epoch 003: 1742 / 3002 loss=2.748, ppl=6.72, wps=5834.4, ups=0.09, wpb=64743, bsz=128, num_updates=7696, lr=9.99464e-05, gnorm=2.184, loss_scale=1, train_wall=11, gb_free=2.8, wall=88170
2021-06-19 19:08:27 | INFO | train_inner | epoch 003: 1743 / 3002 loss=2.81, ppl=7.01, wps=5796.7, ups=0.09, wpb=64802, bsz=128, num_updates=7697, lr=9.99464e-05, gnorm=2.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=88182
2021-06-19 19:08:38 | INFO | train_inner | epoch 003: 1744 / 3002 loss=2.581, ppl=5.98, wps=5993.3, ups=0.09, wpb=64912, bsz=128, num_updates=7698, lr=9.99464e-05, gnorm=2.035, loss_scale=1, train_wall=10, gb_free=2.8, wall=88192
2021-06-19 19:08:49 | INFO | train_inner | epoch 003: 1745 / 3002 loss=2.706, ppl=6.52, wps=5900.8, ups=0.09, wpb=64751, bsz=128, num_updates=7699, lr=9.99464e-05, gnorm=5.684, loss_scale=1, train_wall=11, gb_free=2.8, wall=88203
2021-06-19 19:09:00 | INFO | train_inner | epoch 003: 1746 / 3002 loss=2.715, ppl=6.57, wps=5891, ups=0.09, wpb=64863, bsz=128, num_updates=7700, lr=9.99464e-05, gnorm=2.245, loss_scale=1, train_wall=11, gb_free=2.8, wall=88214
2021-06-19 19:09:11 | INFO | train_inner | epoch 003: 1747 / 3002 loss=2.593, ppl=6.03, wps=5813.9, ups=0.09, wpb=64883, bsz=128, num_updates=7701, lr=9.99464e-05, gnorm=2.375, loss_scale=1, train_wall=11, gb_free=2.8, wall=88226
2021-06-19 19:09:22 | INFO | train_inner | epoch 003: 1748 / 3002 loss=2.571, ppl=5.94, wps=5967.4, ups=0.09, wpb=64889, bsz=128, num_updates=7702, lr=9.99464e-05, gnorm=3.187, loss_scale=1, train_wall=10, gb_free=2.8, wall=88236
2021-06-19 19:09:33 | INFO | train_inner | epoch 003: 1749 / 3002 loss=2.662, ppl=6.33, wps=5775.5, ups=0.09, wpb=64827, bsz=128, num_updates=7703, lr=9.99464e-05, gnorm=2.352, loss_scale=1, train_wall=11, gb_free=2.8, wall=88248
2021-06-19 19:09:44 | INFO | train_inner | epoch 003: 1750 / 3002 loss=2.7, ppl=6.5, wps=5865.2, ups=0.09, wpb=64880, bsz=128, num_updates=7704, lr=9.99464e-05, gnorm=2.302, loss_scale=1, train_wall=11, gb_free=2.8, wall=88259
2021-06-19 19:09:56 | INFO | train_inner | epoch 003: 1751 / 3002 loss=2.666, ppl=6.34, wps=5731.6, ups=0.09, wpb=64941, bsz=128, num_updates=7705, lr=9.99464e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=88270
2021-06-19 19:10:07 | INFO | train_inner | epoch 003: 1752 / 3002 loss=2.782, ppl=6.88, wps=5803.9, ups=0.09, wpb=64782, bsz=128, num_updates=7706, lr=9.99463e-05, gnorm=3.387, loss_scale=1, train_wall=11, gb_free=2.8, wall=88281
2021-06-19 19:10:18 | INFO | train_inner | epoch 003: 1753 / 3002 loss=2.571, ppl=5.94, wps=5847.6, ups=0.09, wpb=64832, bsz=128, num_updates=7707, lr=9.99463e-05, gnorm=2.159, loss_scale=1, train_wall=11, gb_free=2.8, wall=88292
2021-06-19 19:10:29 | INFO | train_inner | epoch 003: 1754 / 3002 loss=2.583, ppl=5.99, wps=5823, ups=0.09, wpb=64828, bsz=128, num_updates=7708, lr=9.99463e-05, gnorm=2.289, loss_scale=1, train_wall=11, gb_free=2.8, wall=88303
2021-06-19 19:10:40 | INFO | train_inner | epoch 003: 1755 / 3002 loss=2.617, ppl=6.13, wps=6008, ups=0.09, wpb=64829, bsz=128, num_updates=7709, lr=9.99463e-05, gnorm=2.997, loss_scale=1, train_wall=10, gb_free=2.8, wall=88314
2021-06-19 19:10:51 | INFO | train_inner | epoch 003: 1756 / 3002 loss=2.599, ppl=6.06, wps=5877.8, ups=0.09, wpb=64915, bsz=128, num_updates=7710, lr=9.99463e-05, gnorm=2.122, loss_scale=1, train_wall=11, gb_free=2.8, wall=88325
2021-06-19 19:11:02 | INFO | train_inner | epoch 003: 1757 / 3002 loss=2.56, ppl=5.9, wps=5775.5, ups=0.09, wpb=64770, bsz=128, num_updates=7711, lr=9.99463e-05, gnorm=2.657, loss_scale=1, train_wall=11, gb_free=2.8, wall=88336
2021-06-19 19:11:13 | INFO | train_inner | epoch 003: 1758 / 3002 loss=2.466, ppl=5.53, wps=5769.3, ups=0.09, wpb=64889, bsz=128, num_updates=7712, lr=9.99463e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=88348
2021-06-19 19:11:25 | INFO | train_inner | epoch 003: 1759 / 3002 loss=2.665, ppl=6.34, wps=5793.4, ups=0.09, wpb=64787, bsz=128, num_updates=7713, lr=9.99463e-05, gnorm=3.741, loss_scale=1, train_wall=11, gb_free=2.8, wall=88359
2021-06-19 19:11:36 | INFO | train_inner | epoch 003: 1760 / 3002 loss=2.816, ppl=7.04, wps=5834.6, ups=0.09, wpb=64756, bsz=128, num_updates=7714, lr=9.99463e-05, gnorm=2.289, loss_scale=1, train_wall=11, gb_free=2.8, wall=88370
2021-06-19 19:11:47 | INFO | train_inner | epoch 003: 1761 / 3002 loss=2.716, ppl=6.57, wps=5709.9, ups=0.09, wpb=64844, bsz=128, num_updates=7715, lr=9.99463e-05, gnorm=2.425, loss_scale=1, train_wall=11, gb_free=2.8, wall=88381
2021-06-19 19:11:58 | INFO | train_inner | epoch 003: 1762 / 3002 loss=2.48, ppl=5.58, wps=5895.4, ups=0.09, wpb=64738, bsz=128, num_updates=7716, lr=9.99463e-05, gnorm=2.242, loss_scale=1, train_wall=11, gb_free=2.8, wall=88392
2021-06-19 19:12:09 | INFO | train_inner | epoch 003: 1763 / 3002 loss=2.734, ppl=6.65, wps=5956.3, ups=0.09, wpb=64877, bsz=128, num_updates=7717, lr=9.99463e-05, gnorm=2.343, loss_scale=1, train_wall=10, gb_free=2.8, wall=88403
2021-06-19 19:12:20 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-19 19:12:31 | INFO | train_inner | epoch 003: 1765 / 3002 loss=2.757, ppl=6.76, wps=2924, ups=0.05, wpb=64761, bsz=128, num_updates=7718, lr=9.99463e-05, gnorm=2.227, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=88425
2021-06-19 19:12:42 | INFO | train_inner | epoch 003: 1766 / 3002 loss=2.673, ppl=6.38, wps=5733.3, ups=0.09, wpb=64818, bsz=128, num_updates=7719, lr=9.99462e-05, gnorm=2.106, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88437
2021-06-19 19:12:54 | INFO | train_inner | epoch 003: 1767 / 3002 loss=2.517, ppl=5.72, wps=5767.7, ups=0.09, wpb=64864, bsz=128, num_updates=7720, lr=9.99462e-05, gnorm=2.37, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88448
2021-06-19 19:13:04 | INFO | train_inner | epoch 003: 1768 / 3002 loss=2.695, ppl=6.47, wps=5966.3, ups=0.09, wpb=64896, bsz=128, num_updates=7721, lr=9.99462e-05, gnorm=2.958, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88459
2021-06-19 19:13:15 | INFO | train_inner | epoch 003: 1769 / 3002 loss=2.563, ppl=5.91, wps=5923.6, ups=0.09, wpb=64850, bsz=128, num_updates=7722, lr=9.99462e-05, gnorm=2.491, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88470
2021-06-19 19:13:27 | INFO | train_inner | epoch 003: 1770 / 3002 loss=2.604, ppl=6.08, wps=5783.8, ups=0.09, wpb=64839, bsz=128, num_updates=7723, lr=9.99462e-05, gnorm=2.213, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88481
2021-06-19 19:13:38 | INFO | train_inner | epoch 003: 1771 / 3002 loss=2.676, ppl=6.39, wps=5874.8, ups=0.09, wpb=64889, bsz=128, num_updates=7724, lr=9.99462e-05, gnorm=2.137, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88492
2021-06-19 19:13:49 | INFO | train_inner | epoch 003: 1772 / 3002 loss=2.557, ppl=5.88, wps=5919.6, ups=0.09, wpb=64771, bsz=128, num_updates=7725, lr=9.99462e-05, gnorm=2.098, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88503
2021-06-19 19:14:00 | INFO | train_inner | epoch 003: 1773 / 3002 loss=2.739, ppl=6.68, wps=5776.2, ups=0.09, wpb=64833, bsz=128, num_updates=7726, lr=9.99462e-05, gnorm=3.379, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88514
2021-06-19 19:14:11 | INFO | train_inner | epoch 003: 1774 / 3002 loss=2.471, ppl=5.55, wps=5847.6, ups=0.09, wpb=64844, bsz=128, num_updates=7727, lr=9.99462e-05, gnorm=2.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88525
2021-06-19 19:14:22 | INFO | train_inner | epoch 003: 1775 / 3002 loss=2.683, ppl=6.42, wps=5823, ups=0.09, wpb=64837, bsz=128, num_updates=7728, lr=9.99462e-05, gnorm=3.16, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88536
2021-06-19 19:14:33 | INFO | train_inner | epoch 003: 1776 / 3002 loss=2.727, ppl=6.62, wps=5905.8, ups=0.09, wpb=64867, bsz=128, num_updates=7729, lr=9.99462e-05, gnorm=2.163, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88547
2021-06-19 19:14:44 | INFO | train_inner | epoch 003: 1777 / 3002 loss=2.464, ppl=5.52, wps=5760.7, ups=0.09, wpb=64819, bsz=128, num_updates=7730, lr=9.99462e-05, gnorm=2.331, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88559
2021-06-19 19:14:55 | INFO | train_inner | epoch 003: 1778 / 3002 loss=2.737, ppl=6.67, wps=5935.7, ups=0.09, wpb=64923, bsz=128, num_updates=7731, lr=9.99461e-05, gnorm=2.247, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88570
2021-06-19 19:15:06 | INFO | train_inner | epoch 003: 1779 / 3002 loss=2.591, ppl=6.03, wps=5847, ups=0.09, wpb=64857, bsz=128, num_updates=7732, lr=9.99461e-05, gnorm=2.14, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88581
2021-06-19 19:15:17 | INFO | train_inner | epoch 003: 1780 / 3002 loss=2.607, ppl=6.09, wps=6079.9, ups=0.09, wpb=64861, bsz=128, num_updates=7733, lr=9.99461e-05, gnorm=2.4, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88591
2021-06-19 19:15:28 | INFO | train_inner | epoch 003: 1781 / 3002 loss=2.71, ppl=6.54, wps=5815.8, ups=0.09, wpb=64787, bsz=128, num_updates=7734, lr=9.99461e-05, gnorm=2.244, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88602
2021-06-19 19:15:39 | INFO | train_inner | epoch 003: 1782 / 3002 loss=2.688, ppl=6.45, wps=5742.5, ups=0.09, wpb=64853, bsz=128, num_updates=7735, lr=9.99461e-05, gnorm=2.269, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88614
2021-06-19 19:15:51 | INFO | train_inner | epoch 003: 1783 / 3002 loss=2.684, ppl=6.43, wps=5788.9, ups=0.09, wpb=64833, bsz=128, num_updates=7736, lr=9.99461e-05, gnorm=2.231, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88625
2021-06-19 19:16:02 | INFO | train_inner | epoch 003: 1784 / 3002 loss=2.633, ppl=6.21, wps=5749.4, ups=0.09, wpb=64779, bsz=128, num_updates=7737, lr=9.99461e-05, gnorm=2.156, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88636
2021-06-19 19:16:13 | INFO | train_inner | epoch 003: 1785 / 3002 loss=2.618, ppl=6.14, wps=5784.8, ups=0.09, wpb=64851, bsz=128, num_updates=7738, lr=9.99461e-05, gnorm=2.191, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88647
2021-06-19 19:16:24 | INFO | train_inner | epoch 003: 1786 / 3002 loss=2.597, ppl=6.05, wps=5790.9, ups=0.09, wpb=64853, bsz=128, num_updates=7739, lr=9.99461e-05, gnorm=2.079, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88659
2021-06-19 19:16:36 | INFO | train_inner | epoch 003: 1787 / 3002 loss=2.57, ppl=5.94, wps=5730.8, ups=0.09, wpb=64716, bsz=128, num_updates=7740, lr=9.99461e-05, gnorm=2.237, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88670
2021-06-19 19:16:47 | INFO | train_inner | epoch 003: 1788 / 3002 loss=2.673, ppl=6.38, wps=5869.7, ups=0.09, wpb=64846, bsz=128, num_updates=7741, lr=9.99461e-05, gnorm=2.304, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88681
2021-06-19 19:16:58 | INFO | train_inner | epoch 003: 1789 / 3002 loss=2.697, ppl=6.49, wps=5866.4, ups=0.09, wpb=64837, bsz=128, num_updates=7742, lr=9.99461e-05, gnorm=2.247, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88692
2021-06-19 19:17:09 | INFO | train_inner | epoch 003: 1790 / 3002 loss=2.669, ppl=6.36, wps=5886, ups=0.09, wpb=64863, bsz=128, num_updates=7743, lr=9.99461e-05, gnorm=2.273, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88703
2021-06-19 19:17:20 | INFO | train_inner | epoch 003: 1791 / 3002 loss=2.535, ppl=5.8, wps=5806.2, ups=0.09, wpb=64886, bsz=128, num_updates=7744, lr=9.9946e-05, gnorm=2.108, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88714
2021-06-19 19:17:31 | INFO | train_inner | epoch 003: 1792 / 3002 loss=2.569, ppl=5.93, wps=5835.4, ups=0.09, wpb=64757, bsz=128, num_updates=7745, lr=9.9946e-05, gnorm=2.061, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88725
2021-06-19 19:17:42 | INFO | train_inner | epoch 003: 1793 / 3002 loss=2.714, ppl=6.56, wps=5897.2, ups=0.09, wpb=64842, bsz=128, num_updates=7746, lr=9.9946e-05, gnorm=2.193, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88736
2021-06-19 19:17:53 | INFO | train_inner | epoch 003: 1794 / 3002 loss=2.627, ppl=6.18, wps=5860.9, ups=0.09, wpb=64849, bsz=128, num_updates=7747, lr=9.9946e-05, gnorm=2.269, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88747
2021-06-19 19:18:04 | INFO | train_inner | epoch 003: 1795 / 3002 loss=2.697, ppl=6.48, wps=5905.5, ups=0.09, wpb=64781, bsz=128, num_updates=7748, lr=9.9946e-05, gnorm=2.497, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88758
2021-06-19 19:18:15 | INFO | train_inner | epoch 003: 1796 / 3002 loss=2.469, ppl=5.54, wps=5804, ups=0.09, wpb=64803, bsz=128, num_updates=7749, lr=9.9946e-05, gnorm=2.285, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88769
2021-06-19 19:18:26 | INFO | train_inner | epoch 003: 1797 / 3002 loss=2.584, ppl=5.99, wps=5733.3, ups=0.09, wpb=64824, bsz=128, num_updates=7750, lr=9.9946e-05, gnorm=6.936, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88781
2021-06-19 19:18:37 | INFO | train_inner | epoch 003: 1798 / 3002 loss=2.659, ppl=6.31, wps=5875.7, ups=0.09, wpb=64822, bsz=128, num_updates=7751, lr=9.9946e-05, gnorm=2.163, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88792
2021-06-19 19:18:49 | INFO | train_inner | epoch 003: 1799 / 3002 loss=2.632, ppl=6.2, wps=5821.8, ups=0.09, wpb=64829, bsz=128, num_updates=7752, lr=9.9946e-05, gnorm=6.507, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88803
2021-06-19 19:19:00 | INFO | train_inner | epoch 003: 1800 / 3002 loss=2.637, ppl=6.22, wps=5868.9, ups=0.09, wpb=64893, bsz=128, num_updates=7753, lr=9.9946e-05, gnorm=2.157, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88814
2021-06-19 19:19:11 | INFO | train_inner | epoch 003: 1801 / 3002 loss=2.675, ppl=6.38, wps=5801.8, ups=0.09, wpb=64777, bsz=128, num_updates=7754, lr=9.9946e-05, gnorm=2.325, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88825
2021-06-19 19:19:22 | INFO | train_inner | epoch 003: 1802 / 3002 loss=2.605, ppl=6.08, wps=5831.2, ups=0.09, wpb=64843, bsz=128, num_updates=7755, lr=9.9946e-05, gnorm=2.027, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88836
2021-06-19 19:19:33 | INFO | train_inner | epoch 003: 1803 / 3002 loss=2.797, ppl=6.95, wps=5856.4, ups=0.09, wpb=64877, bsz=128, num_updates=7756, lr=9.99459e-05, gnorm=2.162, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88847
2021-06-19 19:19:44 | INFO | train_inner | epoch 003: 1804 / 3002 loss=2.501, ppl=5.66, wps=5820.3, ups=0.09, wpb=64857, bsz=128, num_updates=7757, lr=9.99459e-05, gnorm=2.072, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88859
2021-06-19 19:19:55 | INFO | train_inner | epoch 003: 1805 / 3002 loss=2.592, ppl=6.03, wps=5770.9, ups=0.09, wpb=64817, bsz=128, num_updates=7758, lr=9.99459e-05, gnorm=2.83, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88870
2021-06-19 19:20:07 | INFO | train_inner | epoch 003: 1806 / 3002 loss=2.697, ppl=6.48, wps=5851.9, ups=0.09, wpb=64815, bsz=128, num_updates=7759, lr=9.99459e-05, gnorm=2.297, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88881
2021-06-19 19:20:18 | INFO | train_inner | epoch 003: 1807 / 3002 loss=2.698, ppl=6.49, wps=5810.5, ups=0.09, wpb=64875, bsz=128, num_updates=7760, lr=9.99459e-05, gnorm=6.609, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88892
2021-06-19 19:20:29 | INFO | train_inner | epoch 003: 1808 / 3002 loss=2.673, ppl=6.38, wps=5883.9, ups=0.09, wpb=64837, bsz=128, num_updates=7761, lr=9.99459e-05, gnorm=2.192, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88903
2021-06-19 19:20:40 | INFO | train_inner | epoch 003: 1809 / 3002 loss=2.63, ppl=6.19, wps=5910.9, ups=0.09, wpb=64838, bsz=128, num_updates=7762, lr=9.99459e-05, gnorm=2.197, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88914
2021-06-19 19:20:51 | INFO | train_inner | epoch 003: 1810 / 3002 loss=2.71, ppl=6.54, wps=5832.4, ups=0.09, wpb=64864, bsz=128, num_updates=7763, lr=9.99459e-05, gnorm=2.104, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88925
2021-06-19 19:21:02 | INFO | train_inner | epoch 003: 1811 / 3002 loss=2.557, ppl=5.89, wps=5790.7, ups=0.09, wpb=64750, bsz=128, num_updates=7764, lr=9.99459e-05, gnorm=2.161, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88936
2021-06-19 19:21:13 | INFO | train_inner | epoch 003: 1812 / 3002 loss=2.815, ppl=7.04, wps=5851.1, ups=0.09, wpb=64881, bsz=128, num_updates=7765, lr=9.99459e-05, gnorm=2.16, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88947
2021-06-19 19:21:24 | INFO | train_inner | epoch 003: 1813 / 3002 loss=2.614, ppl=6.12, wps=5894.8, ups=0.09, wpb=64931, bsz=128, num_updates=7766, lr=9.99459e-05, gnorm=2.187, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88958
2021-06-19 19:21:35 | INFO | train_inner | epoch 003: 1814 / 3002 loss=2.586, ppl=6, wps=5912.7, ups=0.09, wpb=64830, bsz=128, num_updates=7767, lr=9.99459e-05, gnorm=9.168, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88969
2021-06-19 19:21:46 | INFO | train_inner | epoch 003: 1815 / 3002 loss=2.632, ppl=6.2, wps=5862.3, ups=0.09, wpb=64854, bsz=128, num_updates=7768, lr=9.99459e-05, gnorm=2.932, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=88980
2021-06-19 19:21:57 | INFO | train_inner | epoch 003: 1816 / 3002 loss=2.718, ppl=6.58, wps=6002, ups=0.09, wpb=64818, bsz=128, num_updates=7769, lr=9.99458e-05, gnorm=4.272, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=88991
2021-06-19 19:22:08 | INFO | train_inner | epoch 003: 1817 / 3002 loss=2.768, ppl=6.81, wps=5711.8, ups=0.09, wpb=64803, bsz=128, num_updates=7770, lr=9.99458e-05, gnorm=2.368, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89003
2021-06-19 19:22:19 | INFO | train_inner | epoch 003: 1818 / 3002 loss=2.594, ppl=6.04, wps=5884.7, ups=0.09, wpb=64927, bsz=128, num_updates=7771, lr=9.99458e-05, gnorm=2.14, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89014
2021-06-19 19:22:30 | INFO | train_inner | epoch 003: 1819 / 3002 loss=2.713, ppl=6.56, wps=5820.1, ups=0.09, wpb=64847, bsz=128, num_updates=7772, lr=9.99458e-05, gnorm=2.258, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89025
2021-06-19 19:22:42 | INFO | train_inner | epoch 003: 1820 / 3002 loss=2.633, ppl=6.2, wps=5833, ups=0.09, wpb=64890, bsz=128, num_updates=7773, lr=9.99458e-05, gnorm=2.256, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89036
2021-06-19 19:22:52 | INFO | train_inner | epoch 003: 1821 / 3002 loss=2.605, ppl=6.08, wps=6002, ups=0.09, wpb=64917, bsz=128, num_updates=7774, lr=9.99458e-05, gnorm=2.173, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89047
2021-06-19 19:23:03 | INFO | train_inner | epoch 003: 1822 / 3002 loss=2.606, ppl=6.09, wps=5850.6, ups=0.09, wpb=64742, bsz=128, num_updates=7775, lr=9.99458e-05, gnorm=2.182, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89058
2021-06-19 19:23:15 | INFO | train_inner | epoch 003: 1823 / 3002 loss=2.624, ppl=6.16, wps=5746.2, ups=0.09, wpb=64838, bsz=128, num_updates=7776, lr=9.99458e-05, gnorm=2.134, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89069
2021-06-19 19:23:26 | INFO | train_inner | epoch 003: 1824 / 3002 loss=2.751, ppl=6.73, wps=5893.1, ups=0.09, wpb=64886, bsz=128, num_updates=7777, lr=9.99458e-05, gnorm=2.344, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89080
2021-06-19 19:23:37 | INFO | train_inner | epoch 003: 1825 / 3002 loss=2.704, ppl=6.52, wps=5894.6, ups=0.09, wpb=64787, bsz=128, num_updates=7778, lr=9.99458e-05, gnorm=2.134, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89091
2021-06-19 19:23:48 | INFO | train_inner | epoch 003: 1826 / 3002 loss=2.701, ppl=6.5, wps=5793.7, ups=0.09, wpb=64841, bsz=128, num_updates=7779, lr=9.99458e-05, gnorm=2.191, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89102
2021-06-19 19:23:59 | INFO | train_inner | epoch 003: 1827 / 3002 loss=2.582, ppl=5.99, wps=5982.7, ups=0.09, wpb=64827, bsz=128, num_updates=7780, lr=9.99458e-05, gnorm=2.316, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89113
2021-06-19 19:24:10 | INFO | train_inner | epoch 003: 1828 / 3002 loss=2.69, ppl=6.45, wps=5729.4, ups=0.09, wpb=64805, bsz=128, num_updates=7781, lr=9.99457e-05, gnorm=2.09, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89124
2021-06-19 19:24:21 | INFO | train_inner | epoch 003: 1829 / 3002 loss=2.527, ppl=5.76, wps=5885.8, ups=0.09, wpb=64909, bsz=128, num_updates=7782, lr=9.99457e-05, gnorm=2.196, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89135
2021-06-19 19:24:32 | INFO | train_inner | epoch 003: 1830 / 3002 loss=2.619, ppl=6.14, wps=5803.3, ups=0.09, wpb=64882, bsz=128, num_updates=7783, lr=9.99457e-05, gnorm=2.181, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89147
2021-06-19 19:24:43 | INFO | train_inner | epoch 003: 1831 / 3002 loss=2.612, ppl=6.11, wps=5926, ups=0.09, wpb=64842, bsz=128, num_updates=7784, lr=9.99457e-05, gnorm=2.23, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89158
2021-06-19 19:24:54 | INFO | train_inner | epoch 003: 1832 / 3002 loss=2.74, ppl=6.68, wps=5923.9, ups=0.09, wpb=64790, bsz=128, num_updates=7785, lr=9.99457e-05, gnorm=2.217, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89168
2021-06-19 19:25:05 | INFO | train_inner | epoch 003: 1833 / 3002 loss=2.476, ppl=5.56, wps=5766.4, ups=0.09, wpb=64905, bsz=128, num_updates=7786, lr=9.99457e-05, gnorm=2.157, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89180
2021-06-19 19:25:17 | INFO | train_inner | epoch 003: 1834 / 3002 loss=2.677, ppl=6.39, wps=5778.7, ups=0.09, wpb=64793, bsz=128, num_updates=7787, lr=9.99457e-05, gnorm=2.13, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89191
2021-06-19 19:25:28 | INFO | train_inner | epoch 003: 1835 / 3002 loss=2.619, ppl=6.14, wps=5881.1, ups=0.09, wpb=64912, bsz=128, num_updates=7788, lr=9.99457e-05, gnorm=2.168, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89202
2021-06-19 19:25:39 | INFO | train_inner | epoch 003: 1836 / 3002 loss=2.61, ppl=6.11, wps=5807.7, ups=0.09, wpb=64867, bsz=128, num_updates=7789, lr=9.99457e-05, gnorm=2.166, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89213
2021-06-19 19:25:50 | INFO | train_inner | epoch 003: 1837 / 3002 loss=2.657, ppl=6.31, wps=5910.7, ups=0.09, wpb=64812, bsz=128, num_updates=7790, lr=9.99457e-05, gnorm=2.339, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89224
2021-06-19 19:26:01 | INFO | train_inner | epoch 003: 1838 / 3002 loss=2.601, ppl=6.07, wps=5873, ups=0.09, wpb=64767, bsz=128, num_updates=7791, lr=9.99457e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89235
2021-06-19 19:26:12 | INFO | train_inner | epoch 003: 1839 / 3002 loss=2.616, ppl=6.13, wps=5800.1, ups=0.09, wpb=64801, bsz=128, num_updates=7792, lr=9.99457e-05, gnorm=2.111, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89246
2021-06-19 19:26:23 | INFO | train_inner | epoch 003: 1840 / 3002 loss=2.58, ppl=5.98, wps=5939.1, ups=0.09, wpb=64817, bsz=128, num_updates=7793, lr=9.99457e-05, gnorm=2.149, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89257
2021-06-19 19:26:34 | INFO | train_inner | epoch 003: 1841 / 3002 loss=2.655, ppl=6.3, wps=5760.6, ups=0.09, wpb=64756, bsz=128, num_updates=7794, lr=9.99456e-05, gnorm=2.229, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89268
2021-06-19 19:26:45 | INFO | train_inner | epoch 003: 1842 / 3002 loss=2.498, ppl=5.65, wps=5813.7, ups=0.09, wpb=64806, bsz=128, num_updates=7795, lr=9.99456e-05, gnorm=2.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89280
2021-06-19 19:26:57 | INFO | train_inner | epoch 003: 1843 / 3002 loss=2.612, ppl=6.11, wps=5743.7, ups=0.09, wpb=64912, bsz=128, num_updates=7796, lr=9.99456e-05, gnorm=2.051, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89291
2021-06-19 19:27:08 | INFO | train_inner | epoch 003: 1844 / 3002 loss=2.616, ppl=6.13, wps=5795.7, ups=0.09, wpb=64729, bsz=128, num_updates=7797, lr=9.99456e-05, gnorm=2.21, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89302
2021-06-19 19:27:19 | INFO | train_inner | epoch 003: 1845 / 3002 loss=2.61, ppl=6.11, wps=5748.8, ups=0.09, wpb=64837, bsz=128, num_updates=7798, lr=9.99456e-05, gnorm=2.162, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89313
2021-06-19 19:27:30 | INFO | train_inner | epoch 003: 1846 / 3002 loss=2.755, ppl=6.75, wps=5814.4, ups=0.09, wpb=64730, bsz=128, num_updates=7799, lr=9.99456e-05, gnorm=2.331, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89325
2021-06-19 19:27:41 | INFO | train_inner | epoch 003: 1847 / 3002 loss=2.644, ppl=6.25, wps=5913.3, ups=0.09, wpb=64878, bsz=128, num_updates=7800, lr=9.99456e-05, gnorm=2.217, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89335
2021-06-19 19:27:52 | INFO | train_inner | epoch 003: 1848 / 3002 loss=2.546, ppl=5.84, wps=5861.9, ups=0.09, wpb=64902, bsz=128, num_updates=7801, lr=9.99456e-05, gnorm=5.917, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89347
2021-06-19 19:28:03 | INFO | train_inner | epoch 003: 1849 / 3002 loss=2.583, ppl=5.99, wps=5953.4, ups=0.09, wpb=64794, bsz=128, num_updates=7802, lr=9.99456e-05, gnorm=2.802, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89357
2021-06-19 19:28:14 | INFO | train_inner | epoch 003: 1850 / 3002 loss=2.505, ppl=5.68, wps=5748, ups=0.09, wpb=64788, bsz=128, num_updates=7803, lr=9.99456e-05, gnorm=2.652, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89369
2021-06-19 19:28:25 | INFO | train_inner | epoch 003: 1851 / 3002 loss=2.509, ppl=5.69, wps=5853.2, ups=0.09, wpb=64817, bsz=128, num_updates=7804, lr=9.99456e-05, gnorm=2.675, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89380
2021-06-19 19:28:36 | INFO | train_inner | epoch 003: 1852 / 3002 loss=2.494, ppl=5.63, wps=5899, ups=0.09, wpb=64795, bsz=128, num_updates=7805, lr=9.99456e-05, gnorm=2.204, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89391
2021-06-19 19:28:47 | INFO | train_inner | epoch 003: 1853 / 3002 loss=2.596, ppl=6.05, wps=5902.1, ups=0.09, wpb=64856, bsz=128, num_updates=7806, lr=9.99455e-05, gnorm=2.103, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89402
2021-06-19 19:28:58 | INFO | train_inner | epoch 003: 1854 / 3002 loss=2.607, ppl=6.09, wps=5897.3, ups=0.09, wpb=64840, bsz=128, num_updates=7807, lr=9.99455e-05, gnorm=2.091, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89413
2021-06-19 19:29:09 | INFO | train_inner | epoch 003: 1855 / 3002 loss=2.578, ppl=5.97, wps=5877.5, ups=0.09, wpb=64893, bsz=128, num_updates=7808, lr=9.99455e-05, gnorm=2, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89424
2021-06-19 19:29:21 | INFO | train_inner | epoch 003: 1856 / 3002 loss=2.701, ppl=6.5, wps=5857.2, ups=0.09, wpb=64895, bsz=128, num_updates=7809, lr=9.99455e-05, gnorm=2.062, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89435
2021-06-19 19:29:32 | INFO | train_inner | epoch 003: 1857 / 3002 loss=2.624, ppl=6.16, wps=5893.7, ups=0.09, wpb=64794, bsz=128, num_updates=7810, lr=9.99455e-05, gnorm=2.139, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89446
2021-06-19 19:29:43 | INFO | train_inner | epoch 003: 1858 / 3002 loss=2.55, ppl=5.85, wps=5841.2, ups=0.09, wpb=64826, bsz=128, num_updates=7811, lr=9.99455e-05, gnorm=2.227, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89457
2021-06-19 19:29:54 | INFO | train_inner | epoch 003: 1859 / 3002 loss=2.621, ppl=6.15, wps=5893.1, ups=0.09, wpb=64851, bsz=128, num_updates=7812, lr=9.99455e-05, gnorm=2.206, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89468
2021-06-19 19:30:05 | INFO | train_inner | epoch 003: 1860 / 3002 loss=2.364, ppl=5.15, wps=5920.9, ups=0.09, wpb=64821, bsz=128, num_updates=7813, lr=9.99455e-05, gnorm=2.122, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89479
2021-06-19 19:30:16 | INFO | train_inner | epoch 003: 1861 / 3002 loss=2.608, ppl=6.1, wps=5927, ups=0.09, wpb=64843, bsz=128, num_updates=7814, lr=9.99455e-05, gnorm=7.6, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89490
2021-06-19 19:30:27 | INFO | train_inner | epoch 003: 1862 / 3002 loss=2.679, ppl=6.4, wps=5716.6, ups=0.09, wpb=64852, bsz=128, num_updates=7815, lr=9.99455e-05, gnorm=2.119, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89501
2021-06-19 19:30:38 | INFO | train_inner | epoch 003: 1863 / 3002 loss=2.582, ppl=5.99, wps=5985.8, ups=0.09, wpb=64864, bsz=128, num_updates=7816, lr=9.99455e-05, gnorm=2.793, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89512
2021-06-19 19:30:49 | INFO | train_inner | epoch 003: 1864 / 3002 loss=2.495, ppl=5.64, wps=5781.9, ups=0.09, wpb=64824, bsz=128, num_updates=7817, lr=9.99455e-05, gnorm=2.058, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89523
2021-06-19 19:31:00 | INFO | train_inner | epoch 003: 1865 / 3002 loss=2.644, ppl=6.25, wps=5848, ups=0.09, wpb=64826, bsz=128, num_updates=7818, lr=9.99455e-05, gnorm=2.284, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89534
2021-06-19 19:31:11 | INFO | train_inner | epoch 003: 1866 / 3002 loss=2.638, ppl=6.23, wps=5948.1, ups=0.09, wpb=64838, bsz=128, num_updates=7819, lr=9.99454e-05, gnorm=2.141, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89545
2021-06-19 19:31:22 | INFO | train_inner | epoch 003: 1867 / 3002 loss=2.673, ppl=6.38, wps=5906.8, ups=0.09, wpb=64884, bsz=128, num_updates=7820, lr=9.99454e-05, gnorm=2.093, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89556
2021-06-19 19:31:33 | INFO | train_inner | epoch 003: 1868 / 3002 loss=2.495, ppl=5.64, wps=5776.8, ups=0.09, wpb=64859, bsz=128, num_updates=7821, lr=9.99454e-05, gnorm=2.067, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89567
2021-06-19 19:31:44 | INFO | train_inner | epoch 003: 1869 / 3002 loss=2.722, ppl=6.6, wps=5867.6, ups=0.09, wpb=64713, bsz=128, num_updates=7822, lr=9.99454e-05, gnorm=2.169, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89578
2021-06-19 19:31:55 | INFO | train_inner | epoch 003: 1870 / 3002 loss=2.499, ppl=5.65, wps=5876.5, ups=0.09, wpb=64908, bsz=128, num_updates=7823, lr=9.99454e-05, gnorm=2.18, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89590
2021-06-19 19:32:06 | INFO | train_inner | epoch 003: 1871 / 3002 loss=2.581, ppl=5.98, wps=5853.7, ups=0.09, wpb=64955, bsz=128, num_updates=7824, lr=9.99454e-05, gnorm=2.172, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89601
2021-06-19 19:32:17 | INFO | train_inner | epoch 003: 1872 / 3002 loss=2.495, ppl=5.64, wps=5840.2, ups=0.09, wpb=64855, bsz=128, num_updates=7825, lr=9.99454e-05, gnorm=2.122, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89612
2021-06-19 19:32:28 | INFO | train_inner | epoch 003: 1873 / 3002 loss=2.814, ppl=7.03, wps=5911, ups=0.09, wpb=64875, bsz=128, num_updates=7826, lr=9.99454e-05, gnorm=2.317, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89623
2021-06-19 19:32:39 | INFO | train_inner | epoch 003: 1874 / 3002 loss=2.65, ppl=6.28, wps=5838, ups=0.09, wpb=64890, bsz=128, num_updates=7827, lr=9.99454e-05, gnorm=2.117, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89634
2021-06-19 19:32:51 | INFO | train_inner | epoch 003: 1875 / 3002 loss=2.543, ppl=5.83, wps=5754.1, ups=0.09, wpb=64822, bsz=128, num_updates=7828, lr=9.99454e-05, gnorm=2.17, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89645
2021-06-19 19:33:02 | INFO | train_inner | epoch 003: 1876 / 3002 loss=2.748, ppl=6.72, wps=5982.2, ups=0.09, wpb=64933, bsz=128, num_updates=7829, lr=9.99454e-05, gnorm=2.098, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89656
2021-06-19 19:33:13 | INFO | train_inner | epoch 003: 1877 / 3002 loss=2.582, ppl=5.99, wps=5858.7, ups=0.09, wpb=64856, bsz=128, num_updates=7830, lr=9.99454e-05, gnorm=2.579, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89667
2021-06-19 19:33:24 | INFO | train_inner | epoch 003: 1878 / 3002 loss=2.554, ppl=5.87, wps=5838.5, ups=0.09, wpb=64769, bsz=128, num_updates=7831, lr=9.99453e-05, gnorm=2.061, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89678
2021-06-19 19:33:35 | INFO | train_inner | epoch 003: 1879 / 3002 loss=2.465, ppl=5.52, wps=5939.2, ups=0.09, wpb=64745, bsz=128, num_updates=7832, lr=9.99453e-05, gnorm=2.128, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89689
2021-06-19 19:33:46 | INFO | train_inner | epoch 003: 1880 / 3002 loss=2.559, ppl=5.89, wps=5956.3, ups=0.09, wpb=64919, bsz=128, num_updates=7833, lr=9.99453e-05, gnorm=3.718, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89700
2021-06-19 19:33:57 | INFO | train_inner | epoch 003: 1881 / 3002 loss=2.642, ppl=6.24, wps=5797.4, ups=0.09, wpb=64763, bsz=128, num_updates=7834, lr=9.99453e-05, gnorm=2.115, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89711
2021-06-19 19:34:08 | INFO | train_inner | epoch 003: 1882 / 3002 loss=2.654, ppl=6.29, wps=5737.8, ups=0.09, wpb=64802, bsz=128, num_updates=7835, lr=9.99453e-05, gnorm=2.225, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89722
2021-06-19 19:34:19 | INFO | train_inner | epoch 003: 1883 / 3002 loss=2.631, ppl=6.2, wps=5886.4, ups=0.09, wpb=64827, bsz=128, num_updates=7836, lr=9.99453e-05, gnorm=2.398, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89733
2021-06-19 19:34:30 | INFO | train_inner | epoch 003: 1884 / 3002 loss=2.675, ppl=6.39, wps=5730.2, ups=0.09, wpb=64779, bsz=128, num_updates=7837, lr=9.99453e-05, gnorm=2.445, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89745
2021-06-19 19:34:41 | INFO | train_inner | epoch 003: 1885 / 3002 loss=2.711, ppl=6.55, wps=5873.8, ups=0.09, wpb=64852, bsz=128, num_updates=7838, lr=9.99453e-05, gnorm=7.802, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89756
2021-06-19 19:34:53 | INFO | train_inner | epoch 003: 1886 / 3002 loss=2.525, ppl=5.76, wps=5723.7, ups=0.09, wpb=64820, bsz=128, num_updates=7839, lr=9.99453e-05, gnorm=2.276, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89767
2021-06-19 19:35:04 | INFO | train_inner | epoch 003: 1887 / 3002 loss=2.813, ppl=7.03, wps=5888.3, ups=0.09, wpb=64802, bsz=128, num_updates=7840, lr=9.99453e-05, gnorm=2.104, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89778
2021-06-19 19:35:15 | INFO | train_inner | epoch 003: 1888 / 3002 loss=2.552, ppl=5.86, wps=5791.4, ups=0.09, wpb=64853, bsz=128, num_updates=7841, lr=9.99453e-05, gnorm=6.057, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89789
2021-06-19 19:35:26 | INFO | train_inner | epoch 003: 1889 / 3002 loss=2.768, ppl=6.81, wps=5972.7, ups=0.09, wpb=64865, bsz=128, num_updates=7842, lr=9.99453e-05, gnorm=2.321, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=89800
2021-06-19 19:35:37 | INFO | train_inner | epoch 003: 1890 / 3002 loss=2.641, ppl=6.24, wps=5883.9, ups=0.09, wpb=64932, bsz=128, num_updates=7843, lr=9.99453e-05, gnorm=3.36, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89811
2021-06-19 19:35:48 | INFO | train_inner | epoch 003: 1891 / 3002 loss=2.722, ppl=6.6, wps=5733.6, ups=0.09, wpb=64860, bsz=128, num_updates=7844, lr=9.99452e-05, gnorm=2.122, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=89822
2021-06-19 19:35:59 | INFO | train_inner | epoch 003: 1892 / 3002 loss=2.516, ppl=5.72, wps=5787.5, ups=0.09, wpb=64840, bsz=128, num_updates=7845, lr=9.99452e-05, gnorm=2.158, loss_scale=1, train_wall=11, gb_free=2.8, wall=89834
2021-06-19 19:36:10 | INFO | train_inner | epoch 003: 1893 / 3002 loss=2.666, ppl=6.35, wps=5817.2, ups=0.09, wpb=64787, bsz=128, num_updates=7846, lr=9.99452e-05, gnorm=2.143, loss_scale=1, train_wall=11, gb_free=2.8, wall=89845
2021-06-19 19:36:22 | INFO | train_inner | epoch 003: 1894 / 3002 loss=2.485, ppl=5.6, wps=5830, ups=0.09, wpb=64850, bsz=128, num_updates=7847, lr=9.99452e-05, gnorm=2.094, loss_scale=1, train_wall=11, gb_free=2.8, wall=89856
2021-06-19 19:36:33 | INFO | train_inner | epoch 003: 1895 / 3002 loss=2.76, ppl=6.78, wps=5822.2, ups=0.09, wpb=64851, bsz=128, num_updates=7848, lr=9.99452e-05, gnorm=2.132, loss_scale=1, train_wall=11, gb_free=2.8, wall=89867
2021-06-19 19:36:44 | INFO | train_inner | epoch 003: 1896 / 3002 loss=2.662, ppl=6.33, wps=5878.6, ups=0.09, wpb=64845, bsz=128, num_updates=7849, lr=9.99452e-05, gnorm=2.034, loss_scale=1, train_wall=11, gb_free=2.8, wall=89878
2021-06-19 19:36:55 | INFO | train_inner | epoch 003: 1897 / 3002 loss=2.502, ppl=5.66, wps=5998.6, ups=0.09, wpb=64840, bsz=128, num_updates=7850, lr=9.99452e-05, gnorm=2.101, loss_scale=1, train_wall=10, gb_free=2.8, wall=89889
2021-06-19 19:37:06 | INFO | train_inner | epoch 003: 1898 / 3002 loss=2.647, ppl=6.27, wps=5844.9, ups=0.09, wpb=64775, bsz=128, num_updates=7851, lr=9.99452e-05, gnorm=9.615, loss_scale=1, train_wall=11, gb_free=2.8, wall=89900
2021-06-19 19:37:17 | INFO | train_inner | epoch 003: 1899 / 3002 loss=2.582, ppl=5.99, wps=5841.9, ups=0.09, wpb=64903, bsz=128, num_updates=7852, lr=9.99452e-05, gnorm=2.559, loss_scale=1, train_wall=11, gb_free=2.8, wall=89911
2021-06-19 19:37:28 | INFO | train_inner | epoch 003: 1900 / 3002 loss=2.639, ppl=6.23, wps=5694.9, ups=0.09, wpb=64781, bsz=128, num_updates=7853, lr=9.99452e-05, gnorm=2.181, loss_scale=1, train_wall=11, gb_free=2.8, wall=89922
2021-06-19 19:37:39 | INFO | train_inner | epoch 003: 1901 / 3002 loss=2.705, ppl=6.52, wps=5845.4, ups=0.09, wpb=64826, bsz=128, num_updates=7854, lr=9.99452e-05, gnorm=2.13, loss_scale=1, train_wall=11, gb_free=2.8, wall=89934
2021-06-19 19:37:50 | INFO | train_inner | epoch 003: 1902 / 3002 loss=2.551, ppl=5.86, wps=5749.9, ups=0.09, wpb=64753, bsz=128, num_updates=7855, lr=9.99452e-05, gnorm=2.052, loss_scale=1, train_wall=11, gb_free=2.8, wall=89945
2021-06-19 19:38:02 | INFO | train_inner | epoch 003: 1903 / 3002 loss=2.561, ppl=5.9, wps=5754.5, ups=0.09, wpb=64845, bsz=128, num_updates=7856, lr=9.99451e-05, gnorm=2.133, loss_scale=1, train_wall=11, gb_free=2.8, wall=89956
2021-06-19 19:38:13 | INFO | train_inner | epoch 003: 1904 / 3002 loss=2.709, ppl=6.54, wps=5948, ups=0.09, wpb=64863, bsz=128, num_updates=7857, lr=9.99451e-05, gnorm=2.211, loss_scale=1, train_wall=10, gb_free=2.8, wall=89967
2021-06-19 19:38:24 | INFO | train_inner | epoch 003: 1905 / 3002 loss=2.562, ppl=5.91, wps=5729.4, ups=0.09, wpb=64818, bsz=128, num_updates=7858, lr=9.99451e-05, gnorm=2.133, loss_scale=1, train_wall=11, gb_free=2.8, wall=89978
2021-06-19 19:38:35 | INFO | train_inner | epoch 003: 1906 / 3002 loss=2.707, ppl=6.53, wps=5770.8, ups=0.09, wpb=64773, bsz=128, num_updates=7859, lr=9.99451e-05, gnorm=2.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=89990
2021-06-19 19:38:46 | INFO | train_inner | epoch 003: 1907 / 3002 loss=2.582, ppl=5.99, wps=5899.9, ups=0.09, wpb=64804, bsz=128, num_updates=7860, lr=9.99451e-05, gnorm=2.101, loss_scale=1, train_wall=11, gb_free=2.8, wall=90001
2021-06-19 19:38:57 | INFO | train_inner | epoch 003: 1908 / 3002 loss=2.631, ppl=6.19, wps=5786.5, ups=0.09, wpb=64790, bsz=128, num_updates=7861, lr=9.99451e-05, gnorm=2.133, loss_scale=1, train_wall=11, gb_free=2.8, wall=90012
2021-06-19 19:39:09 | INFO | train_inner | epoch 003: 1909 / 3002 loss=2.559, ppl=5.89, wps=5774.4, ups=0.09, wpb=64826, bsz=128, num_updates=7862, lr=9.99451e-05, gnorm=2.123, loss_scale=1, train_wall=11, gb_free=2.8, wall=90023
2021-06-19 19:39:20 | INFO | train_inner | epoch 003: 1910 / 3002 loss=2.602, ppl=6.07, wps=5823.4, ups=0.09, wpb=64835, bsz=128, num_updates=7863, lr=9.99451e-05, gnorm=2.126, loss_scale=1, train_wall=11, gb_free=2.8, wall=90034
2021-06-19 19:39:31 | INFO | train_inner | epoch 003: 1911 / 3002 loss=2.669, ppl=6.36, wps=5874.1, ups=0.09, wpb=64812, bsz=128, num_updates=7864, lr=9.99451e-05, gnorm=2.05, loss_scale=1, train_wall=11, gb_free=2.8, wall=90045
2021-06-19 19:39:42 | INFO | train_inner | epoch 003: 1912 / 3002 loss=2.59, ppl=6.02, wps=5937.3, ups=0.09, wpb=64853, bsz=128, num_updates=7865, lr=9.99451e-05, gnorm=2.189, loss_scale=1, train_wall=10, gb_free=2.8, wall=90056
2021-06-19 19:39:53 | INFO | train_inner | epoch 003: 1913 / 3002 loss=2.745, ppl=6.7, wps=5863.5, ups=0.09, wpb=64828, bsz=128, num_updates=7866, lr=9.99451e-05, gnorm=3.818, loss_scale=1, train_wall=11, gb_free=2.8, wall=90067
2021-06-19 19:40:04 | INFO | train_inner | epoch 003: 1914 / 3002 loss=2.699, ppl=6.49, wps=5892.1, ups=0.09, wpb=64799, bsz=128, num_updates=7867, lr=9.99451e-05, gnorm=2.279, loss_scale=1, train_wall=11, gb_free=2.8, wall=90078
2021-06-19 19:40:15 | INFO | train_inner | epoch 003: 1915 / 3002 loss=2.631, ppl=6.2, wps=5821, ups=0.09, wpb=64809, bsz=128, num_updates=7868, lr=9.99451e-05, gnorm=2.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=90089
2021-06-19 19:40:26 | INFO | train_inner | epoch 003: 1916 / 3002 loss=2.714, ppl=6.56, wps=5848, ups=0.09, wpb=64831, bsz=128, num_updates=7869, lr=9.9945e-05, gnorm=2.221, loss_scale=1, train_wall=11, gb_free=2.8, wall=90100
2021-06-19 19:40:37 | INFO | train_inner | epoch 003: 1917 / 3002 loss=2.631, ppl=6.19, wps=5855.4, ups=0.09, wpb=64851, bsz=128, num_updates=7870, lr=9.9945e-05, gnorm=2.158, loss_scale=1, train_wall=11, gb_free=2.8, wall=90111
2021-06-19 19:40:48 | INFO | train_inner | epoch 003: 1918 / 3002 loss=2.542, ppl=5.82, wps=5812.1, ups=0.09, wpb=64887, bsz=128, num_updates=7871, lr=9.9945e-05, gnorm=2.642, loss_scale=1, train_wall=11, gb_free=2.8, wall=90123
2021-06-19 19:40:59 | INFO | train_inner | epoch 003: 1919 / 3002 loss=2.717, ppl=6.58, wps=5872.4, ups=0.09, wpb=64817, bsz=128, num_updates=7872, lr=9.9945e-05, gnorm=2.209, loss_scale=1, train_wall=11, gb_free=2.8, wall=90134
2021-06-19 19:41:10 | INFO | train_inner | epoch 003: 1920 / 3002 loss=2.498, ppl=5.65, wps=5955.9, ups=0.09, wpb=64776, bsz=128, num_updates=7873, lr=9.9945e-05, gnorm=2.528, loss_scale=1, train_wall=10, gb_free=2.8, wall=90144
2021-06-19 19:41:21 | INFO | train_inner | epoch 003: 1921 / 3002 loss=2.855, ppl=7.23, wps=5853.8, ups=0.09, wpb=64822, bsz=128, num_updates=7874, lr=9.9945e-05, gnorm=2.144, loss_scale=1, train_wall=11, gb_free=2.8, wall=90156
2021-06-19 19:41:32 | INFO | train_inner | epoch 003: 1922 / 3002 loss=2.595, ppl=6.04, wps=5889.2, ups=0.09, wpb=64823, bsz=128, num_updates=7875, lr=9.9945e-05, gnorm=2.175, loss_scale=1, train_wall=11, gb_free=2.8, wall=90167
2021-06-19 19:41:43 | INFO | train_inner | epoch 003: 1923 / 3002 loss=2.69, ppl=6.46, wps=5814.8, ups=0.09, wpb=64878, bsz=128, num_updates=7876, lr=9.9945e-05, gnorm=2.233, loss_scale=1, train_wall=11, gb_free=2.8, wall=90178
2021-06-19 19:41:55 | INFO | train_inner | epoch 003: 1924 / 3002 loss=2.539, ppl=5.81, wps=5800.3, ups=0.09, wpb=64871, bsz=128, num_updates=7877, lr=9.9945e-05, gnorm=2.667, loss_scale=1, train_wall=11, gb_free=2.8, wall=90189
2021-06-19 19:42:06 | INFO | train_inner | epoch 003: 1925 / 3002 loss=2.81, ppl=7.01, wps=5805.7, ups=0.09, wpb=64799, bsz=128, num_updates=7878, lr=9.9945e-05, gnorm=2.172, loss_scale=1, train_wall=11, gb_free=2.8, wall=90200
2021-06-19 19:42:17 | INFO | train_inner | epoch 003: 1926 / 3002 loss=2.567, ppl=5.92, wps=5818.5, ups=0.09, wpb=64743, bsz=128, num_updates=7879, lr=9.9945e-05, gnorm=2.361, loss_scale=1, train_wall=11, gb_free=2.8, wall=90211
2021-06-19 19:42:28 | INFO | train_inner | epoch 003: 1927 / 3002 loss=2.732, ppl=6.64, wps=5903.7, ups=0.09, wpb=64831, bsz=128, num_updates=7880, lr=9.9945e-05, gnorm=6.456, loss_scale=1, train_wall=11, gb_free=2.8, wall=90222
2021-06-19 19:42:39 | INFO | train_inner | epoch 003: 1928 / 3002 loss=2.593, ppl=6.03, wps=5827.6, ups=0.09, wpb=64907, bsz=128, num_updates=7881, lr=9.99449e-05, gnorm=2.14, loss_scale=1, train_wall=11, gb_free=2.8, wall=90233
2021-06-19 19:42:50 | INFO | train_inner | epoch 003: 1929 / 3002 loss=2.65, ppl=6.28, wps=5921, ups=0.09, wpb=64845, bsz=128, num_updates=7882, lr=9.99449e-05, gnorm=2.146, loss_scale=1, train_wall=10, gb_free=2.8, wall=90244
2021-06-19 19:43:01 | INFO | train_inner | epoch 003: 1930 / 3002 loss=2.747, ppl=6.71, wps=5772.5, ups=0.09, wpb=64815, bsz=128, num_updates=7883, lr=9.99449e-05, gnorm=2.497, loss_scale=1, train_wall=11, gb_free=2.8, wall=90255
2021-06-19 19:43:12 | INFO | train_inner | epoch 003: 1931 / 3002 loss=2.716, ppl=6.57, wps=5894.3, ups=0.09, wpb=64923, bsz=128, num_updates=7884, lr=9.99449e-05, gnorm=2.145, loss_scale=1, train_wall=11, gb_free=2.8, wall=90266
2021-06-19 19:43:23 | INFO | train_inner | epoch 003: 1932 / 3002 loss=2.5, ppl=5.66, wps=5810.9, ups=0.09, wpb=64766, bsz=128, num_updates=7885, lr=9.99449e-05, gnorm=2.194, loss_scale=1, train_wall=11, gb_free=2.8, wall=90278
2021-06-19 19:43:34 | INFO | train_inner | epoch 003: 1933 / 3002 loss=2.635, ppl=6.21, wps=5877.7, ups=0.09, wpb=64842, bsz=128, num_updates=7886, lr=9.99449e-05, gnorm=2.039, loss_scale=1, train_wall=11, gb_free=2.8, wall=90289
2021-06-19 19:43:46 | INFO | train_inner | epoch 003: 1934 / 3002 loss=2.673, ppl=6.38, wps=5771, ups=0.09, wpb=64818, bsz=128, num_updates=7887, lr=9.99449e-05, gnorm=2.055, loss_scale=1, train_wall=11, gb_free=2.8, wall=90300
2021-06-19 19:43:56 | INFO | train_inner | epoch 003: 1935 / 3002 loss=2.659, ppl=6.32, wps=6012.6, ups=0.09, wpb=64897, bsz=128, num_updates=7888, lr=9.99449e-05, gnorm=2.11, loss_scale=1, train_wall=10, gb_free=2.8, wall=90311
2021-06-19 19:44:07 | INFO | train_inner | epoch 003: 1936 / 3002 loss=2.479, ppl=5.57, wps=5914.1, ups=0.09, wpb=64770, bsz=128, num_updates=7889, lr=9.99449e-05, gnorm=2.287, loss_scale=1, train_wall=10, gb_free=2.8, wall=90322
2021-06-19 19:44:18 | INFO | train_inner | epoch 003: 1937 / 3002 loss=2.61, ppl=6.11, wps=5816.1, ups=0.09, wpb=64870, bsz=128, num_updates=7890, lr=9.99449e-05, gnorm=2.147, loss_scale=1, train_wall=11, gb_free=2.8, wall=90333
2021-06-19 19:44:30 | INFO | train_inner | epoch 003: 1938 / 3002 loss=2.6, ppl=6.06, wps=5786.4, ups=0.09, wpb=64905, bsz=128, num_updates=7891, lr=9.99449e-05, gnorm=2.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=90344
2021-06-19 19:44:41 | INFO | train_inner | epoch 003: 1939 / 3002 loss=2.614, ppl=6.12, wps=5832.4, ups=0.09, wpb=64811, bsz=128, num_updates=7892, lr=9.99449e-05, gnorm=2.189, loss_scale=1, train_wall=11, gb_free=2.8, wall=90355
2021-06-19 19:44:52 | INFO | train_inner | epoch 003: 1940 / 3002 loss=2.6, ppl=6.06, wps=5888.5, ups=0.09, wpb=64772, bsz=128, num_updates=7893, lr=9.99449e-05, gnorm=6.39, loss_scale=1, train_wall=11, gb_free=2.8, wall=90366
2021-06-19 19:45:03 | INFO | train_inner | epoch 003: 1941 / 3002 loss=2.69, ppl=6.45, wps=5843.2, ups=0.09, wpb=64841, bsz=128, num_updates=7894, lr=9.99448e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=90377
2021-06-19 19:45:14 | INFO | train_inner | epoch 003: 1942 / 3002 loss=2.624, ppl=6.16, wps=5885.3, ups=0.09, wpb=64762, bsz=128, num_updates=7895, lr=9.99448e-05, gnorm=2.242, loss_scale=1, train_wall=11, gb_free=2.8, wall=90388
2021-06-19 19:45:25 | INFO | train_inner | epoch 003: 1943 / 3002 loss=2.659, ppl=6.32, wps=5840.4, ups=0.09, wpb=64849, bsz=128, num_updates=7896, lr=9.99448e-05, gnorm=2.168, loss_scale=1, train_wall=11, gb_free=2.8, wall=90399
2021-06-19 19:45:36 | INFO | train_inner | epoch 003: 1944 / 3002 loss=2.605, ppl=6.08, wps=5894.2, ups=0.09, wpb=64771, bsz=128, num_updates=7897, lr=9.99448e-05, gnorm=2.156, loss_scale=1, train_wall=11, gb_free=2.8, wall=90410
2021-06-19 19:45:47 | INFO | train_inner | epoch 003: 1945 / 3002 loss=2.527, ppl=5.76, wps=5918, ups=0.09, wpb=64764, bsz=128, num_updates=7898, lr=9.99448e-05, gnorm=2.02, loss_scale=1, train_wall=10, gb_free=2.8, wall=90421
2021-06-19 19:45:58 | INFO | train_inner | epoch 003: 1946 / 3002 loss=2.747, ppl=6.71, wps=5758.8, ups=0.09, wpb=64774, bsz=128, num_updates=7899, lr=9.99448e-05, gnorm=2.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=90433
2021-06-19 19:46:09 | INFO | train_inner | epoch 003: 1947 / 3002 loss=2.64, ppl=6.23, wps=5909.6, ups=0.09, wpb=64781, bsz=128, num_updates=7900, lr=9.99448e-05, gnorm=2.089, loss_scale=1, train_wall=10, gb_free=2.8, wall=90443
2021-06-19 19:46:20 | INFO | train_inner | epoch 003: 1948 / 3002 loss=2.645, ppl=6.25, wps=6025.3, ups=0.09, wpb=64930, bsz=128, num_updates=7901, lr=9.99448e-05, gnorm=2.142, loss_scale=1, train_wall=10, gb_free=2.8, wall=90454
2021-06-19 19:46:31 | INFO | train_inner | epoch 003: 1949 / 3002 loss=2.524, ppl=5.75, wps=5859.6, ups=0.09, wpb=64830, bsz=128, num_updates=7902, lr=9.99448e-05, gnorm=2.075, loss_scale=1, train_wall=11, gb_free=2.8, wall=90465
2021-06-19 19:46:42 | INFO | train_inner | epoch 003: 1950 / 3002 loss=2.655, ppl=6.3, wps=5864.1, ups=0.09, wpb=64794, bsz=128, num_updates=7903, lr=9.99448e-05, gnorm=2.158, loss_scale=1, train_wall=11, gb_free=2.8, wall=90476
2021-06-19 19:46:53 | INFO | train_inner | epoch 003: 1951 / 3002 loss=2.559, ppl=5.89, wps=5824.2, ups=0.09, wpb=64816, bsz=128, num_updates=7904, lr=9.99448e-05, gnorm=2.272, loss_scale=1, train_wall=11, gb_free=2.8, wall=90488
2021-06-19 19:47:04 | INFO | train_inner | epoch 003: 1952 / 3002 loss=2.429, ppl=5.38, wps=5794.5, ups=0.09, wpb=64852, bsz=128, num_updates=7905, lr=9.99448e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=90499
2021-06-19 19:47:15 | INFO | train_inner | epoch 003: 1953 / 3002 loss=2.741, ppl=6.68, wps=5948.3, ups=0.09, wpb=64797, bsz=128, num_updates=7906, lr=9.99447e-05, gnorm=2.238, loss_scale=1, train_wall=10, gb_free=2.8, wall=90510
2021-06-19 19:47:26 | INFO | train_inner | epoch 003: 1954 / 3002 loss=2.644, ppl=6.25, wps=5768.4, ups=0.09, wpb=64833, bsz=128, num_updates=7907, lr=9.99447e-05, gnorm=2.186, loss_scale=1, train_wall=11, gb_free=2.8, wall=90521
2021-06-19 19:47:38 | INFO | train_inner | epoch 003: 1955 / 3002 loss=2.727, ppl=6.62, wps=5721.3, ups=0.09, wpb=64810, bsz=128, num_updates=7908, lr=9.99447e-05, gnorm=2.242, loss_scale=1, train_wall=11, gb_free=2.8, wall=90532
2021-06-19 19:47:49 | INFO | train_inner | epoch 003: 1956 / 3002 loss=2.649, ppl=6.27, wps=5958.4, ups=0.09, wpb=64780, bsz=128, num_updates=7909, lr=9.99447e-05, gnorm=3.367, loss_scale=1, train_wall=10, gb_free=2.8, wall=90543
2021-06-19 19:48:00 | INFO | train_inner | epoch 003: 1957 / 3002 loss=2.608, ppl=6.1, wps=5900.3, ups=0.09, wpb=64805, bsz=128, num_updates=7910, lr=9.99447e-05, gnorm=2.207, loss_scale=1, train_wall=11, gb_free=2.8, wall=90554
2021-06-19 19:48:11 | INFO | train_inner | epoch 003: 1958 / 3002 loss=2.682, ppl=6.42, wps=5920.4, ups=0.09, wpb=64877, bsz=128, num_updates=7911, lr=9.99447e-05, gnorm=2.202, loss_scale=1, train_wall=10, gb_free=2.8, wall=90565
2021-06-19 19:48:22 | INFO | train_inner | epoch 003: 1959 / 3002 loss=2.668, ppl=6.36, wps=5791.1, ups=0.09, wpb=64756, bsz=128, num_updates=7912, lr=9.99447e-05, gnorm=2.079, loss_scale=1, train_wall=11, gb_free=2.8, wall=90576
2021-06-19 19:48:33 | INFO | train_inner | epoch 003: 1960 / 3002 loss=2.705, ppl=6.52, wps=5853.9, ups=0.09, wpb=64887, bsz=128, num_updates=7913, lr=9.99447e-05, gnorm=2.051, loss_scale=1, train_wall=11, gb_free=2.8, wall=90587
2021-06-19 19:48:44 | INFO | train_inner | epoch 003: 1961 / 3002 loss=2.804, ppl=6.98, wps=5903.2, ups=0.09, wpb=64793, bsz=128, num_updates=7914, lr=9.99447e-05, gnorm=2.23, loss_scale=1, train_wall=11, gb_free=2.8, wall=90598
2021-06-19 19:48:55 | INFO | train_inner | epoch 003: 1962 / 3002 loss=2.643, ppl=6.25, wps=5873.7, ups=0.09, wpb=64914, bsz=128, num_updates=7915, lr=9.99447e-05, gnorm=2.129, loss_scale=1, train_wall=11, gb_free=2.8, wall=90609
2021-06-19 19:49:06 | INFO | train_inner | epoch 003: 1963 / 3002 loss=2.644, ppl=6.25, wps=5866.5, ups=0.09, wpb=64820, bsz=128, num_updates=7916, lr=9.99447e-05, gnorm=2.04, loss_scale=1, train_wall=11, gb_free=2.8, wall=90620
2021-06-19 19:49:17 | INFO | train_inner | epoch 003: 1964 / 3002 loss=2.478, ppl=5.57, wps=5854.1, ups=0.09, wpb=64822, bsz=128, num_updates=7917, lr=9.99447e-05, gnorm=2.103, loss_scale=1, train_wall=11, gb_free=2.8, wall=90631
2021-06-19 19:49:28 | INFO | train_inner | epoch 003: 1965 / 3002 loss=2.674, ppl=6.38, wps=5827, ups=0.09, wpb=64810, bsz=128, num_updates=7918, lr=9.99447e-05, gnorm=2.052, loss_scale=1, train_wall=11, gb_free=2.8, wall=90643
2021-06-19 19:49:39 | INFO | train_inner | epoch 003: 1966 / 3002 loss=2.592, ppl=6.03, wps=5838.5, ups=0.09, wpb=64887, bsz=128, num_updates=7919, lr=9.99446e-05, gnorm=2.09, loss_scale=1, train_wall=11, gb_free=2.8, wall=90654
2021-06-19 19:49:50 | INFO | train_inner | epoch 003: 1967 / 3002 loss=2.651, ppl=6.28, wps=5847.5, ups=0.09, wpb=64785, bsz=128, num_updates=7920, lr=9.99446e-05, gnorm=2.123, loss_scale=1, train_wall=11, gb_free=2.8, wall=90665
2021-06-19 19:50:02 | INFO | train_inner | epoch 003: 1968 / 3002 loss=2.6, ppl=6.06, wps=5795.7, ups=0.09, wpb=64911, bsz=128, num_updates=7921, lr=9.99446e-05, gnorm=5.107, loss_scale=1, train_wall=11, gb_free=2.8, wall=90676
2021-06-19 19:50:13 | INFO | train_inner | epoch 003: 1969 / 3002 loss=2.757, ppl=6.76, wps=5880.3, ups=0.09, wpb=64821, bsz=128, num_updates=7922, lr=9.99446e-05, gnorm=2.378, loss_scale=1, train_wall=11, gb_free=2.8, wall=90687
2021-06-19 19:50:24 | INFO | train_inner | epoch 003: 1970 / 3002 loss=2.695, ppl=6.47, wps=5869.9, ups=0.09, wpb=64583, bsz=128, num_updates=7923, lr=9.99446e-05, gnorm=2.175, loss_scale=1, train_wall=11, gb_free=2.8, wall=90698
2021-06-19 19:50:35 | INFO | train_inner | epoch 003: 1971 / 3002 loss=2.558, ppl=5.89, wps=5832.2, ups=0.09, wpb=64796, bsz=128, num_updates=7924, lr=9.99446e-05, gnorm=2.099, loss_scale=1, train_wall=11, gb_free=2.8, wall=90709
2021-06-19 19:50:46 | INFO | train_inner | epoch 003: 1972 / 3002 loss=2.657, ppl=6.31, wps=5872.9, ups=0.09, wpb=64892, bsz=128, num_updates=7925, lr=9.99446e-05, gnorm=2.06, loss_scale=1, train_wall=11, gb_free=2.8, wall=90720
2021-06-19 19:50:57 | INFO | train_inner | epoch 003: 1973 / 3002 loss=2.625, ppl=6.17, wps=5816.1, ups=0.09, wpb=64792, bsz=128, num_updates=7926, lr=9.99446e-05, gnorm=2.789, loss_scale=1, train_wall=11, gb_free=2.8, wall=90731
2021-06-19 19:51:08 | INFO | train_inner | epoch 003: 1974 / 3002 loss=2.49, ppl=5.62, wps=5716.4, ups=0.09, wpb=64859, bsz=128, num_updates=7927, lr=9.99446e-05, gnorm=2.099, loss_scale=1, train_wall=11, gb_free=2.8, wall=90743
2021-06-19 19:51:19 | INFO | train_inner | epoch 003: 1975 / 3002 loss=2.577, ppl=5.97, wps=5885.6, ups=0.09, wpb=64953, bsz=128, num_updates=7928, lr=9.99446e-05, gnorm=2.087, loss_scale=1, train_wall=11, gb_free=2.8, wall=90754
2021-06-19 19:51:30 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-19 19:51:41 | INFO | train_inner | epoch 003: 1977 / 3002 loss=2.67, ppl=6.36, wps=2940.9, ups=0.05, wpb=64793, bsz=128, num_updates=7929, lr=9.99446e-05, gnorm=2.102, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=90776
2021-06-19 19:51:53 | INFO | train_inner | epoch 003: 1978 / 3002 loss=2.71, ppl=6.54, wps=5715.9, ups=0.09, wpb=64839, bsz=128, num_updates=7930, lr=9.99446e-05, gnorm=2.149, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90787
2021-06-19 19:52:04 | INFO | train_inner | epoch 003: 1979 / 3002 loss=2.637, ppl=6.22, wps=5811, ups=0.09, wpb=64859, bsz=128, num_updates=7931, lr=9.99445e-05, gnorm=2.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90798
2021-06-19 19:52:15 | INFO | train_inner | epoch 003: 1980 / 3002 loss=2.649, ppl=6.27, wps=5775.3, ups=0.09, wpb=64744, bsz=128, num_updates=7932, lr=9.99445e-05, gnorm=6.516, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90809
2021-06-19 19:52:26 | INFO | train_inner | epoch 003: 1981 / 3002 loss=2.698, ppl=6.49, wps=5848.4, ups=0.09, wpb=64891, bsz=128, num_updates=7933, lr=9.99445e-05, gnorm=2.192, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90820
2021-06-19 19:52:37 | INFO | train_inner | epoch 003: 1982 / 3002 loss=2.399, ppl=5.27, wps=5975.6, ups=0.09, wpb=64778, bsz=128, num_updates=7934, lr=9.99445e-05, gnorm=2.1, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=90831
2021-06-19 19:52:48 | INFO | train_inner | epoch 003: 1983 / 3002 loss=2.554, ppl=5.87, wps=5894.8, ups=0.09, wpb=64905, bsz=128, num_updates=7935, lr=9.99445e-05, gnorm=2.115, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90842
2021-06-19 19:52:59 | INFO | train_inner | epoch 003: 1984 / 3002 loss=2.634, ppl=6.21, wps=5783.3, ups=0.09, wpb=64834, bsz=128, num_updates=7936, lr=9.99445e-05, gnorm=2.127, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90854
2021-06-19 19:53:10 | INFO | train_inner | epoch 003: 1985 / 3002 loss=2.603, ppl=6.08, wps=5819.9, ups=0.09, wpb=64779, bsz=128, num_updates=7937, lr=9.99445e-05, gnorm=2.433, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90865
2021-06-19 19:53:21 | INFO | train_inner | epoch 003: 1986 / 3002 loss=2.624, ppl=6.17, wps=5957.3, ups=0.09, wpb=64933, bsz=128, num_updates=7938, lr=9.99445e-05, gnorm=2.093, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=90876
2021-06-19 19:53:32 | INFO | train_inner | epoch 003: 1987 / 3002 loss=2.579, ppl=5.97, wps=5831.8, ups=0.09, wpb=64776, bsz=128, num_updates=7939, lr=9.99445e-05, gnorm=2.127, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90887
2021-06-19 19:53:43 | INFO | train_inner | epoch 003: 1988 / 3002 loss=2.833, ppl=7.12, wps=5872.3, ups=0.09, wpb=64806, bsz=128, num_updates=7940, lr=9.99445e-05, gnorm=2.052, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90898
2021-06-19 19:53:54 | INFO | train_inner | epoch 003: 1989 / 3002 loss=2.66, ppl=6.32, wps=5857, ups=0.09, wpb=64848, bsz=128, num_updates=7941, lr=9.99445e-05, gnorm=2.111, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90909
2021-06-19 19:54:06 | INFO | train_inner | epoch 003: 1990 / 3002 loss=2.721, ppl=6.59, wps=5774.4, ups=0.09, wpb=64767, bsz=128, num_updates=7942, lr=9.99445e-05, gnorm=2.061, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90920
2021-06-19 19:54:17 | INFO | train_inner | epoch 003: 1991 / 3002 loss=2.616, ppl=6.13, wps=5758.8, ups=0.09, wpb=64804, bsz=128, num_updates=7943, lr=9.99445e-05, gnorm=2.444, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90931
2021-06-19 19:54:28 | INFO | train_inner | epoch 003: 1992 / 3002 loss=2.726, ppl=6.62, wps=5823.5, ups=0.09, wpb=64859, bsz=128, num_updates=7944, lr=9.99444e-05, gnorm=2.341, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90942
2021-06-19 19:54:39 | INFO | train_inner | epoch 003: 1993 / 3002 loss=2.656, ppl=6.3, wps=5827.9, ups=0.09, wpb=64823, bsz=128, num_updates=7945, lr=9.99444e-05, gnorm=2.119, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90953
2021-06-19 19:54:50 | INFO | train_inner | epoch 003: 1994 / 3002 loss=2.632, ppl=6.2, wps=5916.8, ups=0.09, wpb=64940, bsz=128, num_updates=7946, lr=9.99444e-05, gnorm=2.142, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90964
2021-06-19 19:55:01 | INFO | train_inner | epoch 003: 1995 / 3002 loss=2.783, ppl=6.88, wps=5846.8, ups=0.09, wpb=64791, bsz=128, num_updates=7947, lr=9.99444e-05, gnorm=2.253, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90976
2021-06-19 19:55:12 | INFO | train_inner | epoch 003: 1996 / 3002 loss=2.601, ppl=6.07, wps=5804.6, ups=0.09, wpb=64830, bsz=128, num_updates=7948, lr=9.99444e-05, gnorm=2.091, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90987
2021-06-19 19:55:24 | INFO | train_inner | epoch 003: 1997 / 3002 loss=2.713, ppl=6.56, wps=5729.6, ups=0.09, wpb=64831, bsz=128, num_updates=7949, lr=9.99444e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=90998
2021-06-19 19:55:35 | INFO | train_inner | epoch 003: 1998 / 3002 loss=2.753, ppl=6.74, wps=5922, ups=0.09, wpb=64830, bsz=128, num_updates=7950, lr=9.99444e-05, gnorm=2.255, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91009
2021-06-19 19:55:46 | INFO | train_inner | epoch 003: 1999 / 3002 loss=2.849, ppl=7.21, wps=5846, ups=0.09, wpb=64767, bsz=128, num_updates=7951, lr=9.99444e-05, gnorm=2.456, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91020
2021-06-19 19:55:57 | INFO | train_inner | epoch 003: 2000 / 3002 loss=2.612, ppl=6.12, wps=5864.9, ups=0.09, wpb=64737, bsz=128, num_updates=7952, lr=9.99444e-05, gnorm=2.124, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91031
2021-06-19 19:56:08 | INFO | train_inner | epoch 003: 2001 / 3002 loss=2.516, ppl=5.72, wps=5907, ups=0.09, wpb=64835, bsz=128, num_updates=7953, lr=9.99444e-05, gnorm=2.055, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91042
2021-06-19 19:56:19 | INFO | train_inner | epoch 003: 2002 / 3002 loss=2.615, ppl=6.13, wps=5831.2, ups=0.09, wpb=64870, bsz=128, num_updates=7954, lr=9.99444e-05, gnorm=2.055, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91053
2021-06-19 19:56:30 | INFO | train_inner | epoch 003: 2003 / 3002 loss=2.899, ppl=7.46, wps=5860.5, ups=0.09, wpb=64701, bsz=128, num_updates=7955, lr=9.99444e-05, gnorm=2.142, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91064
2021-06-19 19:56:41 | INFO | train_inner | epoch 003: 2004 / 3002 loss=2.748, ppl=6.72, wps=5852.7, ups=0.09, wpb=64805, bsz=128, num_updates=7956, lr=9.99443e-05, gnorm=2.11, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91075
2021-06-19 19:56:52 | INFO | train_inner | epoch 003: 2005 / 3002 loss=2.565, ppl=5.92, wps=5866.6, ups=0.09, wpb=64809, bsz=128, num_updates=7957, lr=9.99443e-05, gnorm=2.061, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91086
2021-06-19 19:57:03 | INFO | train_inner | epoch 003: 2006 / 3002 loss=2.523, ppl=5.75, wps=5907.9, ups=0.09, wpb=64853, bsz=128, num_updates=7958, lr=9.99443e-05, gnorm=91.847, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91097
2021-06-19 19:57:14 | INFO | train_inner | epoch 003: 2007 / 3002 loss=2.5, ppl=5.66, wps=5894.6, ups=0.09, wpb=64839, bsz=128, num_updates=7959, lr=9.99443e-05, gnorm=2.072, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91108
2021-06-19 19:57:25 | INFO | train_inner | epoch 003: 2008 / 3002 loss=2.631, ppl=6.19, wps=5857.7, ups=0.09, wpb=64819, bsz=128, num_updates=7960, lr=9.99443e-05, gnorm=2.15, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91119
2021-06-19 19:57:36 | INFO | train_inner | epoch 003: 2009 / 3002 loss=2.763, ppl=6.79, wps=6010.6, ups=0.09, wpb=64921, bsz=128, num_updates=7961, lr=9.99443e-05, gnorm=2.142, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91130
2021-06-19 19:57:47 | INFO | train_inner | epoch 003: 2010 / 3002 loss=2.57, ppl=5.94, wps=5781.7, ups=0.09, wpb=64858, bsz=128, num_updates=7962, lr=9.99443e-05, gnorm=2.263, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91141
2021-06-19 19:57:58 | INFO | train_inner | epoch 003: 2011 / 3002 loss=2.6, ppl=6.06, wps=5889.8, ups=0.09, wpb=64853, bsz=128, num_updates=7963, lr=9.99443e-05, gnorm=2.288, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91152
2021-06-19 19:58:09 | INFO | train_inner | epoch 003: 2012 / 3002 loss=2.651, ppl=6.28, wps=5691.3, ups=0.09, wpb=64830, bsz=128, num_updates=7964, lr=9.99443e-05, gnorm=2.134, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91164
2021-06-19 19:58:21 | INFO | train_inner | epoch 003: 2013 / 3002 loss=2.85, ppl=7.21, wps=5789.6, ups=0.09, wpb=64799, bsz=128, num_updates=7965, lr=9.99443e-05, gnorm=3.09, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91175
2021-06-19 19:58:32 | INFO | train_inner | epoch 003: 2014 / 3002 loss=2.651, ppl=6.28, wps=5863.8, ups=0.09, wpb=64843, bsz=128, num_updates=7966, lr=9.99443e-05, gnorm=2.193, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91186
2021-06-19 19:58:43 | INFO | train_inner | epoch 003: 2015 / 3002 loss=2.704, ppl=6.52, wps=5844.9, ups=0.09, wpb=64782, bsz=128, num_updates=7967, lr=9.99443e-05, gnorm=2.042, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91197
2021-06-19 19:58:54 | INFO | train_inner | epoch 003: 2016 / 3002 loss=2.747, ppl=6.71, wps=5911.1, ups=0.09, wpb=64882, bsz=128, num_updates=7968, lr=9.99443e-05, gnorm=7.11, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91208
2021-06-19 19:59:05 | INFO | train_inner | epoch 003: 2017 / 3002 loss=2.668, ppl=6.36, wps=5838.9, ups=0.09, wpb=64868, bsz=128, num_updates=7969, lr=9.99442e-05, gnorm=2.147, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91219
2021-06-19 19:59:16 | INFO | train_inner | epoch 003: 2018 / 3002 loss=2.639, ppl=6.23, wps=5851.6, ups=0.09, wpb=64811, bsz=128, num_updates=7970, lr=9.99442e-05, gnorm=2.196, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91230
2021-06-19 19:59:27 | INFO | train_inner | epoch 003: 2019 / 3002 loss=2.534, ppl=5.79, wps=5898.2, ups=0.09, wpb=64888, bsz=128, num_updates=7971, lr=9.99442e-05, gnorm=5.823, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91241
2021-06-19 19:59:38 | INFO | train_inner | epoch 003: 2020 / 3002 loss=2.64, ppl=6.23, wps=5951.3, ups=0.09, wpb=64863, bsz=128, num_updates=7972, lr=9.99442e-05, gnorm=2.382, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91252
2021-06-19 19:59:49 | INFO | train_inner | epoch 003: 2021 / 3002 loss=2.746, ppl=6.71, wps=5867.5, ups=0.09, wpb=64795, bsz=128, num_updates=7973, lr=9.99442e-05, gnorm=2.237, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91263
2021-06-19 20:00:00 | INFO | train_inner | epoch 003: 2022 / 3002 loss=2.523, ppl=5.75, wps=5851.8, ups=0.09, wpb=64781, bsz=128, num_updates=7974, lr=9.99442e-05, gnorm=2.203, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91274
2021-06-19 20:00:11 | INFO | train_inner | epoch 003: 2023 / 3002 loss=2.832, ppl=7.12, wps=5930.5, ups=0.09, wpb=64759, bsz=128, num_updates=7975, lr=9.99442e-05, gnorm=2.178, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91285
2021-06-19 20:00:22 | INFO | train_inner | epoch 003: 2024 / 3002 loss=2.617, ppl=6.13, wps=5863.7, ups=0.09, wpb=64811, bsz=128, num_updates=7976, lr=9.99442e-05, gnorm=2.125, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91296
2021-06-19 20:00:33 | INFO | train_inner | epoch 003: 2025 / 3002 loss=2.473, ppl=5.55, wps=5792.4, ups=0.09, wpb=64876, bsz=128, num_updates=7977, lr=9.99442e-05, gnorm=2.252, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91308
2021-06-19 20:00:44 | INFO | train_inner | epoch 003: 2026 / 3002 loss=2.647, ppl=6.26, wps=5836.4, ups=0.09, wpb=64858, bsz=128, num_updates=7978, lr=9.99442e-05, gnorm=2.252, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91319
2021-06-19 20:00:55 | INFO | train_inner | epoch 003: 2027 / 3002 loss=2.639, ppl=6.23, wps=5859, ups=0.09, wpb=64823, bsz=128, num_updates=7979, lr=9.99442e-05, gnorm=2.387, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91330
2021-06-19 20:01:06 | INFO | train_inner | epoch 003: 2028 / 3002 loss=2.651, ppl=6.28, wps=5866.8, ups=0.09, wpb=64837, bsz=128, num_updates=7980, lr=9.99442e-05, gnorm=2.307, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91341
2021-06-19 20:01:17 | INFO | train_inner | epoch 003: 2029 / 3002 loss=2.689, ppl=6.45, wps=5899.5, ups=0.09, wpb=64804, bsz=128, num_updates=7981, lr=9.99441e-05, gnorm=2.269, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91352
2021-06-19 20:01:28 | INFO | train_inner | epoch 003: 2030 / 3002 loss=2.856, ppl=7.24, wps=5887.1, ups=0.09, wpb=64909, bsz=128, num_updates=7982, lr=9.99441e-05, gnorm=2.816, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91363
2021-06-19 20:01:39 | INFO | train_inner | epoch 003: 2031 / 3002 loss=2.726, ppl=6.62, wps=5896.3, ups=0.09, wpb=64768, bsz=128, num_updates=7983, lr=9.99441e-05, gnorm=2.176, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91374
2021-06-19 20:01:51 | INFO | train_inner | epoch 003: 2032 / 3002 loss=2.768, ppl=6.81, wps=5753.2, ups=0.09, wpb=64793, bsz=128, num_updates=7984, lr=9.99441e-05, gnorm=2.275, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91385
2021-06-19 20:02:02 | INFO | train_inner | epoch 003: 2033 / 3002 loss=2.608, ppl=6.1, wps=5876.1, ups=0.09, wpb=64840, bsz=128, num_updates=7985, lr=9.99441e-05, gnorm=2.167, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91396
2021-06-19 20:02:13 | INFO | train_inner | epoch 003: 2034 / 3002 loss=2.562, ppl=5.91, wps=5972.3, ups=0.09, wpb=64819, bsz=128, num_updates=7986, lr=9.99441e-05, gnorm=2.161, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91407
2021-06-19 20:02:24 | INFO | train_inner | epoch 003: 2035 / 3002 loss=2.621, ppl=6.15, wps=5890.4, ups=0.09, wpb=64888, bsz=128, num_updates=7987, lr=9.99441e-05, gnorm=2.36, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91418
2021-06-19 20:02:35 | INFO | train_inner | epoch 003: 2036 / 3002 loss=2.608, ppl=6.1, wps=5837.9, ups=0.09, wpb=64818, bsz=128, num_updates=7988, lr=9.99441e-05, gnorm=2.131, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91429
2021-06-19 20:02:46 | INFO | train_inner | epoch 003: 2037 / 3002 loss=2.599, ppl=6.06, wps=5837.3, ups=0.09, wpb=64841, bsz=128, num_updates=7989, lr=9.99441e-05, gnorm=2.114, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91440
2021-06-19 20:02:57 | INFO | train_inner | epoch 003: 2038 / 3002 loss=2.534, ppl=5.79, wps=5735.7, ups=0.09, wpb=64766, bsz=128, num_updates=7990, lr=9.99441e-05, gnorm=2.121, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91451
2021-06-19 20:03:08 | INFO | train_inner | epoch 003: 2039 / 3002 loss=2.643, ppl=6.25, wps=5777.2, ups=0.09, wpb=64797, bsz=128, num_updates=7991, lr=9.99441e-05, gnorm=2.682, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91463
2021-06-19 20:03:19 | INFO | train_inner | epoch 003: 2040 / 3002 loss=2.617, ppl=6.13, wps=5962.3, ups=0.09, wpb=64823, bsz=128, num_updates=7992, lr=9.99441e-05, gnorm=2.098, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91474
2021-06-19 20:03:30 | INFO | train_inner | epoch 003: 2041 / 3002 loss=2.608, ppl=6.1, wps=5963.3, ups=0.09, wpb=64833, bsz=128, num_updates=7993, lr=9.99441e-05, gnorm=2.741, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91484
2021-06-19 20:03:41 | INFO | train_inner | epoch 003: 2042 / 3002 loss=2.617, ppl=6.13, wps=5963.7, ups=0.09, wpb=64797, bsz=128, num_updates=7994, lr=9.9944e-05, gnorm=2.106, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91495
2021-06-19 20:03:52 | INFO | train_inner | epoch 003: 2043 / 3002 loss=2.64, ppl=6.23, wps=5792, ups=0.09, wpb=64860, bsz=128, num_updates=7995, lr=9.9944e-05, gnorm=2.085, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91506
2021-06-19 20:04:03 | INFO | train_inner | epoch 003: 2044 / 3002 loss=2.719, ppl=6.58, wps=5770.1, ups=0.09, wpb=64788, bsz=128, num_updates=7996, lr=9.9944e-05, gnorm=2.381, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91518
2021-06-19 20:04:14 | INFO | train_inner | epoch 003: 2045 / 3002 loss=2.557, ppl=5.88, wps=5865.8, ups=0.09, wpb=64863, bsz=128, num_updates=7997, lr=9.9944e-05, gnorm=2.109, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91529
2021-06-19 20:04:25 | INFO | train_inner | epoch 003: 2046 / 3002 loss=2.55, ppl=5.86, wps=5843, ups=0.09, wpb=64878, bsz=128, num_updates=7998, lr=9.9944e-05, gnorm=2.129, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91540
2021-06-19 20:04:37 | INFO | train_inner | epoch 003: 2047 / 3002 loss=2.526, ppl=5.76, wps=5801.7, ups=0.09, wpb=64836, bsz=128, num_updates=7999, lr=9.9944e-05, gnorm=2.044, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91551
2021-06-19 20:04:48 | INFO | train_inner | epoch 003: 2048 / 3002 loss=2.642, ppl=6.24, wps=5967.2, ups=0.09, wpb=64909, bsz=128, num_updates=8000, lr=9.9944e-05, gnorm=2.171, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91562
2021-06-19 20:04:58 | INFO | train_inner | epoch 003: 2049 / 3002 loss=2.75, ppl=6.73, wps=5970.6, ups=0.09, wpb=64810, bsz=128, num_updates=8001, lr=9.9944e-05, gnorm=2.108, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91573
2021-06-19 20:05:09 | INFO | train_inner | epoch 003: 2050 / 3002 loss=2.669, ppl=6.36, wps=5899, ups=0.09, wpb=64889, bsz=128, num_updates=8002, lr=9.9944e-05, gnorm=2.205, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91584
2021-06-19 20:05:20 | INFO | train_inner | epoch 003: 2051 / 3002 loss=2.74, ppl=6.68, wps=5930.2, ups=0.09, wpb=64786, bsz=128, num_updates=8003, lr=9.9944e-05, gnorm=2.678, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91595
2021-06-19 20:05:31 | INFO | train_inner | epoch 003: 2052 / 3002 loss=2.644, ppl=6.25, wps=5854.1, ups=0.09, wpb=64827, bsz=128, num_updates=8004, lr=9.9944e-05, gnorm=2.206, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91606
2021-06-19 20:05:42 | INFO | train_inner | epoch 003: 2053 / 3002 loss=2.545, ppl=5.84, wps=5875, ups=0.09, wpb=64817, bsz=128, num_updates=8005, lr=9.9944e-05, gnorm=2.066, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91617
2021-06-19 20:05:53 | INFO | train_inner | epoch 003: 2054 / 3002 loss=2.598, ppl=6.05, wps=5882, ups=0.09, wpb=64739, bsz=128, num_updates=8006, lr=9.99439e-05, gnorm=2.027, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91628
2021-06-19 20:06:05 | INFO | train_inner | epoch 003: 2055 / 3002 loss=2.748, ppl=6.72, wps=5747.6, ups=0.09, wpb=64757, bsz=128, num_updates=8007, lr=9.99439e-05, gnorm=2.017, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91639
2021-06-19 20:06:16 | INFO | train_inner | epoch 003: 2056 / 3002 loss=2.768, ppl=6.81, wps=5838.8, ups=0.09, wpb=64806, bsz=128, num_updates=8008, lr=9.99439e-05, gnorm=2.124, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91650
2021-06-19 20:06:27 | INFO | train_inner | epoch 003: 2057 / 3002 loss=2.639, ppl=6.23, wps=6003.3, ups=0.09, wpb=64908, bsz=128, num_updates=8009, lr=9.99439e-05, gnorm=2.065, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91661
2021-06-19 20:06:38 | INFO | train_inner | epoch 003: 2058 / 3002 loss=2.693, ppl=6.46, wps=5904.7, ups=0.09, wpb=64770, bsz=128, num_updates=8010, lr=9.99439e-05, gnorm=2.114, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91672
2021-06-19 20:06:49 | INFO | train_inner | epoch 003: 2059 / 3002 loss=2.549, ppl=5.85, wps=5835.3, ups=0.09, wpb=64899, bsz=128, num_updates=8011, lr=9.99439e-05, gnorm=2.573, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91683
2021-06-19 20:07:00 | INFO | train_inner | epoch 003: 2060 / 3002 loss=2.566, ppl=5.92, wps=5859.1, ups=0.09, wpb=64839, bsz=128, num_updates=8012, lr=9.99439e-05, gnorm=2.108, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91694
2021-06-19 20:07:11 | INFO | train_inner | epoch 003: 2061 / 3002 loss=2.51, ppl=5.7, wps=5822.1, ups=0.09, wpb=64859, bsz=128, num_updates=8013, lr=9.99439e-05, gnorm=2.018, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91705
2021-06-19 20:07:22 | INFO | train_inner | epoch 003: 2062 / 3002 loss=2.592, ppl=6.03, wps=5791.4, ups=0.09, wpb=64868, bsz=128, num_updates=8014, lr=9.99439e-05, gnorm=2.069, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91716
2021-06-19 20:07:33 | INFO | train_inner | epoch 003: 2063 / 3002 loss=2.741, ppl=6.68, wps=5865.7, ups=0.09, wpb=64795, bsz=128, num_updates=8015, lr=9.99439e-05, gnorm=2.159, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91728
2021-06-19 20:07:44 | INFO | train_inner | epoch 003: 2064 / 3002 loss=2.658, ppl=6.31, wps=5798.8, ups=0.09, wpb=64783, bsz=128, num_updates=8016, lr=9.99439e-05, gnorm=1.98, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91739
2021-06-19 20:07:55 | INFO | train_inner | epoch 003: 2065 / 3002 loss=2.771, ppl=6.83, wps=5864.9, ups=0.09, wpb=64733, bsz=128, num_updates=8017, lr=9.99439e-05, gnorm=4.33, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91750
2021-06-19 20:08:07 | INFO | train_inner | epoch 003: 2066 / 3002 loss=2.589, ppl=6.02, wps=5820.4, ups=0.09, wpb=64744, bsz=128, num_updates=8018, lr=9.99439e-05, gnorm=2.042, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91761
2021-06-19 20:08:18 | INFO | train_inner | epoch 003: 2067 / 3002 loss=2.517, ppl=5.72, wps=5783.8, ups=0.09, wpb=64800, bsz=128, num_updates=8019, lr=9.99438e-05, gnorm=2.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91772
2021-06-19 20:08:29 | INFO | train_inner | epoch 003: 2068 / 3002 loss=2.611, ppl=6.11, wps=5738.3, ups=0.09, wpb=64830, bsz=128, num_updates=8020, lr=9.99438e-05, gnorm=2.055, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91783
2021-06-19 20:08:40 | INFO | train_inner | epoch 003: 2069 / 3002 loss=2.72, ppl=6.59, wps=5779, ups=0.09, wpb=64773, bsz=128, num_updates=8021, lr=9.99438e-05, gnorm=2.137, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91795
2021-06-19 20:08:51 | INFO | train_inner | epoch 003: 2070 / 3002 loss=2.424, ppl=5.37, wps=5792.9, ups=0.09, wpb=64811, bsz=128, num_updates=8022, lr=9.99438e-05, gnorm=1.973, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91806
2021-06-19 20:09:03 | INFO | train_inner | epoch 003: 2071 / 3002 loss=2.746, ppl=6.71, wps=5751.1, ups=0.09, wpb=64850, bsz=128, num_updates=8023, lr=9.99438e-05, gnorm=2.07, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91817
2021-06-19 20:09:14 | INFO | train_inner | epoch 003: 2072 / 3002 loss=2.731, ppl=6.64, wps=5883, ups=0.09, wpb=64830, bsz=128, num_updates=8024, lr=9.99438e-05, gnorm=2.237, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91828
2021-06-19 20:09:25 | INFO | train_inner | epoch 003: 2073 / 3002 loss=2.467, ppl=5.53, wps=5972.3, ups=0.09, wpb=64839, bsz=128, num_updates=8025, lr=9.99438e-05, gnorm=2.049, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91839
2021-06-19 20:09:36 | INFO | train_inner | epoch 003: 2074 / 3002 loss=2.465, ppl=5.52, wps=5772.6, ups=0.09, wpb=64755, bsz=128, num_updates=8026, lr=9.99438e-05, gnorm=2.013, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91850
2021-06-19 20:09:47 | INFO | train_inner | epoch 003: 2075 / 3002 loss=2.765, ppl=6.8, wps=5866.5, ups=0.09, wpb=64796, bsz=128, num_updates=8027, lr=9.99438e-05, gnorm=2.122, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91861
2021-06-19 20:09:58 | INFO | train_inner | epoch 003: 2076 / 3002 loss=2.633, ppl=6.21, wps=5745.8, ups=0.09, wpb=64788, bsz=128, num_updates=8028, lr=9.99438e-05, gnorm=2.154, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91872
2021-06-19 20:10:09 | INFO | train_inner | epoch 003: 2077 / 3002 loss=2.627, ppl=6.18, wps=5787.6, ups=0.09, wpb=64859, bsz=128, num_updates=8029, lr=9.99438e-05, gnorm=2.099, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91884
2021-06-19 20:10:20 | INFO | train_inner | epoch 003: 2078 / 3002 loss=2.517, ppl=5.73, wps=5897.3, ups=0.09, wpb=64861, bsz=128, num_updates=8030, lr=9.99438e-05, gnorm=2.106, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91895
2021-06-19 20:10:31 | INFO | train_inner | epoch 003: 2079 / 3002 loss=2.414, ppl=5.33, wps=5901.8, ups=0.09, wpb=64881, bsz=128, num_updates=8031, lr=9.99437e-05, gnorm=1.972, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91906
2021-06-19 20:10:42 | INFO | train_inner | epoch 003: 2080 / 3002 loss=2.64, ppl=6.23, wps=5918.3, ups=0.09, wpb=64859, bsz=128, num_updates=8032, lr=9.99437e-05, gnorm=2.059, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91917
2021-06-19 20:10:53 | INFO | train_inner | epoch 003: 2081 / 3002 loss=2.491, ppl=5.62, wps=5923.4, ups=0.09, wpb=64894, bsz=128, num_updates=8033, lr=9.99437e-05, gnorm=2.241, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91928
2021-06-19 20:11:04 | INFO | train_inner | epoch 003: 2082 / 3002 loss=2.63, ppl=6.19, wps=5855.7, ups=0.09, wpb=64884, bsz=128, num_updates=8034, lr=9.99437e-05, gnorm=2.048, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91939
2021-06-19 20:11:15 | INFO | train_inner | epoch 003: 2083 / 3002 loss=2.576, ppl=5.96, wps=5921.3, ups=0.09, wpb=64870, bsz=128, num_updates=8035, lr=9.99437e-05, gnorm=2.025, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=91950
2021-06-19 20:11:26 | INFO | train_inner | epoch 003: 2084 / 3002 loss=2.514, ppl=5.71, wps=5812.9, ups=0.09, wpb=64802, bsz=128, num_updates=8036, lr=9.99437e-05, gnorm=2.125, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91961
2021-06-19 20:11:37 | INFO | train_inner | epoch 003: 2085 / 3002 loss=2.631, ppl=6.19, wps=5918.1, ups=0.09, wpb=64878, bsz=128, num_updates=8037, lr=9.99437e-05, gnorm=2.065, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91972
2021-06-19 20:11:49 | INFO | train_inner | epoch 003: 2086 / 3002 loss=2.568, ppl=5.93, wps=5795.7, ups=0.09, wpb=64759, bsz=128, num_updates=8038, lr=9.99437e-05, gnorm=2.004, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91983
2021-06-19 20:12:00 | INFO | train_inner | epoch 003: 2087 / 3002 loss=2.657, ppl=6.31, wps=5869.1, ups=0.09, wpb=64756, bsz=128, num_updates=8039, lr=9.99437e-05, gnorm=2.139, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=91994
2021-06-19 20:12:11 | INFO | train_inner | epoch 003: 2088 / 3002 loss=2.597, ppl=6.05, wps=5930.5, ups=0.09, wpb=64908, bsz=128, num_updates=8040, lr=9.99437e-05, gnorm=2.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92005
2021-06-19 20:12:22 | INFO | train_inner | epoch 003: 2089 / 3002 loss=2.671, ppl=6.37, wps=5865.9, ups=0.09, wpb=64801, bsz=128, num_updates=8041, lr=9.99437e-05, gnorm=2.026, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92016
2021-06-19 20:12:33 | INFO | train_inner | epoch 003: 2090 / 3002 loss=2.426, ppl=5.37, wps=5787.1, ups=0.09, wpb=64851, bsz=128, num_updates=8042, lr=9.99437e-05, gnorm=2.126, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92027
2021-06-19 20:12:44 | INFO | train_inner | epoch 003: 2091 / 3002 loss=2.732, ppl=6.65, wps=5879.2, ups=0.09, wpb=64867, bsz=128, num_updates=8043, lr=9.99437e-05, gnorm=2.276, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92038
2021-06-19 20:12:55 | INFO | train_inner | epoch 003: 2092 / 3002 loss=2.527, ppl=5.76, wps=5880.9, ups=0.09, wpb=64857, bsz=128, num_updates=8044, lr=9.99436e-05, gnorm=2.005, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92049
2021-06-19 20:13:06 | INFO | train_inner | epoch 003: 2093 / 3002 loss=2.614, ppl=6.12, wps=5909.9, ups=0.09, wpb=64862, bsz=128, num_updates=8045, lr=9.99436e-05, gnorm=2.138, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92060
2021-06-19 20:13:17 | INFO | train_inner | epoch 003: 2094 / 3002 loss=2.47, ppl=5.54, wps=5769.2, ups=0.09, wpb=64839, bsz=128, num_updates=8046, lr=9.99436e-05, gnorm=2.043, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92071
2021-06-19 20:13:28 | INFO | train_inner | epoch 003: 2095 / 3002 loss=2.581, ppl=5.98, wps=5789.9, ups=0.09, wpb=64783, bsz=128, num_updates=8047, lr=9.99436e-05, gnorm=5.256, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92083
2021-06-19 20:13:40 | INFO | train_inner | epoch 003: 2096 / 3002 loss=2.646, ppl=6.26, wps=5744.5, ups=0.09, wpb=64832, bsz=128, num_updates=8048, lr=9.99436e-05, gnorm=2.066, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92094
2021-06-19 20:13:51 | INFO | train_inner | epoch 003: 2097 / 3002 loss=2.557, ppl=5.88, wps=5818.2, ups=0.09, wpb=64848, bsz=128, num_updates=8049, lr=9.99436e-05, gnorm=2.217, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92105
2021-06-19 20:14:02 | INFO | train_inner | epoch 003: 2098 / 3002 loss=2.596, ppl=6.05, wps=5847.5, ups=0.09, wpb=64834, bsz=128, num_updates=8050, lr=9.99436e-05, gnorm=2.046, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92116
2021-06-19 20:14:13 | INFO | train_inner | epoch 003: 2099 / 3002 loss=2.611, ppl=6.11, wps=5772.2, ups=0.09, wpb=64844, bsz=128, num_updates=8051, lr=9.99436e-05, gnorm=2.088, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92127
2021-06-19 20:14:24 | INFO | train_inner | epoch 003: 2100 / 3002 loss=2.537, ppl=5.8, wps=5876.5, ups=0.09, wpb=64911, bsz=128, num_updates=8052, lr=9.99436e-05, gnorm=2.148, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92138
2021-06-19 20:14:35 | INFO | train_inner | epoch 003: 2101 / 3002 loss=2.548, ppl=5.85, wps=5862.5, ups=0.09, wpb=64849, bsz=128, num_updates=8053, lr=9.99436e-05, gnorm=2.473, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92149
2021-06-19 20:14:46 | INFO | train_inner | epoch 003: 2102 / 3002 loss=2.674, ppl=6.38, wps=5859.3, ups=0.09, wpb=64754, bsz=128, num_updates=8054, lr=9.99436e-05, gnorm=2.042, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92160
2021-06-19 20:14:57 | INFO | train_inner | epoch 003: 2103 / 3002 loss=2.65, ppl=6.27, wps=5817.2, ups=0.09, wpb=64821, bsz=128, num_updates=8055, lr=9.99436e-05, gnorm=2.766, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92172
2021-06-19 20:15:08 | INFO | train_inner | epoch 003: 2104 / 3002 loss=2.675, ppl=6.39, wps=5916.2, ups=0.09, wpb=64859, bsz=128, num_updates=8056, lr=9.99435e-05, gnorm=2.066, loss_scale=1, train_wall=11, gb_free=2.8, wall=92183
2021-06-19 20:15:19 | INFO | train_inner | epoch 003: 2105 / 3002 loss=2.629, ppl=6.19, wps=5936, ups=0.09, wpb=64804, bsz=128, num_updates=8057, lr=9.99435e-05, gnorm=4.902, loss_scale=1, train_wall=10, gb_free=2.8, wall=92193
2021-06-19 20:15:30 | INFO | train_inner | epoch 003: 2106 / 3002 loss=2.825, ppl=7.09, wps=5820.6, ups=0.09, wpb=64810, bsz=128, num_updates=8058, lr=9.99435e-05, gnorm=2.262, loss_scale=1, train_wall=11, gb_free=2.8, wall=92205
2021-06-19 20:15:41 | INFO | train_inner | epoch 003: 2107 / 3002 loss=2.457, ppl=5.49, wps=5792.7, ups=0.09, wpb=64830, bsz=128, num_updates=8059, lr=9.99435e-05, gnorm=2.172, loss_scale=1, train_wall=11, gb_free=2.8, wall=92216
2021-06-19 20:15:53 | INFO | train_inner | epoch 003: 2108 / 3002 loss=2.576, ppl=5.96, wps=5888.1, ups=0.09, wpb=64875, bsz=128, num_updates=8060, lr=9.99435e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=92227
2021-06-19 20:16:04 | INFO | train_inner | epoch 003: 2109 / 3002 loss=2.651, ppl=6.28, wps=5870.4, ups=0.09, wpb=64838, bsz=128, num_updates=8061, lr=9.99435e-05, gnorm=2.307, loss_scale=1, train_wall=11, gb_free=2.8, wall=92238
2021-06-19 20:16:15 | INFO | train_inner | epoch 003: 2110 / 3002 loss=2.723, ppl=6.6, wps=5841.8, ups=0.09, wpb=64818, bsz=128, num_updates=8062, lr=9.99435e-05, gnorm=6.586, loss_scale=1, train_wall=11, gb_free=2.8, wall=92249
2021-06-19 20:16:26 | INFO | train_inner | epoch 003: 2111 / 3002 loss=2.734, ppl=6.65, wps=5835.9, ups=0.09, wpb=64686, bsz=128, num_updates=8063, lr=9.99435e-05, gnorm=2.27, loss_scale=1, train_wall=11, gb_free=2.8, wall=92260
2021-06-19 20:16:37 | INFO | train_inner | epoch 003: 2112 / 3002 loss=2.593, ppl=6.03, wps=5927.6, ups=0.09, wpb=64896, bsz=128, num_updates=8064, lr=9.99435e-05, gnorm=2.256, loss_scale=1, train_wall=10, gb_free=2.8, wall=92271
2021-06-19 20:16:48 | INFO | train_inner | epoch 003: 2113 / 3002 loss=2.698, ppl=6.49, wps=5927, ups=0.09, wpb=64880, bsz=128, num_updates=8065, lr=9.99435e-05, gnorm=2.336, loss_scale=1, train_wall=11, gb_free=2.8, wall=92282
2021-06-19 20:16:59 | INFO | train_inner | epoch 003: 2114 / 3002 loss=2.647, ppl=6.27, wps=5789.5, ups=0.09, wpb=64806, bsz=128, num_updates=8066, lr=9.99435e-05, gnorm=2.222, loss_scale=1, train_wall=11, gb_free=2.8, wall=92293
2021-06-19 20:17:10 | INFO | train_inner | epoch 003: 2115 / 3002 loss=2.514, ppl=5.71, wps=5799.7, ups=0.09, wpb=64846, bsz=128, num_updates=8067, lr=9.99435e-05, gnorm=2.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=92304
2021-06-19 20:17:21 | INFO | train_inner | epoch 003: 2116 / 3002 loss=2.586, ppl=6.01, wps=5903.8, ups=0.09, wpb=64792, bsz=128, num_updates=8068, lr=9.99435e-05, gnorm=11.013, loss_scale=1, train_wall=11, gb_free=2.8, wall=92315
2021-06-19 20:17:32 | INFO | train_inner | epoch 003: 2117 / 3002 loss=2.65, ppl=6.27, wps=5766.3, ups=0.09, wpb=64815, bsz=128, num_updates=8069, lr=9.99434e-05, gnorm=3.09, loss_scale=1, train_wall=11, gb_free=2.8, wall=92327
2021-06-19 20:17:43 | INFO | train_inner | epoch 003: 2118 / 3002 loss=2.64, ppl=6.23, wps=5833.9, ups=0.09, wpb=64768, bsz=128, num_updates=8070, lr=9.99434e-05, gnorm=4.879, loss_scale=1, train_wall=11, gb_free=2.8, wall=92338
2021-06-19 20:17:54 | INFO | train_inner | epoch 003: 2119 / 3002 loss=2.704, ppl=6.52, wps=5838.8, ups=0.09, wpb=64871, bsz=128, num_updates=8071, lr=9.99434e-05, gnorm=2.218, loss_scale=1, train_wall=11, gb_free=2.8, wall=92349
2021-06-19 20:18:06 | INFO | train_inner | epoch 003: 2120 / 3002 loss=2.635, ppl=6.21, wps=5761.3, ups=0.09, wpb=64879, bsz=128, num_updates=8072, lr=9.99434e-05, gnorm=2.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=92360
2021-06-19 20:18:17 | INFO | train_inner | epoch 003: 2121 / 3002 loss=2.717, ppl=6.57, wps=5749.9, ups=0.09, wpb=64816, bsz=128, num_updates=8073, lr=9.99434e-05, gnorm=2.538, loss_scale=1, train_wall=11, gb_free=2.8, wall=92371
2021-06-19 20:18:28 | INFO | train_inner | epoch 003: 2122 / 3002 loss=2.637, ppl=6.22, wps=5762.5, ups=0.09, wpb=64881, bsz=128, num_updates=8074, lr=9.99434e-05, gnorm=2.292, loss_scale=1, train_wall=11, gb_free=2.8, wall=92383
2021-06-19 20:18:39 | INFO | train_inner | epoch 003: 2123 / 3002 loss=2.718, ppl=6.58, wps=5876.9, ups=0.09, wpb=64873, bsz=128, num_updates=8075, lr=9.99434e-05, gnorm=3.279, loss_scale=1, train_wall=11, gb_free=2.8, wall=92394
2021-06-19 20:18:51 | INFO | train_inner | epoch 003: 2124 / 3002 loss=2.701, ppl=6.5, wps=5749.3, ups=0.09, wpb=64807, bsz=128, num_updates=8076, lr=9.99434e-05, gnorm=3.349, loss_scale=1, train_wall=11, gb_free=2.8, wall=92405
2021-06-19 20:19:02 | INFO | train_inner | epoch 003: 2125 / 3002 loss=2.525, ppl=5.76, wps=5736.1, ups=0.09, wpb=64856, bsz=128, num_updates=8077, lr=9.99434e-05, gnorm=2.171, loss_scale=1, train_wall=11, gb_free=2.8, wall=92416
2021-06-19 20:19:13 | INFO | train_inner | epoch 003: 2126 / 3002 loss=2.65, ppl=6.27, wps=5833.7, ups=0.09, wpb=64833, bsz=128, num_updates=8078, lr=9.99434e-05, gnorm=2.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=92427
2021-06-19 20:19:24 | INFO | train_inner | epoch 003: 2127 / 3002 loss=2.749, ppl=6.72, wps=5811.3, ups=0.09, wpb=64834, bsz=128, num_updates=8079, lr=9.99434e-05, gnorm=2.186, loss_scale=1, train_wall=11, gb_free=2.8, wall=92438
2021-06-19 20:19:35 | INFO | train_inner | epoch 003: 2128 / 3002 loss=2.602, ppl=6.07, wps=5839.2, ups=0.09, wpb=64887, bsz=128, num_updates=8080, lr=9.99434e-05, gnorm=2.207, loss_scale=1, train_wall=11, gb_free=2.8, wall=92450
2021-06-19 20:19:46 | INFO | train_inner | epoch 003: 2129 / 3002 loss=2.753, ppl=6.74, wps=5875.3, ups=0.09, wpb=64817, bsz=128, num_updates=8081, lr=9.99433e-05, gnorm=2.12, loss_scale=1, train_wall=11, gb_free=2.8, wall=92461
2021-06-19 20:19:57 | INFO | train_inner | epoch 003: 2130 / 3002 loss=2.655, ppl=6.3, wps=5939.7, ups=0.09, wpb=64846, bsz=128, num_updates=8082, lr=9.99433e-05, gnorm=2.153, loss_scale=1, train_wall=10, gb_free=2.8, wall=92472
2021-06-19 20:20:08 | INFO | train_inner | epoch 003: 2131 / 3002 loss=2.625, ppl=6.17, wps=5914.5, ups=0.09, wpb=64895, bsz=128, num_updates=8083, lr=9.99433e-05, gnorm=2.345, loss_scale=1, train_wall=10, gb_free=2.8, wall=92482
2021-06-19 20:20:19 | INFO | train_inner | epoch 003: 2132 / 3002 loss=2.736, ppl=6.66, wps=5737.3, ups=0.09, wpb=64761, bsz=128, num_updates=8084, lr=9.99433e-05, gnorm=2.297, loss_scale=1, train_wall=11, gb_free=2.8, wall=92494
2021-06-19 20:20:31 | INFO | train_inner | epoch 003: 2133 / 3002 loss=2.635, ppl=6.21, wps=5725.7, ups=0.09, wpb=64863, bsz=128, num_updates=8085, lr=9.99433e-05, gnorm=2.261, loss_scale=1, train_wall=11, gb_free=2.8, wall=92505
2021-06-19 20:20:42 | INFO | train_inner | epoch 003: 2134 / 3002 loss=2.564, ppl=5.91, wps=5912.8, ups=0.09, wpb=64849, bsz=128, num_updates=8086, lr=9.99433e-05, gnorm=2.139, loss_scale=1, train_wall=11, gb_free=2.8, wall=92516
2021-06-19 20:20:53 | INFO | train_inner | epoch 003: 2135 / 3002 loss=2.551, ppl=5.86, wps=5732.3, ups=0.09, wpb=64834, bsz=128, num_updates=8087, lr=9.99433e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=92527
2021-06-19 20:21:04 | INFO | train_inner | epoch 003: 2136 / 3002 loss=2.707, ppl=6.53, wps=5816.4, ups=0.09, wpb=64768, bsz=128, num_updates=8088, lr=9.99433e-05, gnorm=2.294, loss_scale=1, train_wall=11, gb_free=2.8, wall=92539
2021-06-19 20:21:15 | INFO | train_inner | epoch 003: 2137 / 3002 loss=2.653, ppl=6.29, wps=5791.3, ups=0.09, wpb=64766, bsz=128, num_updates=8089, lr=9.99433e-05, gnorm=2.534, loss_scale=1, train_wall=11, gb_free=2.8, wall=92550
2021-06-19 20:21:26 | INFO | train_inner | epoch 003: 2138 / 3002 loss=2.74, ppl=6.68, wps=5854.4, ups=0.09, wpb=64875, bsz=128, num_updates=8090, lr=9.99433e-05, gnorm=2.182, loss_scale=1, train_wall=11, gb_free=2.8, wall=92561
2021-06-19 20:21:38 | INFO | train_inner | epoch 003: 2139 / 3002 loss=2.617, ppl=6.14, wps=5730, ups=0.09, wpb=64822, bsz=128, num_updates=8091, lr=9.99433e-05, gnorm=2.097, loss_scale=1, train_wall=11, gb_free=2.8, wall=92572
2021-06-19 20:21:49 | INFO | train_inner | epoch 003: 2140 / 3002 loss=2.797, ppl=6.95, wps=5762.8, ups=0.09, wpb=64686, bsz=128, num_updates=8092, lr=9.99433e-05, gnorm=3.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=92583
2021-06-19 20:22:00 | INFO | train_inner | epoch 003: 2141 / 3002 loss=2.699, ppl=6.49, wps=5797.2, ups=0.09, wpb=64786, bsz=128, num_updates=8093, lr=9.99433e-05, gnorm=2.205, loss_scale=1, train_wall=11, gb_free=2.8, wall=92594
2021-06-19 20:22:11 | INFO | train_inner | epoch 003: 2142 / 3002 loss=2.482, ppl=5.59, wps=5865.2, ups=0.09, wpb=64882, bsz=128, num_updates=8094, lr=9.99432e-05, gnorm=2.205, loss_scale=1, train_wall=11, gb_free=2.8, wall=92606
2021-06-19 20:22:22 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-19 20:22:34 | INFO | train_inner | epoch 003: 2144 / 3002 loss=2.632, ppl=6.2, wps=2907.8, ups=0.04, wpb=64805, bsz=128, num_updates=8095, lr=9.99432e-05, gnorm=2.351, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=92628
2021-06-19 20:22:45 | INFO | train_inner | epoch 003: 2145 / 3002 loss=2.756, ppl=6.76, wps=5861.3, ups=0.09, wpb=64833, bsz=128, num_updates=8096, lr=9.99432e-05, gnorm=2.383, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92639
2021-06-19 20:22:56 | INFO | train_inner | epoch 003: 2146 / 3002 loss=2.623, ppl=6.16, wps=5774.3, ups=0.09, wpb=64865, bsz=128, num_updates=8097, lr=9.99432e-05, gnorm=2.054, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92650
2021-06-19 20:23:07 | INFO | train_inner | epoch 003: 2147 / 3002 loss=2.618, ppl=6.14, wps=5812.4, ups=0.09, wpb=64774, bsz=128, num_updates=8098, lr=9.99432e-05, gnorm=2.138, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92661
2021-06-19 20:23:18 | INFO | train_inner | epoch 003: 2148 / 3002 loss=2.541, ppl=5.82, wps=5863.9, ups=0.09, wpb=64884, bsz=128, num_updates=8099, lr=9.99432e-05, gnorm=2.045, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92672
2021-06-19 20:23:29 | INFO | train_inner | epoch 003: 2149 / 3002 loss=2.648, ppl=6.27, wps=5790.6, ups=0.09, wpb=64755, bsz=128, num_updates=8100, lr=9.99432e-05, gnorm=2.147, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92684
2021-06-19 20:23:40 | INFO | train_inner | epoch 003: 2150 / 3002 loss=2.598, ppl=6.05, wps=5896.3, ups=0.09, wpb=64811, bsz=128, num_updates=8101, lr=9.99432e-05, gnorm=2.11, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92695
2021-06-19 20:23:51 | INFO | train_inner | epoch 003: 2151 / 3002 loss=2.716, ppl=6.57, wps=5789.4, ups=0.09, wpb=64782, bsz=128, num_updates=8102, lr=9.99432e-05, gnorm=2.251, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92706
2021-06-19 20:24:03 | INFO | train_inner | epoch 003: 2152 / 3002 loss=2.845, ppl=7.18, wps=5805, ups=0.09, wpb=64747, bsz=128, num_updates=8103, lr=9.99432e-05, gnorm=2.116, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92717
2021-06-19 20:24:14 | INFO | train_inner | epoch 003: 2153 / 3002 loss=2.541, ppl=5.82, wps=5903.5, ups=0.09, wpb=64792, bsz=128, num_updates=8104, lr=9.99432e-05, gnorm=2.418, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92728
2021-06-19 20:24:25 | INFO | train_inner | epoch 003: 2154 / 3002 loss=2.476, ppl=5.56, wps=5895.6, ups=0.09, wpb=64980, bsz=128, num_updates=8105, lr=9.99432e-05, gnorm=2.143, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92739
2021-06-19 20:24:36 | INFO | train_inner | epoch 003: 2155 / 3002 loss=2.581, ppl=5.99, wps=5828.6, ups=0.09, wpb=64824, bsz=128, num_updates=8106, lr=9.99431e-05, gnorm=2.151, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92750
2021-06-19 20:24:47 | INFO | train_inner | epoch 003: 2156 / 3002 loss=2.647, ppl=6.26, wps=5742.1, ups=0.09, wpb=64784, bsz=128, num_updates=8107, lr=9.99431e-05, gnorm=2.055, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92761
2021-06-19 20:24:58 | INFO | train_inner | epoch 003: 2157 / 3002 loss=2.792, ppl=6.93, wps=5922.8, ups=0.09, wpb=64854, bsz=128, num_updates=8108, lr=9.99431e-05, gnorm=2.217, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=92772
2021-06-19 20:25:09 | INFO | train_inner | epoch 003: 2158 / 3002 loss=2.52, ppl=5.74, wps=5838.8, ups=0.09, wpb=64765, bsz=128, num_updates=8109, lr=9.99431e-05, gnorm=2.087, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92783
2021-06-19 20:25:20 | INFO | train_inner | epoch 003: 2159 / 3002 loss=2.606, ppl=6.09, wps=5794.4, ups=0.09, wpb=64857, bsz=128, num_updates=8110, lr=9.99431e-05, gnorm=2.487, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92795
2021-06-19 20:25:31 | INFO | train_inner | epoch 003: 2160 / 3002 loss=2.636, ppl=6.22, wps=5828.9, ups=0.09, wpb=64898, bsz=128, num_updates=8111, lr=9.99431e-05, gnorm=2.113, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92806
2021-06-19 20:25:42 | INFO | train_inner | epoch 003: 2161 / 3002 loss=2.57, ppl=5.94, wps=5935.5, ups=0.09, wpb=64741, bsz=128, num_updates=8112, lr=9.99431e-05, gnorm=2.136, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=92817
2021-06-19 20:25:53 | INFO | train_inner | epoch 003: 2162 / 3002 loss=2.551, ppl=5.86, wps=5886.2, ups=0.09, wpb=64890, bsz=128, num_updates=8113, lr=9.99431e-05, gnorm=2.144, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92828
2021-06-19 20:26:04 | INFO | train_inner | epoch 003: 2163 / 3002 loss=2.717, ppl=6.57, wps=5983.3, ups=0.09, wpb=64873, bsz=128, num_updates=8114, lr=9.99431e-05, gnorm=2.158, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=92838
2021-06-19 20:26:15 | INFO | train_inner | epoch 003: 2164 / 3002 loss=2.498, ppl=5.65, wps=5883.9, ups=0.09, wpb=64768, bsz=128, num_updates=8115, lr=9.99431e-05, gnorm=2.24, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92849
2021-06-19 20:26:26 | INFO | train_inner | epoch 003: 2165 / 3002 loss=2.6, ppl=6.06, wps=5900.2, ups=0.09, wpb=64797, bsz=128, num_updates=8116, lr=9.99431e-05, gnorm=39.753, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92860
2021-06-19 20:26:37 | INFO | train_inner | epoch 003: 2166 / 3002 loss=2.655, ppl=6.3, wps=5906.7, ups=0.09, wpb=64727, bsz=128, num_updates=8117, lr=9.99431e-05, gnorm=2.665, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92871
2021-06-19 20:26:48 | INFO | train_inner | epoch 003: 2167 / 3002 loss=2.607, ppl=6.09, wps=5835.9, ups=0.09, wpb=64854, bsz=128, num_updates=8118, lr=9.99431e-05, gnorm=2.074, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92882
2021-06-19 20:26:59 | INFO | train_inner | epoch 003: 2168 / 3002 loss=2.543, ppl=5.83, wps=5967, ups=0.09, wpb=64819, bsz=128, num_updates=8119, lr=9.9943e-05, gnorm=2.19, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=92893
2021-06-19 20:27:10 | INFO | train_inner | epoch 003: 2169 / 3002 loss=2.604, ppl=6.08, wps=5904.9, ups=0.09, wpb=64868, bsz=128, num_updates=8120, lr=9.9943e-05, gnorm=2.138, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92904
2021-06-19 20:27:21 | INFO | train_inner | epoch 003: 2170 / 3002 loss=2.668, ppl=6.36, wps=5856.3, ups=0.09, wpb=64830, bsz=128, num_updates=8121, lr=9.9943e-05, gnorm=2.151, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92915
2021-06-19 20:27:32 | INFO | train_inner | epoch 003: 2171 / 3002 loss=2.613, ppl=6.12, wps=5807.7, ups=0.09, wpb=64879, bsz=128, num_updates=8122, lr=9.9943e-05, gnorm=2.24, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92927
2021-06-19 20:27:43 | INFO | train_inner | epoch 003: 2172 / 3002 loss=2.717, ppl=6.58, wps=5809.5, ups=0.09, wpb=64849, bsz=128, num_updates=8123, lr=9.9943e-05, gnorm=2.197, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92938
2021-06-19 20:27:55 | INFO | train_inner | epoch 003: 2173 / 3002 loss=2.619, ppl=6.14, wps=5817.9, ups=0.09, wpb=64864, bsz=128, num_updates=8124, lr=9.9943e-05, gnorm=3.018, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92949
2021-06-19 20:28:06 | INFO | train_inner | epoch 003: 2174 / 3002 loss=2.683, ppl=6.42, wps=5727.7, ups=0.09, wpb=64847, bsz=128, num_updates=8125, lr=9.9943e-05, gnorm=2.329, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92960
2021-06-19 20:28:17 | INFO | train_inner | epoch 003: 2175 / 3002 loss=2.53, ppl=5.78, wps=5706.5, ups=0.09, wpb=64784, bsz=128, num_updates=8126, lr=9.9943e-05, gnorm=8.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92972
2021-06-19 20:28:28 | INFO | train_inner | epoch 003: 2176 / 3002 loss=2.621, ppl=6.15, wps=5928.3, ups=0.09, wpb=64840, bsz=128, num_updates=8127, lr=9.9943e-05, gnorm=8.159, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=92982
2021-06-19 20:28:39 | INFO | train_inner | epoch 003: 2177 / 3002 loss=2.647, ppl=6.26, wps=5853, ups=0.09, wpb=64874, bsz=128, num_updates=8128, lr=9.9943e-05, gnorm=2.158, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=92994
2021-06-19 20:28:50 | INFO | train_inner | epoch 003: 2178 / 3002 loss=2.685, ppl=6.43, wps=5886, ups=0.09, wpb=64789, bsz=128, num_updates=8129, lr=9.9943e-05, gnorm=2.2, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93005
2021-06-19 20:29:01 | INFO | train_inner | epoch 003: 2179 / 3002 loss=2.514, ppl=5.71, wps=5779.7, ups=0.09, wpb=64843, bsz=128, num_updates=8130, lr=9.9943e-05, gnorm=2.066, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93016
2021-06-19 20:29:13 | INFO | train_inner | epoch 003: 2180 / 3002 loss=2.674, ppl=6.38, wps=5815.2, ups=0.09, wpb=64851, bsz=128, num_updates=8131, lr=9.99429e-05, gnorm=2.119, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93027
2021-06-19 20:29:23 | INFO | train_inner | epoch 003: 2181 / 3002 loss=2.591, ppl=6.03, wps=5981.1, ups=0.09, wpb=64923, bsz=128, num_updates=8132, lr=9.99429e-05, gnorm=2.239, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93038
2021-06-19 20:29:35 | INFO | train_inner | epoch 003: 2182 / 3002 loss=2.602, ppl=6.07, wps=5767.6, ups=0.09, wpb=64824, bsz=128, num_updates=8133, lr=9.99429e-05, gnorm=2.167, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93049
2021-06-19 20:29:46 | INFO | train_inner | epoch 003: 2183 / 3002 loss=2.656, ppl=6.3, wps=5799.5, ups=0.09, wpb=64771, bsz=128, num_updates=8134, lr=9.99429e-05, gnorm=2.198, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93060
2021-06-19 20:29:57 | INFO | train_inner | epoch 003: 2184 / 3002 loss=2.839, ppl=7.15, wps=5838.5, ups=0.09, wpb=64860, bsz=128, num_updates=8135, lr=9.99429e-05, gnorm=2.096, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93071
2021-06-19 20:30:08 | INFO | train_inner | epoch 003: 2185 / 3002 loss=2.816, ppl=7.04, wps=5902.8, ups=0.09, wpb=64868, bsz=128, num_updates=8136, lr=9.99429e-05, gnorm=2.254, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93082
2021-06-19 20:30:19 | INFO | train_inner | epoch 003: 2186 / 3002 loss=2.697, ppl=6.48, wps=5807, ups=0.09, wpb=64861, bsz=128, num_updates=8137, lr=9.99429e-05, gnorm=2.203, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93093
2021-06-19 20:30:30 | INFO | train_inner | epoch 003: 2187 / 3002 loss=2.74, ppl=6.68, wps=5829.4, ups=0.09, wpb=64809, bsz=128, num_updates=8138, lr=9.99429e-05, gnorm=2.159, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93105
2021-06-19 20:30:41 | INFO | train_inner | epoch 003: 2188 / 3002 loss=2.605, ppl=6.09, wps=5877.8, ups=0.09, wpb=64849, bsz=128, num_updates=8139, lr=9.99429e-05, gnorm=2.159, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93116
2021-06-19 20:30:52 | INFO | train_inner | epoch 003: 2189 / 3002 loss=2.42, ppl=5.35, wps=5873.3, ups=0.09, wpb=64800, bsz=128, num_updates=8140, lr=9.99429e-05, gnorm=2.21, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93127
2021-06-19 20:31:03 | INFO | train_inner | epoch 003: 2190 / 3002 loss=2.586, ppl=6, wps=5875.4, ups=0.09, wpb=64836, bsz=128, num_updates=8141, lr=9.99429e-05, gnorm=2.123, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93138
2021-06-19 20:31:14 | INFO | train_inner | epoch 003: 2191 / 3002 loss=2.594, ppl=6.04, wps=5852, ups=0.09, wpb=64818, bsz=128, num_updates=8142, lr=9.99429e-05, gnorm=2.137, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93149
2021-06-19 20:31:25 | INFO | train_inner | epoch 003: 2192 / 3002 loss=2.537, ppl=5.8, wps=5878.7, ups=0.09, wpb=64837, bsz=128, num_updates=8143, lr=9.99429e-05, gnorm=2.325, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93160
2021-06-19 20:31:36 | INFO | train_inner | epoch 003: 2193 / 3002 loss=2.459, ppl=5.5, wps=5913.8, ups=0.09, wpb=64805, bsz=128, num_updates=8144, lr=9.99428e-05, gnorm=2.364, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93171
2021-06-19 20:31:48 | INFO | train_inner | epoch 003: 2194 / 3002 loss=2.656, ppl=6.3, wps=5817.6, ups=0.09, wpb=64832, bsz=128, num_updates=8145, lr=9.99428e-05, gnorm=2.265, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93182
2021-06-19 20:31:59 | INFO | train_inner | epoch 003: 2195 / 3002 loss=2.612, ppl=6.11, wps=5858.7, ups=0.09, wpb=64823, bsz=128, num_updates=8146, lr=9.99428e-05, gnorm=17.744, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93193
2021-06-19 20:32:10 | INFO | train_inner | epoch 003: 2196 / 3002 loss=2.579, ppl=5.98, wps=5760, ups=0.09, wpb=64876, bsz=128, num_updates=8147, lr=9.99428e-05, gnorm=2.449, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93204
2021-06-19 20:32:21 | INFO | train_inner | epoch 003: 2197 / 3002 loss=2.527, ppl=5.76, wps=5834.7, ups=0.09, wpb=64762, bsz=128, num_updates=8148, lr=9.99428e-05, gnorm=2.046, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93215
2021-06-19 20:32:32 | INFO | train_inner | epoch 003: 2198 / 3002 loss=2.654, ppl=6.29, wps=5854, ups=0.09, wpb=64797, bsz=128, num_updates=8149, lr=9.99428e-05, gnorm=2.071, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93226
2021-06-19 20:32:43 | INFO | train_inner | epoch 003: 2199 / 3002 loss=2.61, ppl=6.11, wps=5853.9, ups=0.09, wpb=64808, bsz=128, num_updates=8150, lr=9.99428e-05, gnorm=2.172, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93237
2021-06-19 20:32:54 | INFO | train_inner | epoch 003: 2200 / 3002 loss=2.654, ppl=6.29, wps=5877.3, ups=0.09, wpb=64798, bsz=128, num_updates=8151, lr=9.99428e-05, gnorm=2.067, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93249
2021-06-19 20:33:05 | INFO | train_inner | epoch 003: 2201 / 3002 loss=2.777, ppl=6.85, wps=5896.2, ups=0.09, wpb=64813, bsz=128, num_updates=8152, lr=9.99428e-05, gnorm=2.05, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93260
2021-06-19 20:33:16 | INFO | train_inner | epoch 003: 2202 / 3002 loss=2.682, ppl=6.42, wps=5850.6, ups=0.09, wpb=64819, bsz=128, num_updates=8153, lr=9.99428e-05, gnorm=2.156, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93271
2021-06-19 20:33:27 | INFO | train_inner | epoch 003: 2203 / 3002 loss=2.708, ppl=6.53, wps=5948.2, ups=0.09, wpb=64829, bsz=128, num_updates=8154, lr=9.99428e-05, gnorm=2.109, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93281
2021-06-19 20:33:38 | INFO | train_inner | epoch 003: 2204 / 3002 loss=2.647, ppl=6.26, wps=5871.1, ups=0.09, wpb=64804, bsz=128, num_updates=8155, lr=9.99428e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93293
2021-06-19 20:33:49 | INFO | train_inner | epoch 003: 2205 / 3002 loss=2.551, ppl=5.86, wps=5745.3, ups=0.09, wpb=64817, bsz=128, num_updates=8156, lr=9.99427e-05, gnorm=2.169, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93304
2021-06-19 20:34:01 | INFO | train_inner | epoch 003: 2206 / 3002 loss=2.665, ppl=6.34, wps=5847.1, ups=0.09, wpb=64875, bsz=128, num_updates=8157, lr=9.99427e-05, gnorm=2.084, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93315
2021-06-19 20:34:12 | INFO | train_inner | epoch 003: 2207 / 3002 loss=2.503, ppl=5.67, wps=5759.2, ups=0.09, wpb=64892, bsz=128, num_updates=8158, lr=9.99427e-05, gnorm=2.159, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93326
2021-06-19 20:34:23 | INFO | train_inner | epoch 003: 2208 / 3002 loss=2.648, ppl=6.27, wps=6052.9, ups=0.09, wpb=64975, bsz=128, num_updates=8159, lr=9.99427e-05, gnorm=2.262, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93337
2021-06-19 20:34:34 | INFO | train_inner | epoch 003: 2209 / 3002 loss=2.573, ppl=5.95, wps=5797.7, ups=0.09, wpb=64903, bsz=128, num_updates=8160, lr=9.99427e-05, gnorm=2.274, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93348
2021-06-19 20:34:45 | INFO | train_inner | epoch 003: 2210 / 3002 loss=2.549, ppl=5.85, wps=5863.8, ups=0.09, wpb=64785, bsz=128, num_updates=8161, lr=9.99427e-05, gnorm=4.683, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93359
2021-06-19 20:34:56 | INFO | train_inner | epoch 003: 2211 / 3002 loss=2.804, ppl=6.98, wps=5804.6, ups=0.09, wpb=64742, bsz=128, num_updates=8162, lr=9.99427e-05, gnorm=2.205, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93370
2021-06-19 20:35:07 | INFO | train_inner | epoch 003: 2212 / 3002 loss=2.695, ppl=6.47, wps=5894.5, ups=0.09, wpb=64898, bsz=128, num_updates=8163, lr=9.99427e-05, gnorm=2.231, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93381
2021-06-19 20:35:18 | INFO | train_inner | epoch 003: 2213 / 3002 loss=2.63, ppl=6.19, wps=5853.9, ups=0.09, wpb=64852, bsz=128, num_updates=8164, lr=9.99427e-05, gnorm=3.986, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93392
2021-06-19 20:35:29 | INFO | train_inner | epoch 003: 2214 / 3002 loss=2.775, ppl=6.85, wps=5875.8, ups=0.09, wpb=64863, bsz=128, num_updates=8165, lr=9.99427e-05, gnorm=2.133, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93403
2021-06-19 20:35:40 | INFO | train_inner | epoch 003: 2215 / 3002 loss=2.787, ppl=6.9, wps=5864.6, ups=0.09, wpb=64843, bsz=128, num_updates=8166, lr=9.99427e-05, gnorm=5.294, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93414
2021-06-19 20:35:51 | INFO | train_inner | epoch 003: 2216 / 3002 loss=2.719, ppl=6.59, wps=5979.9, ups=0.09, wpb=64937, bsz=128, num_updates=8167, lr=9.99427e-05, gnorm=2.177, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93425
2021-06-19 20:36:02 | INFO | train_inner | epoch 003: 2217 / 3002 loss=2.79, ppl=6.92, wps=5796.8, ups=0.09, wpb=64842, bsz=128, num_updates=8168, lr=9.99427e-05, gnorm=2.249, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93437
2021-06-19 20:36:13 | INFO | train_inner | epoch 003: 2218 / 3002 loss=2.844, ppl=7.18, wps=5861.1, ups=0.09, wpb=64763, bsz=128, num_updates=8169, lr=9.99426e-05, gnorm=2.251, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93448
2021-06-19 20:36:24 | INFO | train_inner | epoch 003: 2219 / 3002 loss=2.59, ppl=6.02, wps=5810.5, ups=0.09, wpb=64880, bsz=128, num_updates=8170, lr=9.99426e-05, gnorm=10.445, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93459
2021-06-19 20:36:35 | INFO | train_inner | epoch 003: 2220 / 3002 loss=2.563, ppl=5.91, wps=5935.7, ups=0.09, wpb=64862, bsz=128, num_updates=8171, lr=9.99426e-05, gnorm=2.156, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93470
2021-06-19 20:36:46 | INFO | train_inner | epoch 003: 2221 / 3002 loss=2.586, ppl=6, wps=5903.3, ups=0.09, wpb=64907, bsz=128, num_updates=8172, lr=9.99426e-05, gnorm=2.276, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93481
2021-06-19 20:36:57 | INFO | train_inner | epoch 003: 2222 / 3002 loss=2.668, ppl=6.36, wps=5836.4, ups=0.09, wpb=64820, bsz=128, num_updates=8173, lr=9.99426e-05, gnorm=2.239, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93492
2021-06-19 20:37:09 | INFO | train_inner | epoch 003: 2223 / 3002 loss=2.607, ppl=6.09, wps=5708.3, ups=0.09, wpb=64775, bsz=128, num_updates=8174, lr=9.99426e-05, gnorm=2.139, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93503
2021-06-19 20:37:20 | INFO | train_inner | epoch 003: 2224 / 3002 loss=2.594, ppl=6.04, wps=5818.9, ups=0.09, wpb=64867, bsz=128, num_updates=8175, lr=9.99426e-05, gnorm=2.332, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93514
2021-06-19 20:37:31 | INFO | train_inner | epoch 003: 2225 / 3002 loss=2.608, ppl=6.1, wps=5833.9, ups=0.09, wpb=64916, bsz=128, num_updates=8176, lr=9.99426e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93525
2021-06-19 20:37:42 | INFO | train_inner | epoch 003: 2226 / 3002 loss=2.692, ppl=6.46, wps=5729.6, ups=0.09, wpb=64829, bsz=128, num_updates=8177, lr=9.99426e-05, gnorm=2.435, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93537
2021-06-19 20:37:54 | INFO | train_inner | epoch 003: 2227 / 3002 loss=2.71, ppl=6.54, wps=5770.5, ups=0.09, wpb=64809, bsz=128, num_updates=8178, lr=9.99426e-05, gnorm=2.194, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93548
2021-06-19 20:38:05 | INFO | train_inner | epoch 003: 2228 / 3002 loss=2.588, ppl=6.01, wps=5849.1, ups=0.09, wpb=64830, bsz=128, num_updates=8179, lr=9.99426e-05, gnorm=2.504, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93559
2021-06-19 20:38:16 | INFO | train_inner | epoch 003: 2229 / 3002 loss=2.461, ppl=5.51, wps=5887.8, ups=0.09, wpb=64820, bsz=128, num_updates=8180, lr=9.99426e-05, gnorm=2.142, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93570
2021-06-19 20:38:27 | INFO | train_inner | epoch 003: 2230 / 3002 loss=2.712, ppl=6.55, wps=5836.1, ups=0.09, wpb=64473, bsz=128, num_updates=8181, lr=9.99425e-05, gnorm=2.122, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93581
2021-06-19 20:38:38 | INFO | train_inner | epoch 003: 2231 / 3002 loss=2.741, ppl=6.69, wps=5839, ups=0.09, wpb=64761, bsz=128, num_updates=8182, lr=9.99425e-05, gnorm=2.918, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93592
2021-06-19 20:38:49 | INFO | train_inner | epoch 003: 2232 / 3002 loss=2.676, ppl=6.39, wps=5826.4, ups=0.09, wpb=64792, bsz=128, num_updates=8183, lr=9.99425e-05, gnorm=2.324, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93603
2021-06-19 20:39:00 | INFO | train_inner | epoch 003: 2233 / 3002 loss=2.646, ppl=6.26, wps=5900.5, ups=0.09, wpb=64875, bsz=128, num_updates=8184, lr=9.99425e-05, gnorm=4.895, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93614
2021-06-19 20:39:11 | INFO | train_inner | epoch 003: 2234 / 3002 loss=2.723, ppl=6.6, wps=5706.2, ups=0.09, wpb=64821, bsz=128, num_updates=8185, lr=9.99425e-05, gnorm=2.249, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93626
2021-06-19 20:39:23 | INFO | train_inner | epoch 003: 2235 / 3002 loss=2.749, ppl=6.72, wps=5677.6, ups=0.09, wpb=64782, bsz=128, num_updates=8186, lr=9.99425e-05, gnorm=2.087, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93637
2021-06-19 20:39:34 | INFO | train_inner | epoch 003: 2236 / 3002 loss=2.654, ppl=6.29, wps=5928.2, ups=0.09, wpb=64812, bsz=128, num_updates=8187, lr=9.99425e-05, gnorm=1.99, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93648
2021-06-19 20:39:45 | INFO | train_inner | epoch 003: 2237 / 3002 loss=2.516, ppl=5.72, wps=5817.6, ups=0.09, wpb=64900, bsz=128, num_updates=8188, lr=9.99425e-05, gnorm=2.271, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93659
2021-06-19 20:39:56 | INFO | train_inner | epoch 003: 2238 / 3002 loss=2.551, ppl=5.86, wps=6052.9, ups=0.09, wpb=64904, bsz=128, num_updates=8189, lr=9.99425e-05, gnorm=2.173, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93670
2021-06-19 20:40:07 | INFO | train_inner | epoch 003: 2239 / 3002 loss=2.562, ppl=5.91, wps=5893.8, ups=0.09, wpb=64861, bsz=128, num_updates=8190, lr=9.99425e-05, gnorm=2.102, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93681
2021-06-19 20:40:18 | INFO | train_inner | epoch 003: 2240 / 3002 loss=2.637, ppl=6.22, wps=5768.1, ups=0.09, wpb=64785, bsz=128, num_updates=8191, lr=9.99425e-05, gnorm=2.281, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93692
2021-06-19 20:40:29 | INFO | train_inner | epoch 003: 2241 / 3002 loss=2.656, ppl=6.3, wps=5818.5, ups=0.09, wpb=64809, bsz=128, num_updates=8192, lr=9.99425e-05, gnorm=2.332, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93703
2021-06-19 20:40:40 | INFO | train_inner | epoch 003: 2242 / 3002 loss=2.656, ppl=6.3, wps=5790.6, ups=0.09, wpb=64877, bsz=128, num_updates=8193, lr=9.99425e-05, gnorm=2.211, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93714
2021-06-19 20:40:51 | INFO | train_inner | epoch 003: 2243 / 3002 loss=2.583, ppl=5.99, wps=5794.6, ups=0.09, wpb=64730, bsz=128, num_updates=8194, lr=9.99424e-05, gnorm=2.105, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93726
2021-06-19 20:41:02 | INFO | train_inner | epoch 003: 2244 / 3002 loss=2.759, ppl=6.77, wps=5855.2, ups=0.09, wpb=64858, bsz=128, num_updates=8195, lr=9.99424e-05, gnorm=2.215, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93737
2021-06-19 20:41:13 | INFO | train_inner | epoch 003: 2245 / 3002 loss=2.702, ppl=6.51, wps=5852.3, ups=0.09, wpb=64856, bsz=128, num_updates=8196, lr=9.99424e-05, gnorm=2.121, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93748
2021-06-19 20:41:25 | INFO | train_inner | epoch 003: 2246 / 3002 loss=2.707, ppl=6.53, wps=5742.5, ups=0.09, wpb=64814, bsz=128, num_updates=8197, lr=9.99424e-05, gnorm=2.37, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93759
2021-06-19 20:41:36 | INFO | train_inner | epoch 003: 2247 / 3002 loss=2.595, ppl=6.04, wps=5923.4, ups=0.09, wpb=64883, bsz=128, num_updates=8198, lr=9.99424e-05, gnorm=2.286, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93770
2021-06-19 20:41:47 | INFO | train_inner | epoch 003: 2248 / 3002 loss=2.64, ppl=6.23, wps=5976.2, ups=0.09, wpb=64862, bsz=128, num_updates=8199, lr=9.99424e-05, gnorm=2.171, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93781
2021-06-19 20:41:58 | INFO | train_inner | epoch 003: 2249 / 3002 loss=2.809, ppl=7.01, wps=5863.5, ups=0.09, wpb=64713, bsz=128, num_updates=8200, lr=9.99424e-05, gnorm=2.335, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93792
2021-06-19 20:42:09 | INFO | train_inner | epoch 003: 2250 / 3002 loss=2.581, ppl=5.99, wps=5914.1, ups=0.09, wpb=64752, bsz=128, num_updates=8201, lr=9.99424e-05, gnorm=2.264, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93803
2021-06-19 20:42:20 | INFO | train_inner | epoch 003: 2251 / 3002 loss=2.556, ppl=5.88, wps=5809.1, ups=0.09, wpb=64799, bsz=128, num_updates=8202, lr=9.99424e-05, gnorm=2.212, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93814
2021-06-19 20:42:31 | INFO | train_inner | epoch 003: 2252 / 3002 loss=2.647, ppl=6.26, wps=5918.6, ups=0.09, wpb=64805, bsz=128, num_updates=8203, lr=9.99424e-05, gnorm=2.146, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93825
2021-06-19 20:42:42 | INFO | train_inner | epoch 003: 2253 / 3002 loss=2.729, ppl=6.63, wps=5877.1, ups=0.09, wpb=64860, bsz=128, num_updates=8204, lr=9.99424e-05, gnorm=2.099, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93836
2021-06-19 20:42:53 | INFO | train_inner | epoch 003: 2254 / 3002 loss=2.696, ppl=6.48, wps=5718.6, ups=0.09, wpb=64765, bsz=128, num_updates=8205, lr=9.99424e-05, gnorm=2.185, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93847
2021-06-19 20:43:04 | INFO | train_inner | epoch 003: 2255 / 3002 loss=2.669, ppl=6.36, wps=5894.4, ups=0.09, wpb=64854, bsz=128, num_updates=8206, lr=9.99423e-05, gnorm=2.092, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93858
2021-06-19 20:43:15 | INFO | train_inner | epoch 003: 2256 / 3002 loss=2.621, ppl=6.15, wps=5948.4, ups=0.09, wpb=64862, bsz=128, num_updates=8207, lr=9.99423e-05, gnorm=2.329, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93869
2021-06-19 20:43:26 | INFO | train_inner | epoch 003: 2257 / 3002 loss=2.481, ppl=5.58, wps=5833.5, ups=0.09, wpb=64823, bsz=128, num_updates=8208, lr=9.99423e-05, gnorm=2.487, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93880
2021-06-19 20:43:37 | INFO | train_inner | epoch 003: 2258 / 3002 loss=2.709, ppl=6.54, wps=5871.8, ups=0.09, wpb=64882, bsz=128, num_updates=8209, lr=9.99423e-05, gnorm=6.786, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93891
2021-06-19 20:43:48 | INFO | train_inner | epoch 003: 2259 / 3002 loss=2.716, ppl=6.57, wps=5886, ups=0.09, wpb=64837, bsz=128, num_updates=8210, lr=9.99423e-05, gnorm=2.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93902
2021-06-19 20:43:59 | INFO | train_inner | epoch 003: 2260 / 3002 loss=2.69, ppl=6.45, wps=5791.9, ups=0.09, wpb=64765, bsz=128, num_updates=8211, lr=9.99423e-05, gnorm=2.11, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93914
2021-06-19 20:44:10 | INFO | train_inner | epoch 003: 2261 / 3002 loss=2.52, ppl=5.73, wps=5833.5, ups=0.09, wpb=64771, bsz=128, num_updates=8212, lr=9.99423e-05, gnorm=2.242, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93925
2021-06-19 20:44:22 | INFO | train_inner | epoch 003: 2262 / 3002 loss=2.689, ppl=6.45, wps=5834.7, ups=0.09, wpb=64937, bsz=128, num_updates=8213, lr=9.99423e-05, gnorm=2.246, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93936
2021-06-19 20:44:32 | INFO | train_inner | epoch 003: 2263 / 3002 loss=2.652, ppl=6.29, wps=5992.6, ups=0.09, wpb=64875, bsz=128, num_updates=8214, lr=9.99423e-05, gnorm=6.825, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93947
2021-06-19 20:44:43 | INFO | train_inner | epoch 003: 2264 / 3002 loss=2.666, ppl=6.34, wps=5826.2, ups=0.09, wpb=64919, bsz=128, num_updates=8215, lr=9.99423e-05, gnorm=2.279, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93958
2021-06-19 20:44:55 | INFO | train_inner | epoch 003: 2265 / 3002 loss=2.648, ppl=6.27, wps=5755.9, ups=0.09, wpb=64825, bsz=128, num_updates=8216, lr=9.99423e-05, gnorm=2.717, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93969
2021-06-19 20:45:06 | INFO | train_inner | epoch 003: 2266 / 3002 loss=2.733, ppl=6.65, wps=5824.7, ups=0.09, wpb=64841, bsz=128, num_updates=8217, lr=9.99423e-05, gnorm=2.854, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=93980
2021-06-19 20:45:17 | INFO | train_inner | epoch 003: 2267 / 3002 loss=2.655, ppl=6.3, wps=6011.6, ups=0.09, wpb=64880, bsz=128, num_updates=8218, lr=9.99423e-05, gnorm=2.357, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=93991
2021-06-19 20:45:28 | INFO | train_inner | epoch 003: 2268 / 3002 loss=2.693, ppl=6.46, wps=5878.8, ups=0.09, wpb=64822, bsz=128, num_updates=8219, lr=9.99422e-05, gnorm=2.5, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=94002
2021-06-19 20:45:39 | INFO | train_inner | epoch 003: 2269 / 3002 loss=2.911, ppl=7.52, wps=5769.8, ups=0.09, wpb=64856, bsz=128, num_updates=8220, lr=9.99422e-05, gnorm=4.351, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=94013
2021-06-19 20:45:50 | INFO | train_inner | epoch 003: 2270 / 3002 loss=2.657, ppl=6.31, wps=5840.3, ups=0.09, wpb=64775, bsz=128, num_updates=8221, lr=9.99422e-05, gnorm=2.23, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=94024
2021-06-19 20:46:01 | INFO | train_inner | epoch 003: 2271 / 3002 loss=2.702, ppl=6.51, wps=5805.1, ups=0.09, wpb=64781, bsz=128, num_updates=8222, lr=9.99422e-05, gnorm=2.218, loss_scale=1, train_wall=11, gb_free=2.8, wall=94036
2021-06-19 20:46:12 | INFO | train_inner | epoch 003: 2272 / 3002 loss=2.536, ppl=5.8, wps=5737, ups=0.09, wpb=64824, bsz=128, num_updates=8223, lr=9.99422e-05, gnorm=2.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=94047
2021-06-19 20:46:24 | INFO | train_inner | epoch 003: 2273 / 3002 loss=2.634, ppl=6.21, wps=5828.6, ups=0.09, wpb=64806, bsz=128, num_updates=8224, lr=9.99422e-05, gnorm=2.196, loss_scale=1, train_wall=11, gb_free=2.8, wall=94058
2021-06-19 20:46:35 | INFO | train_inner | epoch 003: 2274 / 3002 loss=2.681, ppl=6.41, wps=5842.9, ups=0.09, wpb=64790, bsz=128, num_updates=8225, lr=9.99422e-05, gnorm=3.763, loss_scale=1, train_wall=11, gb_free=2.8, wall=94069
2021-06-19 20:46:46 | INFO | train_inner | epoch 003: 2275 / 3002 loss=2.62, ppl=6.15, wps=5861.8, ups=0.09, wpb=64824, bsz=128, num_updates=8226, lr=9.99422e-05, gnorm=2.484, loss_scale=1, train_wall=11, gb_free=2.8, wall=94080
2021-06-19 20:46:57 | INFO | train_inner | epoch 003: 2276 / 3002 loss=2.533, ppl=5.79, wps=5778, ups=0.09, wpb=64934, bsz=128, num_updates=8227, lr=9.99422e-05, gnorm=2.228, loss_scale=1, train_wall=11, gb_free=2.8, wall=94091
2021-06-19 20:47:08 | INFO | train_inner | epoch 003: 2277 / 3002 loss=2.473, ppl=5.55, wps=5811.5, ups=0.09, wpb=64806, bsz=128, num_updates=8228, lr=9.99422e-05, gnorm=2.427, loss_scale=1, train_wall=11, gb_free=2.8, wall=94102
2021-06-19 20:47:19 | INFO | train_inner | epoch 003: 2278 / 3002 loss=2.704, ppl=6.52, wps=5909.5, ups=0.09, wpb=64875, bsz=128, num_updates=8229, lr=9.99422e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=94113
2021-06-19 20:47:30 | INFO | train_inner | epoch 003: 2279 / 3002 loss=2.647, ppl=6.26, wps=5717.1, ups=0.09, wpb=64855, bsz=128, num_updates=8230, lr=9.99422e-05, gnorm=2.215, loss_scale=1, train_wall=11, gb_free=2.8, wall=94125
2021-06-19 20:47:42 | INFO | train_inner | epoch 003: 2280 / 3002 loss=2.662, ppl=6.33, wps=5865.9, ups=0.09, wpb=64828, bsz=128, num_updates=8231, lr=9.99421e-05, gnorm=2.163, loss_scale=1, train_wall=11, gb_free=2.8, wall=94136
2021-06-19 20:47:53 | INFO | train_inner | epoch 003: 2281 / 3002 loss=2.554, ppl=5.87, wps=5767.3, ups=0.09, wpb=64931, bsz=128, num_updates=8232, lr=9.99421e-05, gnorm=6.642, loss_scale=1, train_wall=11, gb_free=2.8, wall=94147
2021-06-19 20:48:04 | INFO | train_inner | epoch 003: 2282 / 3002 loss=2.682, ppl=6.42, wps=5891.1, ups=0.09, wpb=64879, bsz=128, num_updates=8233, lr=9.99421e-05, gnorm=2.136, loss_scale=1, train_wall=11, gb_free=2.8, wall=94158
2021-06-19 20:48:15 | INFO | train_inner | epoch 003: 2283 / 3002 loss=2.45, ppl=5.47, wps=5908.5, ups=0.09, wpb=64879, bsz=128, num_updates=8234, lr=9.99421e-05, gnorm=2.088, loss_scale=1, train_wall=11, gb_free=2.8, wall=94169
2021-06-19 20:48:26 | INFO | train_inner | epoch 003: 2284 / 3002 loss=2.672, ppl=6.38, wps=5782.3, ups=0.09, wpb=64816, bsz=128, num_updates=8235, lr=9.99421e-05, gnorm=2.542, loss_scale=1, train_wall=11, gb_free=2.8, wall=94180
2021-06-19 20:48:37 | INFO | train_inner | epoch 003: 2285 / 3002 loss=2.716, ppl=6.57, wps=5976.3, ups=0.09, wpb=64761, bsz=128, num_updates=8236, lr=9.99421e-05, gnorm=2.13, loss_scale=1, train_wall=10, gb_free=2.8, wall=94191
2021-06-19 20:48:48 | INFO | train_inner | epoch 003: 2286 / 3002 loss=2.591, ppl=6.03, wps=5905.4, ups=0.09, wpb=64898, bsz=128, num_updates=8237, lr=9.99421e-05, gnorm=2.115, loss_scale=1, train_wall=11, gb_free=2.8, wall=94202
2021-06-19 20:48:59 | INFO | train_inner | epoch 003: 2287 / 3002 loss=2.61, ppl=6.11, wps=5797, ups=0.09, wpb=64902, bsz=128, num_updates=8238, lr=9.99421e-05, gnorm=2.788, loss_scale=1, train_wall=11, gb_free=2.8, wall=94213
2021-06-19 20:49:10 | INFO | train_inner | epoch 003: 2288 / 3002 loss=2.575, ppl=5.96, wps=5849.2, ups=0.09, wpb=64748, bsz=128, num_updates=8239, lr=9.99421e-05, gnorm=2.264, loss_scale=1, train_wall=11, gb_free=2.8, wall=94224
2021-06-19 20:49:21 | INFO | train_inner | epoch 003: 2289 / 3002 loss=2.711, ppl=6.55, wps=5844.1, ups=0.09, wpb=64838, bsz=128, num_updates=8240, lr=9.99421e-05, gnorm=2.622, loss_scale=1, train_wall=11, gb_free=2.8, wall=94235
2021-06-19 20:49:32 | INFO | train_inner | epoch 003: 2290 / 3002 loss=2.759, ppl=6.77, wps=5853.2, ups=0.09, wpb=64859, bsz=128, num_updates=8241, lr=9.99421e-05, gnorm=2.239, loss_scale=1, train_wall=11, gb_free=2.8, wall=94247
2021-06-19 20:49:43 | INFO | train_inner | epoch 003: 2291 / 3002 loss=2.667, ppl=6.35, wps=5812.4, ups=0.09, wpb=64810, bsz=128, num_updates=8242, lr=9.99421e-05, gnorm=2.163, loss_scale=1, train_wall=11, gb_free=2.8, wall=94258
2021-06-19 20:49:55 | INFO | train_inner | epoch 003: 2292 / 3002 loss=2.61, ppl=6.1, wps=5811.8, ups=0.09, wpb=64856, bsz=128, num_updates=8243, lr=9.99421e-05, gnorm=2.257, loss_scale=1, train_wall=11, gb_free=2.8, wall=94269
2021-06-19 20:50:06 | INFO | train_inner | epoch 003: 2293 / 3002 loss=2.569, ppl=5.94, wps=5769.5, ups=0.09, wpb=64910, bsz=128, num_updates=8244, lr=9.9942e-05, gnorm=2.115, loss_scale=1, train_wall=11, gb_free=2.8, wall=94280
2021-06-19 20:50:17 | INFO | train_inner | epoch 003: 2294 / 3002 loss=2.567, ppl=5.93, wps=5765, ups=0.09, wpb=64899, bsz=128, num_updates=8245, lr=9.9942e-05, gnorm=2.053, loss_scale=1, train_wall=11, gb_free=2.8, wall=94291
2021-06-19 20:50:28 | INFO | train_inner | epoch 003: 2295 / 3002 loss=2.682, ppl=6.42, wps=5862.2, ups=0.09, wpb=64844, bsz=128, num_updates=8246, lr=9.9942e-05, gnorm=2.111, loss_scale=1, train_wall=11, gb_free=2.8, wall=94302
2021-06-19 20:50:39 | INFO | train_inner | epoch 003: 2296 / 3002 loss=2.641, ppl=6.24, wps=5774.9, ups=0.09, wpb=64848, bsz=128, num_updates=8247, lr=9.9942e-05, gnorm=2.198, loss_scale=1, train_wall=11, gb_free=2.8, wall=94314
2021-06-19 20:50:51 | INFO | train_inner | epoch 003: 2297 / 3002 loss=2.627, ppl=6.18, wps=5801.7, ups=0.09, wpb=64775, bsz=128, num_updates=8248, lr=9.9942e-05, gnorm=2.217, loss_scale=1, train_wall=11, gb_free=2.8, wall=94325
2021-06-19 20:51:02 | INFO | train_inner | epoch 003: 2298 / 3002 loss=2.542, ppl=5.82, wps=5892.5, ups=0.09, wpb=64838, bsz=128, num_updates=8249, lr=9.9942e-05, gnorm=2.098, loss_scale=1, train_wall=11, gb_free=2.8, wall=94336
2021-06-19 20:51:12 | INFO | train_inner | epoch 003: 2299 / 3002 loss=2.62, ppl=6.15, wps=5947.7, ups=0.09, wpb=64894, bsz=128, num_updates=8250, lr=9.9942e-05, gnorm=2.116, loss_scale=1, train_wall=10, gb_free=2.8, wall=94347
2021-06-19 20:51:23 | INFO | train_inner | epoch 003: 2300 / 3002 loss=2.596, ppl=6.04, wps=5915.8, ups=0.09, wpb=64854, bsz=128, num_updates=8251, lr=9.9942e-05, gnorm=2.105, loss_scale=1, train_wall=11, gb_free=2.8, wall=94358
2021-06-19 20:51:35 | INFO | train_inner | epoch 003: 2301 / 3002 loss=2.552, ppl=5.86, wps=5721.6, ups=0.09, wpb=64755, bsz=128, num_updates=8252, lr=9.9942e-05, gnorm=2.746, loss_scale=1, train_wall=11, gb_free=2.8, wall=94369
2021-06-19 20:51:46 | INFO | train_inner | epoch 003: 2302 / 3002 loss=2.524, ppl=5.75, wps=5908.5, ups=0.09, wpb=64732, bsz=128, num_updates=8253, lr=9.9942e-05, gnorm=2.17, loss_scale=1, train_wall=11, gb_free=2.8, wall=94380
2021-06-19 20:51:56 | INFO | train_inner | epoch 003: 2303 / 3002 loss=2.54, ppl=5.81, wps=6009.3, ups=0.09, wpb=64872, bsz=128, num_updates=8254, lr=9.9942e-05, gnorm=2.216, loss_scale=1, train_wall=10, gb_free=2.8, wall=94391
2021-06-19 20:52:08 | INFO | train_inner | epoch 003: 2304 / 3002 loss=2.533, ppl=5.79, wps=5822.5, ups=0.09, wpb=64898, bsz=128, num_updates=8255, lr=9.9942e-05, gnorm=2.159, loss_scale=1, train_wall=11, gb_free=2.8, wall=94402
2021-06-19 20:52:19 | INFO | train_inner | epoch 003: 2305 / 3002 loss=2.633, ppl=6.2, wps=5806.8, ups=0.09, wpb=64764, bsz=128, num_updates=8256, lr=9.99419e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=94413
2021-06-19 20:52:30 | INFO | train_inner | epoch 003: 2306 / 3002 loss=2.53, ppl=5.78, wps=5819.9, ups=0.09, wpb=64883, bsz=128, num_updates=8257, lr=9.99419e-05, gnorm=2.116, loss_scale=1, train_wall=11, gb_free=2.8, wall=94424
2021-06-19 20:52:41 | INFO | train_inner | epoch 003: 2307 / 3002 loss=2.686, ppl=6.44, wps=5911.2, ups=0.09, wpb=64848, bsz=128, num_updates=8258, lr=9.99419e-05, gnorm=2.185, loss_scale=1, train_wall=11, gb_free=2.8, wall=94435
2021-06-19 20:52:52 | INFO | train_inner | epoch 003: 2308 / 3002 loss=2.582, ppl=5.99, wps=5782.8, ups=0.09, wpb=64820, bsz=128, num_updates=8259, lr=9.99419e-05, gnorm=2.167, loss_scale=1, train_wall=11, gb_free=2.8, wall=94446
2021-06-19 20:53:03 | INFO | train_inner | epoch 003: 2309 / 3002 loss=2.646, ppl=6.26, wps=5781.9, ups=0.09, wpb=64823, bsz=128, num_updates=8260, lr=9.99419e-05, gnorm=2.15, loss_scale=1, train_wall=11, gb_free=2.8, wall=94458
2021-06-19 20:53:14 | INFO | train_inner | epoch 003: 2310 / 3002 loss=2.632, ppl=6.2, wps=5926.5, ups=0.09, wpb=64807, bsz=128, num_updates=8261, lr=9.99419e-05, gnorm=2.608, loss_scale=1, train_wall=10, gb_free=2.8, wall=94469
2021-06-19 20:53:25 | INFO | train_inner | epoch 003: 2311 / 3002 loss=2.562, ppl=5.91, wps=5859.9, ups=0.09, wpb=64715, bsz=128, num_updates=8262, lr=9.99419e-05, gnorm=2.014, loss_scale=1, train_wall=11, gb_free=2.8, wall=94480
2021-06-19 20:53:36 | INFO | train_inner | epoch 003: 2312 / 3002 loss=2.653, ppl=6.29, wps=5808.4, ups=0.09, wpb=64888, bsz=128, num_updates=8263, lr=9.99419e-05, gnorm=2.079, loss_scale=1, train_wall=11, gb_free=2.8, wall=94491
2021-06-19 20:53:47 | INFO | train_inner | epoch 003: 2313 / 3002 loss=2.746, ppl=6.71, wps=5887.2, ups=0.09, wpb=64841, bsz=128, num_updates=8264, lr=9.99419e-05, gnorm=2.076, loss_scale=1, train_wall=11, gb_free=2.8, wall=94502
2021-06-19 20:53:59 | INFO | train_inner | epoch 003: 2314 / 3002 loss=2.491, ppl=5.62, wps=5735.1, ups=0.09, wpb=64744, bsz=128, num_updates=8265, lr=9.99419e-05, gnorm=2.091, loss_scale=1, train_wall=11, gb_free=2.8, wall=94513
2021-06-19 20:54:10 | INFO | train_inner | epoch 003: 2315 / 3002 loss=2.424, ppl=5.37, wps=5916.3, ups=0.09, wpb=64789, bsz=128, num_updates=8266, lr=9.99419e-05, gnorm=2.073, loss_scale=1, train_wall=10, gb_free=2.8, wall=94524
2021-06-19 20:54:21 | INFO | train_inner | epoch 003: 2316 / 3002 loss=2.572, ppl=5.95, wps=5864.5, ups=0.09, wpb=64842, bsz=128, num_updates=8267, lr=9.99419e-05, gnorm=3.552, loss_scale=1, train_wall=11, gb_free=2.8, wall=94535
2021-06-19 20:54:32 | INFO | train_inner | epoch 003: 2317 / 3002 loss=2.535, ppl=5.8, wps=5839.4, ups=0.09, wpb=64865, bsz=128, num_updates=8268, lr=9.99419e-05, gnorm=2.169, loss_scale=1, train_wall=11, gb_free=2.8, wall=94546
2021-06-19 20:54:43 | INFO | train_inner | epoch 003: 2318 / 3002 loss=2.643, ppl=6.25, wps=5721.1, ups=0.09, wpb=64739, bsz=128, num_updates=8269, lr=9.99418e-05, gnorm=2.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=94558
2021-06-19 20:54:54 | INFO | train_inner | epoch 003: 2319 / 3002 loss=2.829, ppl=7.11, wps=5787.7, ups=0.09, wpb=64889, bsz=128, num_updates=8270, lr=9.99418e-05, gnorm=2.218, loss_scale=1, train_wall=11, gb_free=2.8, wall=94569
2021-06-19 20:55:06 | INFO | train_inner | epoch 003: 2320 / 3002 loss=2.693, ppl=6.47, wps=5839.5, ups=0.09, wpb=64813, bsz=128, num_updates=8271, lr=9.99418e-05, gnorm=2.191, loss_scale=1, train_wall=11, gb_free=2.8, wall=94580
2021-06-19 20:55:16 | INFO | train_inner | epoch 003: 2321 / 3002 loss=2.603, ppl=6.07, wps=5937.5, ups=0.09, wpb=64811, bsz=128, num_updates=8272, lr=9.99418e-05, gnorm=2.158, loss_scale=1, train_wall=10, gb_free=2.8, wall=94591
2021-06-19 20:55:27 | INFO | train_inner | epoch 003: 2322 / 3002 loss=2.666, ppl=6.35, wps=5981.6, ups=0.09, wpb=64842, bsz=128, num_updates=8273, lr=9.99418e-05, gnorm=2.156, loss_scale=1, train_wall=10, gb_free=2.8, wall=94602
2021-06-19 20:55:38 | INFO | train_inner | epoch 003: 2323 / 3002 loss=2.352, ppl=5.1, wps=5834.6, ups=0.09, wpb=64889, bsz=128, num_updates=8274, lr=9.99418e-05, gnorm=2.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=94613
2021-06-19 20:55:50 | INFO | train_inner | epoch 003: 2324 / 3002 loss=2.657, ppl=6.31, wps=5788.7, ups=0.09, wpb=64688, bsz=128, num_updates=8275, lr=9.99418e-05, gnorm=2.177, loss_scale=1, train_wall=11, gb_free=2.8, wall=94624
2021-06-19 20:56:00 | INFO | train_inner | epoch 003: 2325 / 3002 loss=2.572, ppl=5.95, wps=5941.8, ups=0.09, wpb=64802, bsz=128, num_updates=8276, lr=9.99418e-05, gnorm=2.331, loss_scale=1, train_wall=10, gb_free=2.8, wall=94635
2021-06-19 20:56:12 | INFO | train_inner | epoch 003: 2326 / 3002 loss=2.683, ppl=6.42, wps=5812.3, ups=0.09, wpb=64820, bsz=128, num_updates=8277, lr=9.99418e-05, gnorm=2.173, loss_scale=1, train_wall=11, gb_free=2.8, wall=94646
2021-06-19 20:56:23 | INFO | train_inner | epoch 003: 2327 / 3002 loss=2.449, ppl=5.46, wps=5856.8, ups=0.09, wpb=64897, bsz=128, num_updates=8278, lr=9.99418e-05, gnorm=2.019, loss_scale=1, train_wall=11, gb_free=2.8, wall=94657
2021-06-19 20:56:34 | INFO | train_inner | epoch 003: 2328 / 3002 loss=2.576, ppl=5.96, wps=5819.5, ups=0.09, wpb=64829, bsz=128, num_updates=8279, lr=9.99418e-05, gnorm=2.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=94668
2021-06-19 20:56:45 | INFO | train_inner | epoch 003: 2329 / 3002 loss=2.757, ppl=6.76, wps=5845.7, ups=0.09, wpb=64765, bsz=128, num_updates=8280, lr=9.99418e-05, gnorm=2.048, loss_scale=1, train_wall=11, gb_free=2.8, wall=94679
2021-06-19 20:56:56 | INFO | train_inner | epoch 003: 2330 / 3002 loss=2.62, ppl=6.15, wps=5888, ups=0.09, wpb=64886, bsz=128, num_updates=8281, lr=9.99417e-05, gnorm=2.147, loss_scale=1, train_wall=11, gb_free=2.8, wall=94690
2021-06-19 20:57:07 | INFO | train_inner | epoch 003: 2331 / 3002 loss=2.667, ppl=6.35, wps=5985.6, ups=0.09, wpb=64758, bsz=128, num_updates=8282, lr=9.99417e-05, gnorm=2.236, loss_scale=1, train_wall=10, gb_free=2.8, wall=94701
2021-06-19 20:57:18 | INFO | train_inner | epoch 003: 2332 / 3002 loss=2.604, ppl=6.08, wps=5892.7, ups=0.09, wpb=64840, bsz=128, num_updates=8283, lr=9.99417e-05, gnorm=2.077, loss_scale=1, train_wall=11, gb_free=2.8, wall=94712
2021-06-19 20:57:29 | INFO | train_inner | epoch 003: 2333 / 3002 loss=2.682, ppl=6.42, wps=5804.6, ups=0.09, wpb=64826, bsz=128, num_updates=8284, lr=9.99417e-05, gnorm=2.14, loss_scale=1, train_wall=11, gb_free=2.8, wall=94723
2021-06-19 20:57:40 | INFO | train_inner | epoch 003: 2334 / 3002 loss=2.569, ppl=5.93, wps=5791.7, ups=0.09, wpb=64776, bsz=128, num_updates=8285, lr=9.99417e-05, gnorm=2.294, loss_scale=1, train_wall=11, gb_free=2.8, wall=94734
2021-06-19 20:57:51 | INFO | train_inner | epoch 003: 2335 / 3002 loss=2.62, ppl=6.15, wps=5878.1, ups=0.09, wpb=64860, bsz=128, num_updates=8286, lr=9.99417e-05, gnorm=2.132, loss_scale=1, train_wall=11, gb_free=2.8, wall=94745
2021-06-19 20:58:02 | INFO | train_inner | epoch 003: 2336 / 3002 loss=2.713, ppl=6.56, wps=5875.4, ups=0.09, wpb=64830, bsz=128, num_updates=8287, lr=9.99417e-05, gnorm=2.424, loss_scale=1, train_wall=11, gb_free=2.8, wall=94757
2021-06-19 20:58:13 | INFO | train_inner | epoch 003: 2337 / 3002 loss=2.707, ppl=6.53, wps=5783.8, ups=0.09, wpb=64850, bsz=128, num_updates=8288, lr=9.99417e-05, gnorm=2.101, loss_scale=1, train_wall=11, gb_free=2.8, wall=94768
2021-06-19 20:58:25 | INFO | train_inner | epoch 003: 2338 / 3002 loss=2.723, ppl=6.6, wps=5815.3, ups=0.09, wpb=64856, bsz=128, num_updates=8289, lr=9.99417e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=94779
2021-06-19 20:58:36 | INFO | train_inner | epoch 003: 2339 / 3002 loss=2.604, ppl=6.08, wps=5857.5, ups=0.09, wpb=64856, bsz=128, num_updates=8290, lr=9.99417e-05, gnorm=2.201, loss_scale=1, train_wall=11, gb_free=2.8, wall=94790
2021-06-19 20:58:47 | INFO | train_inner | epoch 003: 2340 / 3002 loss=2.45, ppl=5.47, wps=5793, ups=0.09, wpb=64813, bsz=128, num_updates=8291, lr=9.99417e-05, gnorm=2.621, loss_scale=1, train_wall=11, gb_free=2.8, wall=94801
2021-06-19 20:58:58 | INFO | train_inner | epoch 003: 2341 / 3002 loss=2.644, ppl=6.25, wps=5839.4, ups=0.09, wpb=64902, bsz=128, num_updates=8292, lr=9.99417e-05, gnorm=2.185, loss_scale=1, train_wall=11, gb_free=2.8, wall=94812
2021-06-19 20:59:09 | INFO | train_inner | epoch 003: 2342 / 3002 loss=2.692, ppl=6.46, wps=5865.4, ups=0.09, wpb=64762, bsz=128, num_updates=8293, lr=9.99417e-05, gnorm=2.886, loss_scale=1, train_wall=11, gb_free=2.8, wall=94823
2021-06-19 20:59:20 | INFO | train_inner | epoch 003: 2343 / 3002 loss=2.455, ppl=5.48, wps=5907.1, ups=0.09, wpb=64886, bsz=128, num_updates=8294, lr=9.99416e-05, gnorm=2.183, loss_scale=1, train_wall=11, gb_free=2.8, wall=94834
2021-06-19 20:59:31 | INFO | train_inner | epoch 003: 2344 / 3002 loss=2.53, ppl=5.78, wps=5855.3, ups=0.09, wpb=64774, bsz=128, num_updates=8295, lr=9.99416e-05, gnorm=2.112, loss_scale=1, train_wall=11, gb_free=2.8, wall=94845
2021-06-19 20:59:42 | INFO | train_inner | epoch 003: 2345 / 3002 loss=2.666, ppl=6.35, wps=5888.7, ups=0.09, wpb=64846, bsz=128, num_updates=8296, lr=9.99416e-05, gnorm=2.477, loss_scale=1, train_wall=11, gb_free=2.8, wall=94856
2021-06-19 20:59:53 | INFO | train_inner | epoch 003: 2346 / 3002 loss=2.502, ppl=5.66, wps=5807.1, ups=0.09, wpb=64771, bsz=128, num_updates=8297, lr=9.99416e-05, gnorm=2.219, loss_scale=1, train_wall=11, gb_free=2.8, wall=94868
2021-06-19 21:00:04 | INFO | train_inner | epoch 003: 2347 / 3002 loss=2.777, ppl=6.86, wps=5839.1, ups=0.09, wpb=64830, bsz=128, num_updates=8298, lr=9.99416e-05, gnorm=2.374, loss_scale=1, train_wall=11, gb_free=2.8, wall=94879
2021-06-19 21:00:15 | INFO | train_inner | epoch 003: 2348 / 3002 loss=2.58, ppl=5.98, wps=5803.9, ups=0.09, wpb=64800, bsz=128, num_updates=8299, lr=9.99416e-05, gnorm=2.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=94890
2021-06-19 21:00:27 | INFO | train_inner | epoch 003: 2349 / 3002 loss=2.55, ppl=5.86, wps=5855.6, ups=0.09, wpb=64861, bsz=128, num_updates=8300, lr=9.99416e-05, gnorm=2.146, loss_scale=1, train_wall=11, gb_free=2.8, wall=94901
2021-06-19 21:00:38 | INFO | train_inner | epoch 003: 2350 / 3002 loss=2.548, ppl=5.85, wps=5801.9, ups=0.09, wpb=64765, bsz=128, num_updates=8301, lr=9.99416e-05, gnorm=4.819, loss_scale=1, train_wall=11, gb_free=2.8, wall=94912
2021-06-19 21:00:49 | INFO | train_inner | epoch 003: 2351 / 3002 loss=2.709, ppl=6.54, wps=5925.6, ups=0.09, wpb=64821, bsz=128, num_updates=8302, lr=9.99416e-05, gnorm=2.142, loss_scale=1, train_wall=10, gb_free=2.8, wall=94923
2021-06-19 21:01:00 | INFO | train_inner | epoch 003: 2352 / 3002 loss=2.512, ppl=5.71, wps=5939, ups=0.09, wpb=64880, bsz=128, num_updates=8303, lr=9.99416e-05, gnorm=2.167, loss_scale=1, train_wall=10, gb_free=2.8, wall=94934
2021-06-19 21:01:11 | INFO | train_inner | epoch 003: 2353 / 3002 loss=2.691, ppl=6.46, wps=5756, ups=0.09, wpb=64824, bsz=128, num_updates=8304, lr=9.99416e-05, gnorm=2.108, loss_scale=1, train_wall=11, gb_free=2.8, wall=94945
2021-06-19 21:01:22 | INFO | train_inner | epoch 003: 2354 / 3002 loss=2.725, ppl=6.61, wps=5962.8, ups=0.09, wpb=64775, bsz=128, num_updates=8305, lr=9.99416e-05, gnorm=3.869, loss_scale=1, train_wall=10, gb_free=2.8, wall=94956
2021-06-19 21:01:33 | INFO | train_inner | epoch 003: 2355 / 3002 loss=2.709, ppl=6.54, wps=5749.3, ups=0.09, wpb=64809, bsz=128, num_updates=8306, lr=9.99415e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=94967
2021-06-19 21:01:44 | INFO | train_inner | epoch 003: 2356 / 3002 loss=2.589, ppl=6.02, wps=5927, ups=0.09, wpb=64772, bsz=128, num_updates=8307, lr=9.99415e-05, gnorm=2.114, loss_scale=1, train_wall=10, gb_free=2.8, wall=94978
2021-06-19 21:01:55 | INFO | train_inner | epoch 003: 2357 / 3002 loss=2.482, ppl=5.58, wps=5770.4, ups=0.09, wpb=64883, bsz=128, num_updates=8308, lr=9.99415e-05, gnorm=2.095, loss_scale=1, train_wall=11, gb_free=2.8, wall=94989
2021-06-19 21:02:06 | INFO | train_inner | epoch 003: 2358 / 3002 loss=2.843, ppl=7.18, wps=5848.7, ups=0.09, wpb=64829, bsz=128, num_updates=8309, lr=9.99415e-05, gnorm=2.139, loss_scale=1, train_wall=11, gb_free=2.8, wall=95001
2021-06-19 21:02:17 | INFO | train_inner | epoch 003: 2359 / 3002 loss=2.646, ppl=6.26, wps=5790.3, ups=0.09, wpb=64803, bsz=128, num_updates=8310, lr=9.99415e-05, gnorm=2.135, loss_scale=1, train_wall=11, gb_free=2.8, wall=95012
2021-06-19 21:02:28 | INFO | train_inner | epoch 003: 2360 / 3002 loss=2.731, ppl=6.64, wps=5840.8, ups=0.09, wpb=64746, bsz=128, num_updates=8311, lr=9.99415e-05, gnorm=2.049, loss_scale=1, train_wall=11, gb_free=2.8, wall=95023
2021-06-19 21:02:40 | INFO | train_inner | epoch 003: 2361 / 3002 loss=2.51, ppl=5.7, wps=5811.8, ups=0.09, wpb=64898, bsz=128, num_updates=8312, lr=9.99415e-05, gnorm=2.131, loss_scale=1, train_wall=11, gb_free=2.8, wall=95034
2021-06-19 21:02:51 | INFO | train_inner | epoch 003: 2362 / 3002 loss=2.594, ppl=6.04, wps=5859.7, ups=0.09, wpb=64860, bsz=128, num_updates=8313, lr=9.99415e-05, gnorm=2.064, loss_scale=1, train_wall=11, gb_free=2.8, wall=95045
2021-06-19 21:03:02 | INFO | train_inner | epoch 003: 2363 / 3002 loss=2.647, ppl=6.26, wps=5910.8, ups=0.09, wpb=64864, bsz=128, num_updates=8314, lr=9.99415e-05, gnorm=2.327, loss_scale=1, train_wall=11, gb_free=2.8, wall=95056
2021-06-19 21:03:13 | INFO | train_inner | epoch 003: 2364 / 3002 loss=2.551, ppl=5.86, wps=5840, ups=0.09, wpb=64883, bsz=128, num_updates=8315, lr=9.99415e-05, gnorm=2.37, loss_scale=1, train_wall=11, gb_free=2.8, wall=95067
2021-06-19 21:03:24 | INFO | train_inner | epoch 003: 2365 / 3002 loss=2.623, ppl=6.16, wps=5873.4, ups=0.09, wpb=64914, bsz=128, num_updates=8316, lr=9.99415e-05, gnorm=2.078, loss_scale=1, train_wall=11, gb_free=2.8, wall=95078
2021-06-19 21:03:35 | INFO | train_inner | epoch 003: 2366 / 3002 loss=2.768, ppl=6.81, wps=5828.1, ups=0.09, wpb=64856, bsz=128, num_updates=8317, lr=9.99415e-05, gnorm=2.221, loss_scale=1, train_wall=11, gb_free=2.8, wall=95089
2021-06-19 21:03:46 | INFO | train_inner | epoch 003: 2367 / 3002 loss=2.604, ppl=6.08, wps=5853.8, ups=0.09, wpb=64831, bsz=128, num_updates=8318, lr=9.99415e-05, gnorm=2.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=95100
2021-06-19 21:03:57 | INFO | train_inner | epoch 003: 2368 / 3002 loss=2.64, ppl=6.23, wps=5765, ups=0.09, wpb=64821, bsz=128, num_updates=8319, lr=9.99414e-05, gnorm=2.184, loss_scale=1, train_wall=11, gb_free=2.8, wall=95112
2021-06-19 21:04:09 | INFO | train_inner | epoch 003: 2369 / 3002 loss=2.666, ppl=6.35, wps=5741.2, ups=0.09, wpb=64863, bsz=128, num_updates=8320, lr=9.99414e-05, gnorm=2.211, loss_scale=1, train_wall=11, gb_free=2.8, wall=95123
2021-06-19 21:04:20 | INFO | train_inner | epoch 003: 2370 / 3002 loss=2.701, ppl=6.5, wps=5890.1, ups=0.09, wpb=64848, bsz=128, num_updates=8321, lr=9.99414e-05, gnorm=2.145, loss_scale=1, train_wall=11, gb_free=2.8, wall=95134
2021-06-19 21:04:31 | INFO | train_inner | epoch 003: 2371 / 3002 loss=2.628, ppl=6.18, wps=5830.7, ups=0.09, wpb=64830, bsz=128, num_updates=8322, lr=9.99414e-05, gnorm=2.115, loss_scale=1, train_wall=11, gb_free=2.8, wall=95145
2021-06-19 21:04:42 | INFO | train_inner | epoch 003: 2372 / 3002 loss=2.586, ppl=6, wps=5819.9, ups=0.09, wpb=64798, bsz=128, num_updates=8323, lr=9.99414e-05, gnorm=2.053, loss_scale=1, train_wall=11, gb_free=2.8, wall=95156
2021-06-19 21:04:53 | INFO | train_inner | epoch 003: 2373 / 3002 loss=2.644, ppl=6.25, wps=5877.1, ups=0.09, wpb=64789, bsz=128, num_updates=8324, lr=9.99414e-05, gnorm=2.068, loss_scale=1, train_wall=11, gb_free=2.8, wall=95167
2021-06-19 21:05:04 | INFO | train_inner | epoch 003: 2374 / 3002 loss=2.581, ppl=5.98, wps=5821, ups=0.09, wpb=64773, bsz=128, num_updates=8325, lr=9.99414e-05, gnorm=2.191, loss_scale=1, train_wall=11, gb_free=2.8, wall=95178
2021-06-19 21:05:15 | INFO | train_inner | epoch 003: 2375 / 3002 loss=2.611, ppl=6.11, wps=5702.5, ups=0.09, wpb=64848, bsz=128, num_updates=8326, lr=9.99414e-05, gnorm=2.097, loss_scale=1, train_wall=11, gb_free=2.8, wall=95190
2021-06-19 21:05:26 | INFO | train_inner | epoch 003: 2376 / 3002 loss=2.684, ppl=6.43, wps=5919, ups=0.09, wpb=64876, bsz=128, num_updates=8327, lr=9.99414e-05, gnorm=2.068, loss_scale=1, train_wall=11, gb_free=2.8, wall=95201
2021-06-19 21:05:37 | INFO | train_inner | epoch 003: 2377 / 3002 loss=2.661, ppl=6.33, wps=5832.4, ups=0.09, wpb=64854, bsz=128, num_updates=8328, lr=9.99414e-05, gnorm=2.09, loss_scale=1, train_wall=11, gb_free=2.8, wall=95212
2021-06-19 21:05:49 | INFO | train_inner | epoch 003: 2378 / 3002 loss=2.524, ppl=5.75, wps=5819.7, ups=0.09, wpb=64750, bsz=128, num_updates=8329, lr=9.99414e-05, gnorm=2.168, loss_scale=1, train_wall=11, gb_free=2.8, wall=95223
2021-06-19 21:06:00 | INFO | train_inner | epoch 003: 2379 / 3002 loss=2.632, ppl=6.2, wps=5819.4, ups=0.09, wpb=64825, bsz=128, num_updates=8330, lr=9.99414e-05, gnorm=2.05, loss_scale=1, train_wall=11, gb_free=2.8, wall=95234
2021-06-19 21:06:11 | INFO | train_inner | epoch 003: 2380 / 3002 loss=2.625, ppl=6.17, wps=5850.9, ups=0.09, wpb=64858, bsz=128, num_updates=8331, lr=9.99413e-05, gnorm=2.147, loss_scale=1, train_wall=11, gb_free=2.8, wall=95245
2021-06-19 21:06:22 | INFO | train_inner | epoch 003: 2381 / 3002 loss=2.618, ppl=6.14, wps=5956.6, ups=0.09, wpb=64846, bsz=128, num_updates=8332, lr=9.99413e-05, gnorm=2.133, loss_scale=1, train_wall=10, gb_free=2.8, wall=95256
2021-06-19 21:06:33 | INFO | train_inner | epoch 003: 2382 / 3002 loss=2.609, ppl=6.1, wps=5776.6, ups=0.09, wpb=64763, bsz=128, num_updates=8333, lr=9.99413e-05, gnorm=2.181, loss_scale=1, train_wall=11, gb_free=2.8, wall=95267
2021-06-19 21:06:44 | INFO | train_inner | epoch 003: 2383 / 3002 loss=2.768, ppl=6.81, wps=5876.4, ups=0.09, wpb=64805, bsz=128, num_updates=8334, lr=9.99413e-05, gnorm=2.17, loss_scale=1, train_wall=11, gb_free=2.8, wall=95278
2021-06-19 21:06:55 | INFO | train_inner | epoch 003: 2384 / 3002 loss=2.548, ppl=5.85, wps=5903.8, ups=0.09, wpb=64886, bsz=128, num_updates=8335, lr=9.99413e-05, gnorm=2.067, loss_scale=1, train_wall=11, gb_free=2.8, wall=95289
2021-06-19 21:07:06 | INFO | train_inner | epoch 003: 2385 / 3002 loss=2.581, ppl=5.98, wps=5916.1, ups=0.09, wpb=64867, bsz=128, num_updates=8336, lr=9.99413e-05, gnorm=2.073, loss_scale=1, train_wall=11, gb_free=2.8, wall=95300
2021-06-19 21:07:17 | INFO | train_inner | epoch 003: 2386 / 3002 loss=2.585, ppl=6, wps=5891.6, ups=0.09, wpb=64854, bsz=128, num_updates=8337, lr=9.99413e-05, gnorm=2.246, loss_scale=1, train_wall=11, gb_free=2.8, wall=95311
2021-06-19 21:07:28 | INFO | train_inner | epoch 003: 2387 / 3002 loss=2.678, ppl=6.4, wps=5808, ups=0.09, wpb=64826, bsz=128, num_updates=8338, lr=9.99413e-05, gnorm=2.838, loss_scale=1, train_wall=11, gb_free=2.8, wall=95322
2021-06-19 21:07:39 | INFO | train_inner | epoch 003: 2388 / 3002 loss=2.459, ppl=5.5, wps=5842.6, ups=0.09, wpb=64861, bsz=128, num_updates=8339, lr=9.99413e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=95334
2021-06-19 21:07:50 | INFO | train_inner | epoch 003: 2389 / 3002 loss=2.662, ppl=6.33, wps=5823.3, ups=0.09, wpb=64865, bsz=128, num_updates=8340, lr=9.99413e-05, gnorm=2.11, loss_scale=1, train_wall=11, gb_free=2.8, wall=95345
2021-06-19 21:08:02 | INFO | train_inner | epoch 003: 2390 / 3002 loss=2.568, ppl=5.93, wps=5778.5, ups=0.09, wpb=64792, bsz=128, num_updates=8341, lr=9.99413e-05, gnorm=2.038, loss_scale=1, train_wall=11, gb_free=2.8, wall=95356
2021-06-19 21:08:12 | INFO | train_inner | epoch 003: 2391 / 3002 loss=2.6, ppl=6.06, wps=5971.6, ups=0.09, wpb=64818, bsz=128, num_updates=8342, lr=9.99413e-05, gnorm=2.177, loss_scale=1, train_wall=10, gb_free=2.8, wall=95367
2021-06-19 21:08:23 | INFO | train_inner | epoch 003: 2392 / 3002 loss=2.842, ppl=7.17, wps=5864, ups=0.09, wpb=64793, bsz=128, num_updates=8343, lr=9.99413e-05, gnorm=2.062, loss_scale=1, train_wall=11, gb_free=2.8, wall=95378
2021-06-19 21:08:35 | INFO | train_inner | epoch 003: 2393 / 3002 loss=2.539, ppl=5.81, wps=5840.2, ups=0.09, wpb=64835, bsz=128, num_updates=8344, lr=9.99412e-05, gnorm=2.061, loss_scale=1, train_wall=11, gb_free=2.8, wall=95389
2021-06-19 21:08:46 | INFO | train_inner | epoch 003: 2394 / 3002 loss=2.818, ppl=7.05, wps=5787.4, ups=0.09, wpb=64769, bsz=128, num_updates=8345, lr=9.99412e-05, gnorm=2.056, loss_scale=1, train_wall=11, gb_free=2.8, wall=95400
2021-06-19 21:08:57 | INFO | train_inner | epoch 003: 2395 / 3002 loss=2.678, ppl=6.4, wps=5799, ups=0.09, wpb=64812, bsz=128, num_updates=8346, lr=9.99412e-05, gnorm=2.459, loss_scale=1, train_wall=11, gb_free=2.8, wall=95411
2021-06-19 21:09:08 | INFO | train_inner | epoch 003: 2396 / 3002 loss=2.499, ppl=5.65, wps=5785.6, ups=0.09, wpb=64839, bsz=128, num_updates=8347, lr=9.99412e-05, gnorm=2.01, loss_scale=1, train_wall=11, gb_free=2.8, wall=95422
2021-06-19 21:09:19 | INFO | train_inner | epoch 003: 2397 / 3002 loss=2.578, ppl=5.97, wps=5868.1, ups=0.09, wpb=64835, bsz=128, num_updates=8348, lr=9.99412e-05, gnorm=2.882, loss_scale=1, train_wall=11, gb_free=2.8, wall=95433
2021-06-19 21:09:30 | INFO | train_inner | epoch 003: 2398 / 3002 loss=2.486, ppl=5.6, wps=5794, ups=0.09, wpb=64793, bsz=128, num_updates=8349, lr=9.99412e-05, gnorm=2.017, loss_scale=1, train_wall=11, gb_free=2.8, wall=95445
2021-06-19 21:09:41 | INFO | train_inner | epoch 003: 2399 / 3002 loss=2.75, ppl=6.73, wps=5830.8, ups=0.09, wpb=64836, bsz=128, num_updates=8350, lr=9.99412e-05, gnorm=2.096, loss_scale=2, train_wall=11, gb_free=2.8, wall=95456
2021-06-19 21:09:53 | INFO | train_inner | epoch 003: 2400 / 3002 loss=2.491, ppl=5.62, wps=5866, ups=0.09, wpb=64840, bsz=128, num_updates=8351, lr=9.99412e-05, gnorm=2.244, loss_scale=2, train_wall=11, gb_free=2.8, wall=95467
2021-06-19 21:10:03 | INFO | train_inner | epoch 003: 2401 / 3002 loss=2.719, ppl=6.58, wps=5943.7, ups=0.09, wpb=64798, bsz=128, num_updates=8352, lr=9.99412e-05, gnorm=3.002, loss_scale=2, train_wall=10, gb_free=2.8, wall=95478
2021-06-19 21:10:14 | INFO | train_inner | epoch 003: 2402 / 3002 loss=2.667, ppl=6.35, wps=5963.1, ups=0.09, wpb=64915, bsz=128, num_updates=8353, lr=9.99412e-05, gnorm=2.149, loss_scale=2, train_wall=10, gb_free=2.8, wall=95489
2021-06-19 21:10:26 | INFO | train_inner | epoch 003: 2403 / 3002 loss=2.499, ppl=5.65, wps=5779.8, ups=0.09, wpb=64890, bsz=128, num_updates=8354, lr=9.99412e-05, gnorm=2.104, loss_scale=2, train_wall=11, gb_free=2.8, wall=95500
2021-06-19 21:10:36 | INFO | train_inner | epoch 003: 2404 / 3002 loss=2.598, ppl=6.06, wps=5938.8, ups=0.09, wpb=64813, bsz=128, num_updates=8355, lr=9.99412e-05, gnorm=2.158, loss_scale=2, train_wall=10, gb_free=2.8, wall=95511
2021-06-19 21:10:47 | INFO | train_inner | epoch 003: 2405 / 3002 loss=2.591, ppl=6.02, wps=5929.7, ups=0.09, wpb=64876, bsz=128, num_updates=8356, lr=9.99411e-05, gnorm=2.036, loss_scale=2, train_wall=10, gb_free=2.8, wall=95522
2021-06-19 21:10:59 | INFO | train_inner | epoch 003: 2406 / 3002 loss=2.597, ppl=6.05, wps=5783.8, ups=0.09, wpb=64779, bsz=128, num_updates=8357, lr=9.99411e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=95533
2021-06-19 21:11:10 | INFO | train_inner | epoch 003: 2407 / 3002 loss=2.702, ppl=6.51, wps=5768.2, ups=0.09, wpb=64773, bsz=128, num_updates=8358, lr=9.99411e-05, gnorm=2.064, loss_scale=2, train_wall=11, gb_free=2.8, wall=95544
2021-06-19 21:11:21 | INFO | train_inner | epoch 003: 2408 / 3002 loss=2.719, ppl=6.59, wps=5665.7, ups=0.09, wpb=64847, bsz=128, num_updates=8359, lr=9.99411e-05, gnorm=2.236, loss_scale=2, train_wall=11, gb_free=2.8, wall=95556
2021-06-19 21:11:33 | INFO | train_inner | epoch 003: 2409 / 3002 loss=2.726, ppl=6.62, wps=5752.7, ups=0.09, wpb=64761, bsz=128, num_updates=8360, lr=9.99411e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=95567
2021-06-19 21:11:44 | INFO | train_inner | epoch 003: 2410 / 3002 loss=2.741, ppl=6.68, wps=5887.7, ups=0.09, wpb=64850, bsz=128, num_updates=8361, lr=9.99411e-05, gnorm=2.036, loss_scale=2, train_wall=11, gb_free=2.8, wall=95578
2021-06-19 21:11:54 | INFO | train_inner | epoch 003: 2411 / 3002 loss=2.553, ppl=5.87, wps=5960.2, ups=0.09, wpb=64915, bsz=128, num_updates=8362, lr=9.99411e-05, gnorm=3.365, loss_scale=2, train_wall=10, gb_free=2.8, wall=95589
2021-06-19 21:12:06 | INFO | train_inner | epoch 003: 2412 / 3002 loss=2.615, ppl=6.13, wps=5847.7, ups=0.09, wpb=64835, bsz=128, num_updates=8363, lr=9.99411e-05, gnorm=2.08, loss_scale=2, train_wall=11, gb_free=2.8, wall=95600
2021-06-19 21:12:17 | INFO | train_inner | epoch 003: 2413 / 3002 loss=2.608, ppl=6.1, wps=5802.2, ups=0.09, wpb=64781, bsz=128, num_updates=8364, lr=9.99411e-05, gnorm=2.019, loss_scale=2, train_wall=11, gb_free=2.8, wall=95611
2021-06-19 21:12:28 | INFO | train_inner | epoch 003: 2414 / 3002 loss=2.626, ppl=6.17, wps=5808.1, ups=0.09, wpb=64766, bsz=128, num_updates=8365, lr=9.99411e-05, gnorm=2.118, loss_scale=2, train_wall=11, gb_free=2.8, wall=95622
2021-06-19 21:12:39 | INFO | train_inner | epoch 003: 2415 / 3002 loss=2.622, ppl=6.16, wps=5839.3, ups=0.09, wpb=64845, bsz=128, num_updates=8366, lr=9.99411e-05, gnorm=2.242, loss_scale=2, train_wall=11, gb_free=2.8, wall=95633
2021-06-19 21:12:50 | INFO | train_inner | epoch 003: 2416 / 3002 loss=2.558, ppl=5.89, wps=5995.9, ups=0.09, wpb=64783, bsz=128, num_updates=8367, lr=9.99411e-05, gnorm=2.084, loss_scale=2, train_wall=10, gb_free=2.8, wall=95644
2021-06-19 21:13:01 | INFO | train_inner | epoch 003: 2417 / 3002 loss=2.695, ppl=6.48, wps=5985.8, ups=0.09, wpb=64862, bsz=128, num_updates=8368, lr=9.99411e-05, gnorm=2.183, loss_scale=2, train_wall=10, gb_free=2.8, wall=95655
2021-06-19 21:13:12 | INFO | train_inner | epoch 003: 2418 / 3002 loss=2.787, ppl=6.9, wps=5806.1, ups=0.09, wpb=64797, bsz=128, num_updates=8369, lr=9.9941e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=95666
2021-06-19 21:13:23 | INFO | train_inner | epoch 003: 2419 / 3002 loss=2.524, ppl=5.75, wps=5984.4, ups=0.09, wpb=64879, bsz=128, num_updates=8370, lr=9.9941e-05, gnorm=2.147, loss_scale=2, train_wall=10, gb_free=2.8, wall=95677
2021-06-19 21:13:33 | INFO | train_inner | epoch 003: 2420 / 3002 loss=2.633, ppl=6.2, wps=5974.4, ups=0.09, wpb=64863, bsz=128, num_updates=8371, lr=9.9941e-05, gnorm=4.48, loss_scale=2, train_wall=10, gb_free=2.8, wall=95688
2021-06-19 21:13:45 | INFO | train_inner | epoch 003: 2421 / 3002 loss=2.572, ppl=5.95, wps=5754.7, ups=0.09, wpb=64833, bsz=128, num_updates=8372, lr=9.9941e-05, gnorm=2.092, loss_scale=2, train_wall=11, gb_free=2.8, wall=95699
2021-06-19 21:13:56 | INFO | train_inner | epoch 003: 2422 / 3002 loss=2.729, ppl=6.63, wps=5846.7, ups=0.09, wpb=64774, bsz=128, num_updates=8373, lr=9.9941e-05, gnorm=2.121, loss_scale=2, train_wall=11, gb_free=2.8, wall=95710
2021-06-19 21:14:07 | INFO | train_inner | epoch 003: 2423 / 3002 loss=2.773, ppl=6.84, wps=5853.5, ups=0.09, wpb=64798, bsz=128, num_updates=8374, lr=9.9941e-05, gnorm=2.214, loss_scale=2, train_wall=11, gb_free=2.8, wall=95721
2021-06-19 21:14:18 | INFO | train_inner | epoch 003: 2424 / 3002 loss=2.491, ppl=5.62, wps=5751.3, ups=0.09, wpb=64880, bsz=128, num_updates=8375, lr=9.9941e-05, gnorm=2.761, loss_scale=2, train_wall=11, gb_free=2.8, wall=95732
2021-06-19 21:14:29 | INFO | train_inner | epoch 003: 2425 / 3002 loss=2.548, ppl=5.85, wps=5763.2, ups=0.09, wpb=64777, bsz=128, num_updates=8376, lr=9.9941e-05, gnorm=3.467, loss_scale=2, train_wall=11, gb_free=2.8, wall=95744
2021-06-19 21:14:41 | INFO | train_inner | epoch 003: 2426 / 3002 loss=2.663, ppl=6.33, wps=5767.6, ups=0.09, wpb=64813, bsz=128, num_updates=8377, lr=9.9941e-05, gnorm=8.031, loss_scale=2, train_wall=11, gb_free=2.8, wall=95755
2021-06-19 21:14:52 | INFO | train_inner | epoch 003: 2427 / 3002 loss=2.608, ppl=6.1, wps=5885.8, ups=0.09, wpb=64899, bsz=128, num_updates=8378, lr=9.9941e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=95766
2021-06-19 21:15:03 | INFO | train_inner | epoch 003: 2428 / 3002 loss=2.571, ppl=5.94, wps=5871.9, ups=0.09, wpb=64783, bsz=128, num_updates=8379, lr=9.9941e-05, gnorm=2.045, loss_scale=2, train_wall=11, gb_free=2.8, wall=95777
2021-06-19 21:15:14 | INFO | train_inner | epoch 003: 2429 / 3002 loss=2.641, ppl=6.24, wps=5872.5, ups=0.09, wpb=64894, bsz=128, num_updates=8380, lr=9.9941e-05, gnorm=5.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=95788
2021-06-19 21:15:25 | INFO | train_inner | epoch 003: 2430 / 3002 loss=2.459, ppl=5.5, wps=5799.8, ups=0.09, wpb=64827, bsz=128, num_updates=8381, lr=9.99409e-05, gnorm=2.124, loss_scale=2, train_wall=11, gb_free=2.8, wall=95799
2021-06-19 21:15:36 | INFO | train_inner | epoch 003: 2431 / 3002 loss=2.572, ppl=5.95, wps=5779.1, ups=0.09, wpb=64870, bsz=128, num_updates=8382, lr=9.99409e-05, gnorm=2.169, loss_scale=2, train_wall=11, gb_free=2.8, wall=95810
2021-06-19 21:15:47 | INFO | train_inner | epoch 003: 2432 / 3002 loss=2.658, ppl=6.31, wps=5835.4, ups=0.09, wpb=64774, bsz=128, num_updates=8383, lr=9.99409e-05, gnorm=2.396, loss_scale=2, train_wall=11, gb_free=2.8, wall=95822
2021-06-19 21:15:58 | INFO | train_inner | epoch 003: 2433 / 3002 loss=2.7, ppl=6.5, wps=5766.8, ups=0.09, wpb=64767, bsz=128, num_updates=8384, lr=9.99409e-05, gnorm=2.163, loss_scale=2, train_wall=11, gb_free=2.8, wall=95833
2021-06-19 21:16:09 | INFO | train_inner | epoch 003: 2434 / 3002 loss=2.556, ppl=5.88, wps=5948.5, ups=0.09, wpb=64855, bsz=128, num_updates=8385, lr=9.99409e-05, gnorm=2.089, loss_scale=2, train_wall=10, gb_free=2.8, wall=95844
2021-06-19 21:16:20 | INFO | train_inner | epoch 003: 2435 / 3002 loss=2.587, ppl=6.01, wps=5891.3, ups=0.09, wpb=64811, bsz=128, num_updates=8386, lr=9.99409e-05, gnorm=2.082, loss_scale=2, train_wall=11, gb_free=2.8, wall=95855
2021-06-19 21:16:32 | INFO | train_inner | epoch 003: 2436 / 3002 loss=2.618, ppl=6.14, wps=5788, ups=0.09, wpb=64787, bsz=128, num_updates=8387, lr=9.99409e-05, gnorm=2.149, loss_scale=2, train_wall=11, gb_free=2.8, wall=95866
2021-06-19 21:16:42 | INFO | train_inner | epoch 003: 2437 / 3002 loss=2.748, ppl=6.72, wps=5986.2, ups=0.09, wpb=64854, bsz=128, num_updates=8388, lr=9.99409e-05, gnorm=2.067, loss_scale=2, train_wall=10, gb_free=2.8, wall=95877
2021-06-19 21:16:53 | INFO | train_inner | epoch 003: 2438 / 3002 loss=2.756, ppl=6.76, wps=5951.3, ups=0.09, wpb=64785, bsz=128, num_updates=8389, lr=9.99409e-05, gnorm=2.169, loss_scale=2, train_wall=10, gb_free=2.8, wall=95888
2021-06-19 21:17:04 | INFO | train_inner | epoch 003: 2439 / 3002 loss=2.748, ppl=6.72, wps=5953.5, ups=0.09, wpb=64844, bsz=128, num_updates=8390, lr=9.99409e-05, gnorm=3.125, loss_scale=2, train_wall=10, gb_free=2.8, wall=95899
2021-06-19 21:17:15 | INFO | train_inner | epoch 003: 2440 / 3002 loss=2.63, ppl=6.19, wps=5906.7, ups=0.09, wpb=64799, bsz=128, num_updates=8391, lr=9.99409e-05, gnorm=2.076, loss_scale=2, train_wall=11, gb_free=2.8, wall=95909
2021-06-19 21:17:26 | INFO | train_inner | epoch 003: 2441 / 3002 loss=2.705, ppl=6.52, wps=5948.2, ups=0.09, wpb=64935, bsz=128, num_updates=8392, lr=9.99409e-05, gnorm=2.142, loss_scale=2, train_wall=10, gb_free=2.8, wall=95920
2021-06-19 21:17:37 | INFO | train_inner | epoch 003: 2442 / 3002 loss=2.548, ppl=5.85, wps=5885.2, ups=0.09, wpb=64878, bsz=128, num_updates=8393, lr=9.99409e-05, gnorm=2.136, loss_scale=2, train_wall=11, gb_free=2.8, wall=95931
2021-06-19 21:17:48 | INFO | train_inner | epoch 003: 2443 / 3002 loss=2.493, ppl=5.63, wps=5986.5, ups=0.09, wpb=64844, bsz=128, num_updates=8394, lr=9.99408e-05, gnorm=1.961, loss_scale=2, train_wall=10, gb_free=2.8, wall=95942
2021-06-19 21:17:59 | INFO | train_inner | epoch 003: 2444 / 3002 loss=2.622, ppl=6.16, wps=5924.3, ups=0.09, wpb=64803, bsz=128, num_updates=8395, lr=9.99408e-05, gnorm=2.119, loss_scale=2, train_wall=10, gb_free=2.8, wall=95953
2021-06-19 21:18:10 | INFO | train_inner | epoch 003: 2445 / 3002 loss=2.884, ppl=7.38, wps=5877.5, ups=0.09, wpb=64807, bsz=128, num_updates=8396, lr=9.99408e-05, gnorm=2.614, loss_scale=2, train_wall=11, gb_free=2.8, wall=95964
2021-06-19 21:18:21 | INFO | train_inner | epoch 003: 2446 / 3002 loss=2.744, ppl=6.7, wps=5828, ups=0.09, wpb=64762, bsz=128, num_updates=8397, lr=9.99408e-05, gnorm=2.111, loss_scale=2, train_wall=11, gb_free=2.8, wall=95975
2021-06-19 21:18:32 | INFO | train_inner | epoch 003: 2447 / 3002 loss=2.555, ppl=5.88, wps=5834.8, ups=0.09, wpb=64750, bsz=128, num_updates=8398, lr=9.99408e-05, gnorm=2.135, loss_scale=2, train_wall=11, gb_free=2.8, wall=95986
2021-06-19 21:18:43 | INFO | train_inner | epoch 003: 2448 / 3002 loss=2.579, ppl=5.98, wps=5924.7, ups=0.09, wpb=64862, bsz=128, num_updates=8399, lr=9.99408e-05, gnorm=2.027, loss_scale=2, train_wall=10, gb_free=2.8, wall=95997
2021-06-19 21:18:54 | INFO | train_inner | epoch 003: 2449 / 3002 loss=2.656, ppl=6.3, wps=5698.1, ups=0.09, wpb=64781, bsz=128, num_updates=8400, lr=9.99408e-05, gnorm=2.19, loss_scale=2, train_wall=11, gb_free=2.8, wall=96009
2021-06-19 21:19:06 | INFO | train_inner | epoch 003: 2450 / 3002 loss=2.551, ppl=5.86, wps=5768.9, ups=0.09, wpb=64791, bsz=128, num_updates=8401, lr=9.99408e-05, gnorm=2.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=96020
2021-06-19 21:19:17 | INFO | train_inner | epoch 003: 2451 / 3002 loss=2.652, ppl=6.29, wps=5844.4, ups=0.09, wpb=64772, bsz=128, num_updates=8402, lr=9.99408e-05, gnorm=2.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=96031
2021-06-19 21:19:28 | INFO | train_inner | epoch 003: 2452 / 3002 loss=2.526, ppl=5.76, wps=5891, ups=0.09, wpb=64910, bsz=128, num_updates=8403, lr=9.99408e-05, gnorm=2.313, loss_scale=2, train_wall=11, gb_free=2.8, wall=96042
2021-06-19 21:19:39 | INFO | train_inner | epoch 003: 2453 / 3002 loss=2.701, ppl=6.5, wps=5735, ups=0.09, wpb=64765, bsz=128, num_updates=8404, lr=9.99408e-05, gnorm=2.629, loss_scale=2, train_wall=11, gb_free=2.8, wall=96053
2021-06-19 21:19:50 | INFO | train_inner | epoch 003: 2454 / 3002 loss=2.614, ppl=6.12, wps=5747.7, ups=0.09, wpb=64797, bsz=128, num_updates=8405, lr=9.99408e-05, gnorm=2.117, loss_scale=2, train_wall=11, gb_free=2.8, wall=96065
2021-06-19 21:20:01 | INFO | train_inner | epoch 003: 2455 / 3002 loss=2.434, ppl=5.4, wps=5956.9, ups=0.09, wpb=64877, bsz=128, num_updates=8406, lr=9.99407e-05, gnorm=2.107, loss_scale=2, train_wall=10, gb_free=2.8, wall=96076
2021-06-19 21:20:12 | INFO | train_inner | epoch 003: 2456 / 3002 loss=2.664, ppl=6.34, wps=5978.9, ups=0.09, wpb=64876, bsz=128, num_updates=8407, lr=9.99407e-05, gnorm=2.193, loss_scale=2, train_wall=10, gb_free=2.8, wall=96086
2021-06-19 21:20:23 | INFO | train_inner | epoch 003: 2457 / 3002 loss=2.571, ppl=5.94, wps=5701.4, ups=0.09, wpb=64776, bsz=128, num_updates=8408, lr=9.99407e-05, gnorm=2.078, loss_scale=2, train_wall=11, gb_free=2.8, wall=96098
2021-06-19 21:20:35 | INFO | train_inner | epoch 003: 2458 / 3002 loss=2.595, ppl=6.04, wps=5822.7, ups=0.09, wpb=64823, bsz=128, num_updates=8409, lr=9.99407e-05, gnorm=2.138, loss_scale=2, train_wall=11, gb_free=2.8, wall=96109
2021-06-19 21:20:46 | INFO | train_inner | epoch 003: 2459 / 3002 loss=2.632, ppl=6.2, wps=5897.2, ups=0.09, wpb=64801, bsz=128, num_updates=8410, lr=9.99407e-05, gnorm=2.145, loss_scale=2, train_wall=11, gb_free=2.8, wall=96120
2021-06-19 21:20:57 | INFO | train_inner | epoch 003: 2460 / 3002 loss=2.568, ppl=5.93, wps=5731.7, ups=0.09, wpb=64747, bsz=128, num_updates=8411, lr=9.99407e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=96131
2021-06-19 21:21:08 | INFO | train_inner | epoch 003: 2461 / 3002 loss=2.472, ppl=5.55, wps=5946.9, ups=0.09, wpb=64895, bsz=128, num_updates=8412, lr=9.99407e-05, gnorm=2.099, loss_scale=2, train_wall=10, gb_free=2.8, wall=96142
2021-06-19 21:21:19 | INFO | train_inner | epoch 003: 2462 / 3002 loss=2.757, ppl=6.76, wps=5816.8, ups=0.09, wpb=64871, bsz=128, num_updates=8413, lr=9.99407e-05, gnorm=2.081, loss_scale=2, train_wall=11, gb_free=2.8, wall=96153
2021-06-19 21:21:30 | INFO | train_inner | epoch 003: 2463 / 3002 loss=2.662, ppl=6.33, wps=5874.4, ups=0.09, wpb=64801, bsz=128, num_updates=8414, lr=9.99407e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=96164
2021-06-19 21:21:41 | INFO | train_inner | epoch 003: 2464 / 3002 loss=2.523, ppl=5.75, wps=5878.6, ups=0.09, wpb=64763, bsz=128, num_updates=8415, lr=9.99407e-05, gnorm=2.092, loss_scale=2, train_wall=11, gb_free=2.8, wall=96175
2021-06-19 21:21:52 | INFO | train_inner | epoch 003: 2465 / 3002 loss=2.581, ppl=5.98, wps=5917, ups=0.09, wpb=64819, bsz=128, num_updates=8416, lr=9.99407e-05, gnorm=2.066, loss_scale=2, train_wall=10, gb_free=2.8, wall=96186
2021-06-19 21:22:03 | INFO | train_inner | epoch 003: 2466 / 3002 loss=2.767, ppl=6.81, wps=5772.3, ups=0.09, wpb=64688, bsz=128, num_updates=8417, lr=9.99407e-05, gnorm=2.099, loss_scale=2, train_wall=11, gb_free=2.8, wall=96197
2021-06-19 21:22:14 | INFO | train_inner | epoch 003: 2467 / 3002 loss=2.557, ppl=5.88, wps=5798.8, ups=0.09, wpb=64834, bsz=128, num_updates=8418, lr=9.99407e-05, gnorm=2.154, loss_scale=2, train_wall=11, gb_free=2.8, wall=96209
2021-06-19 21:22:25 | INFO | train_inner | epoch 003: 2468 / 3002 loss=2.623, ppl=6.16, wps=5828.4, ups=0.09, wpb=64890, bsz=128, num_updates=8419, lr=9.99406e-05, gnorm=2.211, loss_scale=2, train_wall=11, gb_free=2.8, wall=96220
2021-06-19 21:22:36 | INFO | train_inner | epoch 003: 2469 / 3002 loss=2.696, ppl=6.48, wps=5868.3, ups=0.09, wpb=64868, bsz=128, num_updates=8420, lr=9.99406e-05, gnorm=2.149, loss_scale=2, train_wall=11, gb_free=2.8, wall=96231
2021-06-19 21:22:47 | INFO | train_inner | epoch 003: 2470 / 3002 loss=2.453, ppl=5.48, wps=6012.1, ups=0.09, wpb=64839, bsz=128, num_updates=8421, lr=9.99406e-05, gnorm=2.119, loss_scale=2, train_wall=10, gb_free=2.8, wall=96242
2021-06-19 21:22:58 | INFO | train_inner | epoch 003: 2471 / 3002 loss=2.671, ppl=6.37, wps=5973, ups=0.09, wpb=64848, bsz=128, num_updates=8422, lr=9.99406e-05, gnorm=2.126, loss_scale=2, train_wall=10, gb_free=2.8, wall=96252
2021-06-19 21:23:09 | INFO | train_inner | epoch 003: 2472 / 3002 loss=2.617, ppl=6.13, wps=5795.4, ups=0.09, wpb=64865, bsz=128, num_updates=8423, lr=9.99406e-05, gnorm=2.083, loss_scale=2, train_wall=11, gb_free=2.8, wall=96264
2021-06-19 21:23:20 | INFO | train_inner | epoch 003: 2473 / 3002 loss=2.512, ppl=5.7, wps=5910.7, ups=0.09, wpb=64864, bsz=128, num_updates=8424, lr=9.99406e-05, gnorm=2.041, loss_scale=2, train_wall=11, gb_free=2.8, wall=96275
2021-06-19 21:23:31 | INFO | train_inner | epoch 003: 2474 / 3002 loss=2.58, ppl=5.98, wps=5831.3, ups=0.09, wpb=64863, bsz=128, num_updates=8425, lr=9.99406e-05, gnorm=2.124, loss_scale=2, train_wall=11, gb_free=2.8, wall=96286
2021-06-19 21:23:42 | INFO | train_inner | epoch 003: 2475 / 3002 loss=2.568, ppl=5.93, wps=5874.5, ups=0.09, wpb=64844, bsz=128, num_updates=8426, lr=9.99406e-05, gnorm=2.049, loss_scale=2, train_wall=11, gb_free=2.8, wall=96297
2021-06-19 21:23:53 | INFO | train_inner | epoch 003: 2476 / 3002 loss=2.501, ppl=5.66, wps=5885.5, ups=0.09, wpb=64884, bsz=128, num_updates=8427, lr=9.99406e-05, gnorm=2.256, loss_scale=2, train_wall=11, gb_free=2.8, wall=96308
2021-06-19 21:24:04 | INFO | train_inner | epoch 003: 2477 / 3002 loss=2.596, ppl=6.05, wps=5886.7, ups=0.09, wpb=64903, bsz=128, num_updates=8428, lr=9.99406e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=96319
2021-06-19 21:24:15 | INFO | train_inner | epoch 003: 2478 / 3002 loss=2.669, ppl=6.36, wps=5908.7, ups=0.09, wpb=64759, bsz=128, num_updates=8429, lr=9.99406e-05, gnorm=2.165, loss_scale=2, train_wall=11, gb_free=2.8, wall=96330
2021-06-19 21:24:27 | INFO | train_inner | epoch 003: 2479 / 3002 loss=2.813, ppl=7.03, wps=5843.5, ups=0.09, wpb=64823, bsz=128, num_updates=8430, lr=9.99406e-05, gnorm=2.427, loss_scale=2, train_wall=11, gb_free=2.8, wall=96341
2021-06-19 21:24:38 | INFO | train_inner | epoch 003: 2480 / 3002 loss=2.681, ppl=6.41, wps=5849.6, ups=0.09, wpb=64904, bsz=128, num_updates=8431, lr=9.99405e-05, gnorm=2.056, loss_scale=2, train_wall=11, gb_free=2.8, wall=96352
2021-06-19 21:24:49 | INFO | train_inner | epoch 003: 2481 / 3002 loss=2.697, ppl=6.48, wps=5864.4, ups=0.09, wpb=64825, bsz=128, num_updates=8432, lr=9.99405e-05, gnorm=2.049, loss_scale=2, train_wall=11, gb_free=2.8, wall=96363
2021-06-19 21:25:00 | INFO | train_inner | epoch 003: 2482 / 3002 loss=2.643, ppl=6.25, wps=5794.7, ups=0.09, wpb=64758, bsz=128, num_updates=8433, lr=9.99405e-05, gnorm=2.281, loss_scale=2, train_wall=11, gb_free=2.8, wall=96374
2021-06-19 21:25:11 | INFO | train_inner | epoch 003: 2483 / 3002 loss=2.523, ppl=5.75, wps=5755.7, ups=0.09, wpb=64843, bsz=128, num_updates=8434, lr=9.99405e-05, gnorm=2.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=96385
2021-06-19 21:25:22 | INFO | train_inner | epoch 003: 2484 / 3002 loss=2.565, ppl=5.92, wps=5874.6, ups=0.09, wpb=64829, bsz=128, num_updates=8435, lr=9.99405e-05, gnorm=2.162, loss_scale=2, train_wall=11, gb_free=2.8, wall=96397
2021-06-19 21:25:33 | INFO | train_inner | epoch 003: 2485 / 3002 loss=2.524, ppl=5.75, wps=5885.6, ups=0.09, wpb=64840, bsz=128, num_updates=8436, lr=9.99405e-05, gnorm=2.131, loss_scale=2, train_wall=11, gb_free=2.8, wall=96408
2021-06-19 21:25:44 | INFO | train_inner | epoch 003: 2486 / 3002 loss=2.561, ppl=5.9, wps=5796.6, ups=0.09, wpb=64910, bsz=128, num_updates=8437, lr=9.99405e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=96419
2021-06-19 21:25:55 | INFO | train_inner | epoch 003: 2487 / 3002 loss=2.509, ppl=5.69, wps=5930.5, ups=0.09, wpb=64866, bsz=128, num_updates=8438, lr=9.99405e-05, gnorm=2.157, loss_scale=2, train_wall=10, gb_free=2.8, wall=96430
2021-06-19 21:26:06 | INFO | train_inner | epoch 003: 2488 / 3002 loss=2.595, ppl=6.04, wps=5849.3, ups=0.09, wpb=64879, bsz=128, num_updates=8439, lr=9.99405e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=96441
2021-06-19 21:26:18 | INFO | train_inner | epoch 003: 2489 / 3002 loss=2.661, ppl=6.32, wps=5801.1, ups=0.09, wpb=64850, bsz=128, num_updates=8440, lr=9.99405e-05, gnorm=2.06, loss_scale=2, train_wall=11, gb_free=2.8, wall=96452
2021-06-19 21:26:29 | INFO | train_inner | epoch 003: 2490 / 3002 loss=2.595, ppl=6.04, wps=5826.1, ups=0.09, wpb=64820, bsz=128, num_updates=8441, lr=9.99405e-05, gnorm=2.148, loss_scale=2, train_wall=11, gb_free=2.8, wall=96463
2021-06-19 21:26:40 | INFO | train_inner | epoch 003: 2491 / 3002 loss=2.715, ppl=6.56, wps=5791.4, ups=0.09, wpb=64781, bsz=128, num_updates=8442, lr=9.99405e-05, gnorm=2.144, loss_scale=2, train_wall=11, gb_free=2.8, wall=96474
2021-06-19 21:26:51 | INFO | train_inner | epoch 003: 2492 / 3002 loss=2.65, ppl=6.28, wps=5834.2, ups=0.09, wpb=64818, bsz=128, num_updates=8443, lr=9.99405e-05, gnorm=2.08, loss_scale=2, train_wall=11, gb_free=2.8, wall=96485
2021-06-19 21:27:02 | INFO | train_inner | epoch 003: 2493 / 3002 loss=2.702, ppl=6.51, wps=5804.2, ups=0.09, wpb=64815, bsz=128, num_updates=8444, lr=9.99404e-05, gnorm=2.448, loss_scale=2, train_wall=11, gb_free=2.8, wall=96497
2021-06-19 21:27:13 | INFO | train_inner | epoch 003: 2494 / 3002 loss=2.701, ppl=6.5, wps=5869.9, ups=0.09, wpb=64857, bsz=128, num_updates=8445, lr=9.99404e-05, gnorm=2.186, loss_scale=2, train_wall=11, gb_free=2.8, wall=96508
2021-06-19 21:27:25 | INFO | train_inner | epoch 003: 2495 / 3002 loss=2.539, ppl=5.81, wps=5717.1, ups=0.09, wpb=64765, bsz=128, num_updates=8446, lr=9.99404e-05, gnorm=2.703, loss_scale=2, train_wall=11, gb_free=2.8, wall=96519
2021-06-19 21:27:36 | INFO | train_inner | epoch 003: 2496 / 3002 loss=2.694, ppl=6.47, wps=5880.3, ups=0.09, wpb=64819, bsz=128, num_updates=8447, lr=9.99404e-05, gnorm=2.141, loss_scale=2, train_wall=11, gb_free=2.8, wall=96530
2021-06-19 21:27:47 | INFO | train_inner | epoch 003: 2497 / 3002 loss=2.57, ppl=5.94, wps=5795, ups=0.09, wpb=64856, bsz=128, num_updates=8448, lr=9.99404e-05, gnorm=2.424, loss_scale=2, train_wall=11, gb_free=2.8, wall=96541
2021-06-19 21:27:58 | INFO | train_inner | epoch 003: 2498 / 3002 loss=2.774, ppl=6.84, wps=5785.1, ups=0.09, wpb=64868, bsz=128, num_updates=8449, lr=9.99404e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=96552
2021-06-19 21:28:09 | INFO | train_inner | epoch 003: 2499 / 3002 loss=2.688, ppl=6.44, wps=5857.1, ups=0.09, wpb=64857, bsz=128, num_updates=8450, lr=9.99404e-05, gnorm=2.315, loss_scale=2, train_wall=11, gb_free=2.8, wall=96563
2021-06-19 21:28:20 | INFO | train_inner | epoch 003: 2500 / 3002 loss=2.666, ppl=6.35, wps=5836.9, ups=0.09, wpb=64814, bsz=128, num_updates=8451, lr=9.99404e-05, gnorm=2.591, loss_scale=2, train_wall=11, gb_free=2.8, wall=96575
2021-06-19 21:28:31 | INFO | train_inner | epoch 003: 2501 / 3002 loss=2.752, ppl=6.74, wps=5771.9, ups=0.09, wpb=64777, bsz=128, num_updates=8452, lr=9.99404e-05, gnorm=2.325, loss_scale=2, train_wall=11, gb_free=2.8, wall=96586
2021-06-19 21:28:43 | INFO | train_inner | epoch 003: 2502 / 3002 loss=2.683, ppl=6.42, wps=5754.2, ups=0.09, wpb=64806, bsz=128, num_updates=8453, lr=9.99404e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=96597
2021-06-19 21:28:54 | INFO | train_inner | epoch 003: 2503 / 3002 loss=2.593, ppl=6.03, wps=5941.4, ups=0.09, wpb=64950, bsz=128, num_updates=8454, lr=9.99404e-05, gnorm=2.073, loss_scale=2, train_wall=10, gb_free=2.8, wall=96608
2021-06-19 21:29:05 | INFO | train_inner | epoch 003: 2504 / 3002 loss=2.641, ppl=6.24, wps=5834.7, ups=0.09, wpb=64776, bsz=128, num_updates=8455, lr=9.99404e-05, gnorm=2.806, loss_scale=2, train_wall=11, gb_free=2.8, wall=96619
2021-06-19 21:29:16 | INFO | train_inner | epoch 003: 2505 / 3002 loss=2.582, ppl=5.99, wps=5885.9, ups=0.09, wpb=64886, bsz=128, num_updates=8456, lr=9.99403e-05, gnorm=2.814, loss_scale=2, train_wall=11, gb_free=2.8, wall=96630
2021-06-19 21:29:27 | INFO | train_inner | epoch 003: 2506 / 3002 loss=2.509, ppl=5.69, wps=5734.1, ups=0.09, wpb=64774, bsz=128, num_updates=8457, lr=9.99403e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=96641
2021-06-19 21:29:38 | INFO | train_inner | epoch 003: 2507 / 3002 loss=2.538, ppl=5.81, wps=5934.3, ups=0.09, wpb=64829, bsz=128, num_updates=8458, lr=9.99403e-05, gnorm=2.101, loss_scale=2, train_wall=10, gb_free=2.8, wall=96652
2021-06-19 21:29:49 | INFO | train_inner | epoch 003: 2508 / 3002 loss=2.465, ppl=5.52, wps=5906.8, ups=0.09, wpb=64767, bsz=128, num_updates=8459, lr=9.99403e-05, gnorm=2.474, loss_scale=2, train_wall=11, gb_free=2.8, wall=96663
2021-06-19 21:30:00 | INFO | train_inner | epoch 003: 2509 / 3002 loss=2.674, ppl=6.38, wps=5897.1, ups=0.09, wpb=64824, bsz=128, num_updates=8460, lr=9.99403e-05, gnorm=2.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=96674
2021-06-19 21:30:11 | INFO | train_inner | epoch 003: 2510 / 3002 loss=2.72, ppl=6.59, wps=5843.4, ups=0.09, wpb=64845, bsz=128, num_updates=8461, lr=9.99403e-05, gnorm=2.11, loss_scale=2, train_wall=11, gb_free=2.8, wall=96685
2021-06-19 21:30:22 | INFO | train_inner | epoch 003: 2511 / 3002 loss=2.594, ppl=6.04, wps=5801.6, ups=0.09, wpb=64887, bsz=128, num_updates=8462, lr=9.99403e-05, gnorm=2.157, loss_scale=2, train_wall=11, gb_free=2.8, wall=96697
2021-06-19 21:30:33 | INFO | train_inner | epoch 003: 2512 / 3002 loss=2.592, ppl=6.03, wps=5957.5, ups=0.09, wpb=64899, bsz=128, num_updates=8463, lr=9.99403e-05, gnorm=3.1, loss_scale=2, train_wall=10, gb_free=2.8, wall=96707
2021-06-19 21:30:44 | INFO | train_inner | epoch 003: 2513 / 3002 loss=2.708, ppl=6.53, wps=5957.8, ups=0.09, wpb=64827, bsz=128, num_updates=8464, lr=9.99403e-05, gnorm=2.141, loss_scale=2, train_wall=10, gb_free=2.8, wall=96718
2021-06-19 21:30:55 | INFO | train_inner | epoch 003: 2514 / 3002 loss=2.508, ppl=5.69, wps=5700.9, ups=0.09, wpb=64869, bsz=128, num_updates=8465, lr=9.99403e-05, gnorm=2.126, loss_scale=2, train_wall=11, gb_free=2.8, wall=96730
2021-06-19 21:31:06 | INFO | train_inner | epoch 003: 2515 / 3002 loss=2.516, ppl=5.72, wps=5830.8, ups=0.09, wpb=64841, bsz=128, num_updates=8466, lr=9.99403e-05, gnorm=2.979, loss_scale=2, train_wall=11, gb_free=2.8, wall=96741
2021-06-19 21:31:18 | INFO | train_inner | epoch 003: 2516 / 3002 loss=2.539, ppl=5.81, wps=5790.7, ups=0.09, wpb=64813, bsz=128, num_updates=8467, lr=9.99403e-05, gnorm=2.169, loss_scale=2, train_wall=11, gb_free=2.8, wall=96752
2021-06-19 21:31:29 | INFO | train_inner | epoch 003: 2517 / 3002 loss=2.552, ppl=5.86, wps=5846.8, ups=0.09, wpb=64871, bsz=128, num_updates=8468, lr=9.99403e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=96763
2021-06-19 21:31:40 | INFO | train_inner | epoch 003: 2518 / 3002 loss=2.532, ppl=5.78, wps=5801.8, ups=0.09, wpb=64887, bsz=128, num_updates=8469, lr=9.99402e-05, gnorm=2.191, loss_scale=2, train_wall=11, gb_free=2.8, wall=96774
2021-06-19 21:31:51 | INFO | train_inner | epoch 003: 2519 / 3002 loss=2.518, ppl=5.73, wps=5820, ups=0.09, wpb=64810, bsz=128, num_updates=8470, lr=9.99402e-05, gnorm=2.2, loss_scale=2, train_wall=11, gb_free=2.8, wall=96785
2021-06-19 21:32:02 | INFO | train_inner | epoch 003: 2520 / 3002 loss=2.481, ppl=5.58, wps=5854.7, ups=0.09, wpb=64924, bsz=128, num_updates=8471, lr=9.99402e-05, gnorm=3.186, loss_scale=2, train_wall=11, gb_free=2.8, wall=96796
2021-06-19 21:32:13 | INFO | train_inner | epoch 003: 2521 / 3002 loss=2.646, ppl=6.26, wps=5928.1, ups=0.09, wpb=64804, bsz=128, num_updates=8472, lr=9.99402e-05, gnorm=2.209, loss_scale=2, train_wall=10, gb_free=2.8, wall=96807
2021-06-19 21:32:24 | INFO | train_inner | epoch 003: 2522 / 3002 loss=2.687, ppl=6.44, wps=5783.2, ups=0.09, wpb=64827, bsz=128, num_updates=8473, lr=9.99402e-05, gnorm=4.118, loss_scale=2, train_wall=11, gb_free=2.8, wall=96819
2021-06-19 21:32:36 | INFO | train_inner | epoch 003: 2523 / 3002 loss=2.682, ppl=6.42, wps=5738.4, ups=0.09, wpb=64724, bsz=128, num_updates=8474, lr=9.99402e-05, gnorm=3.57, loss_scale=2, train_wall=11, gb_free=2.8, wall=96830
2021-06-19 21:32:47 | INFO | train_inner | epoch 003: 2524 / 3002 loss=2.639, ppl=6.23, wps=5850, ups=0.09, wpb=64788, bsz=128, num_updates=8475, lr=9.99402e-05, gnorm=2.227, loss_scale=2, train_wall=11, gb_free=2.8, wall=96841
2021-06-19 21:32:58 | INFO | train_inner | epoch 003: 2525 / 3002 loss=2.734, ppl=6.65, wps=5718.8, ups=0.09, wpb=64851, bsz=128, num_updates=8476, lr=9.99402e-05, gnorm=25.712, loss_scale=2, train_wall=11, gb_free=2.8, wall=96852
2021-06-19 21:33:09 | INFO | train_inner | epoch 003: 2526 / 3002 loss=2.59, ppl=6.02, wps=5763.6, ups=0.09, wpb=64796, bsz=128, num_updates=8477, lr=9.99402e-05, gnorm=2.46, loss_scale=2, train_wall=11, gb_free=2.8, wall=96864
2021-06-19 21:33:20 | INFO | train_inner | epoch 003: 2527 / 3002 loss=2.648, ppl=6.27, wps=5867.1, ups=0.09, wpb=64939, bsz=128, num_updates=8478, lr=9.99402e-05, gnorm=2.612, loss_scale=4, train_wall=11, gb_free=2.8, wall=96875
2021-06-19 21:33:31 | INFO | train_inner | epoch 003: 2528 / 3002 loss=2.541, ppl=5.82, wps=5793, ups=0.09, wpb=64834, bsz=128, num_updates=8479, lr=9.99402e-05, gnorm=3.814, loss_scale=4, train_wall=11, gb_free=2.8, wall=96886
2021-06-19 21:33:42 | INFO | train_inner | epoch 003: 2529 / 3002 loss=2.448, ppl=5.46, wps=5936.2, ups=0.09, wpb=64852, bsz=128, num_updates=8480, lr=9.99402e-05, gnorm=2.446, loss_scale=4, train_wall=10, gb_free=2.8, wall=96897
2021-06-19 21:33:54 | INFO | train_inner | epoch 003: 2530 / 3002 loss=2.657, ppl=6.31, wps=5806.5, ups=0.09, wpb=64769, bsz=128, num_updates=8481, lr=9.99401e-05, gnorm=2.577, loss_scale=4, train_wall=11, gb_free=2.8, wall=96908
2021-06-19 21:34:05 | INFO | train_inner | epoch 003: 2531 / 3002 loss=2.797, ppl=6.95, wps=5816.9, ups=0.09, wpb=64795, bsz=128, num_updates=8482, lr=9.99401e-05, gnorm=2.569, loss_scale=4, train_wall=11, gb_free=2.8, wall=96919
2021-06-19 21:34:16 | INFO | train_inner | epoch 003: 2532 / 3002 loss=2.59, ppl=6.02, wps=5831.7, ups=0.09, wpb=64853, bsz=128, num_updates=8483, lr=9.99401e-05, gnorm=2.524, loss_scale=4, train_wall=11, gb_free=2.8, wall=96930
2021-06-19 21:34:27 | INFO | train_inner | epoch 003: 2533 / 3002 loss=2.705, ppl=6.52, wps=5785.1, ups=0.09, wpb=64744, bsz=128, num_updates=8484, lr=9.99401e-05, gnorm=2.344, loss_scale=4, train_wall=11, gb_free=2.8, wall=96941
2021-06-19 21:34:38 | INFO | train_inner | epoch 003: 2534 / 3002 loss=2.616, ppl=6.13, wps=5924.1, ups=0.09, wpb=64851, bsz=128, num_updates=8485, lr=9.99401e-05, gnorm=2.21, loss_scale=4, train_wall=10, gb_free=2.8, wall=96952
2021-06-19 21:34:49 | INFO | train_inner | epoch 003: 2535 / 3002 loss=2.593, ppl=6.03, wps=5810.7, ups=0.09, wpb=64848, bsz=128, num_updates=8486, lr=9.99401e-05, gnorm=2.353, loss_scale=4, train_wall=11, gb_free=2.8, wall=96963
2021-06-19 21:35:00 | INFO | train_inner | epoch 003: 2536 / 3002 loss=2.728, ppl=6.63, wps=5812.8, ups=0.09, wpb=64868, bsz=128, num_updates=8487, lr=9.99401e-05, gnorm=2.763, loss_scale=4, train_wall=11, gb_free=2.8, wall=96975
2021-06-19 21:35:12 | INFO | train_inner | epoch 003: 2537 / 3002 loss=2.572, ppl=5.95, wps=5772.4, ups=0.09, wpb=64828, bsz=128, num_updates=8488, lr=9.99401e-05, gnorm=2.449, loss_scale=4, train_wall=11, gb_free=2.8, wall=96986
2021-06-19 21:35:23 | INFO | train_inner | epoch 003: 2538 / 3002 loss=2.775, ppl=6.85, wps=5851.7, ups=0.09, wpb=64803, bsz=128, num_updates=8489, lr=9.99401e-05, gnorm=2.836, loss_scale=4, train_wall=11, gb_free=2.8, wall=96997
2021-06-19 21:35:34 | INFO | train_inner | epoch 003: 2539 / 3002 loss=2.656, ppl=6.3, wps=5788.2, ups=0.09, wpb=64719, bsz=128, num_updates=8490, lr=9.99401e-05, gnorm=2.414, loss_scale=4, train_wall=11, gb_free=2.8, wall=97008
2021-06-19 21:35:45 | INFO | train_inner | epoch 003: 2540 / 3002 loss=2.784, ppl=6.89, wps=5996.2, ups=0.09, wpb=64732, bsz=128, num_updates=8491, lr=9.99401e-05, gnorm=2.637, loss_scale=4, train_wall=10, gb_free=2.8, wall=97019
2021-06-19 21:35:56 | INFO | train_inner | epoch 003: 2541 / 3002 loss=2.799, ppl=6.96, wps=5854.1, ups=0.09, wpb=64791, bsz=128, num_updates=8492, lr=9.99401e-05, gnorm=2.181, loss_scale=4, train_wall=11, gb_free=2.8, wall=97030
2021-06-19 21:36:07 | INFO | train_inner | epoch 003: 2542 / 3002 loss=2.516, ppl=5.72, wps=5849.1, ups=0.09, wpb=64894, bsz=128, num_updates=8493, lr=9.99401e-05, gnorm=2.41, loss_scale=4, train_wall=11, gb_free=2.8, wall=97041
2021-06-19 21:36:18 | INFO | train_inner | epoch 003: 2543 / 3002 loss=2.637, ppl=6.22, wps=5931.2, ups=0.09, wpb=64821, bsz=128, num_updates=8494, lr=9.994e-05, gnorm=2.935, loss_scale=4, train_wall=10, gb_free=2.8, wall=97052
2021-06-19 21:36:29 | INFO | train_inner | epoch 003: 2544 / 3002 loss=2.631, ppl=6.19, wps=5849.3, ups=0.09, wpb=64886, bsz=128, num_updates=8495, lr=9.994e-05, gnorm=3.269, loss_scale=4, train_wall=11, gb_free=2.8, wall=97063
2021-06-19 21:36:40 | INFO | train_inner | epoch 003: 2545 / 3002 loss=2.653, ppl=6.29, wps=5884.7, ups=0.09, wpb=64793, bsz=128, num_updates=8496, lr=9.994e-05, gnorm=3.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=97074
2021-06-19 21:36:51 | INFO | train_inner | epoch 003: 2546 / 3002 loss=2.647, ppl=6.26, wps=5793.2, ups=0.09, wpb=64788, bsz=128, num_updates=8497, lr=9.994e-05, gnorm=2.144, loss_scale=4, train_wall=11, gb_free=2.8, wall=97085
2021-06-19 21:37:02 | INFO | train_inner | epoch 003: 2547 / 3002 loss=2.611, ppl=6.11, wps=5716.2, ups=0.09, wpb=64880, bsz=128, num_updates=8498, lr=9.994e-05, gnorm=2.267, loss_scale=4, train_wall=11, gb_free=2.8, wall=97097
2021-06-19 21:37:13 | INFO | train_inner | epoch 003: 2548 / 3002 loss=2.8, ppl=6.97, wps=5832.9, ups=0.09, wpb=64875, bsz=128, num_updates=8499, lr=9.994e-05, gnorm=2.278, loss_scale=4, train_wall=11, gb_free=2.8, wall=97108
2021-06-19 21:37:24 | INFO | train_inner | epoch 003: 2549 / 3002 loss=2.711, ppl=6.55, wps=5890.6, ups=0.09, wpb=64923, bsz=128, num_updates=8500, lr=9.994e-05, gnorm=4.711, loss_scale=4, train_wall=11, gb_free=2.8, wall=97119
2021-06-19 21:37:36 | INFO | train_inner | epoch 003: 2550 / 3002 loss=2.559, ppl=5.89, wps=5747, ups=0.09, wpb=64792, bsz=128, num_updates=8501, lr=9.994e-05, gnorm=2.132, loss_scale=4, train_wall=11, gb_free=2.8, wall=97130
2021-06-19 21:37:47 | INFO | train_inner | epoch 003: 2551 / 3002 loss=2.822, ppl=7.07, wps=5968.5, ups=0.09, wpb=64741, bsz=128, num_updates=8502, lr=9.994e-05, gnorm=2.691, loss_scale=4, train_wall=10, gb_free=2.8, wall=97141
2021-06-19 21:37:58 | INFO | train_inner | epoch 003: 2552 / 3002 loss=2.46, ppl=5.5, wps=5834.3, ups=0.09, wpb=64873, bsz=128, num_updates=8503, lr=9.994e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=97152
2021-06-19 21:38:09 | INFO | train_inner | epoch 003: 2553 / 3002 loss=2.77, ppl=6.82, wps=5798.8, ups=0.09, wpb=64797, bsz=128, num_updates=8504, lr=9.994e-05, gnorm=2.416, loss_scale=4, train_wall=11, gb_free=2.8, wall=97163
2021-06-19 21:38:20 | INFO | train_inner | epoch 003: 2554 / 3002 loss=2.513, ppl=5.71, wps=6025.8, ups=0.09, wpb=64849, bsz=128, num_updates=8505, lr=9.994e-05, gnorm=2.402, loss_scale=4, train_wall=10, gb_free=2.8, wall=97174
2021-06-19 21:38:31 | INFO | train_inner | epoch 003: 2555 / 3002 loss=2.619, ppl=6.14, wps=5902.9, ups=0.09, wpb=64825, bsz=128, num_updates=8506, lr=9.99399e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=97185
2021-06-19 21:38:42 | INFO | train_inner | epoch 003: 2556 / 3002 loss=2.675, ppl=6.39, wps=5901, ups=0.09, wpb=64836, bsz=128, num_updates=8507, lr=9.99399e-05, gnorm=2.413, loss_scale=4, train_wall=11, gb_free=2.8, wall=97196
2021-06-19 21:38:53 | INFO | train_inner | epoch 003: 2557 / 3002 loss=2.546, ppl=5.84, wps=5921.1, ups=0.09, wpb=64894, bsz=128, num_updates=8508, lr=9.99399e-05, gnorm=5.038, loss_scale=4, train_wall=11, gb_free=2.8, wall=97207
2021-06-19 21:39:04 | INFO | train_inner | epoch 003: 2558 / 3002 loss=2.702, ppl=6.51, wps=5788.7, ups=0.09, wpb=64823, bsz=128, num_updates=8509, lr=9.99399e-05, gnorm=2.234, loss_scale=4, train_wall=11, gb_free=2.8, wall=97218
2021-06-19 21:39:15 | INFO | train_inner | epoch 003: 2559 / 3002 loss=2.807, ppl=7, wps=5806.8, ups=0.09, wpb=64828, bsz=128, num_updates=8510, lr=9.99399e-05, gnorm=2.296, loss_scale=4, train_wall=11, gb_free=2.8, wall=97229
2021-06-19 21:39:26 | INFO | train_inner | epoch 003: 2560 / 3002 loss=2.593, ppl=6.04, wps=5799.9, ups=0.09, wpb=64841, bsz=128, num_updates=8511, lr=9.99399e-05, gnorm=2.472, loss_scale=4, train_wall=11, gb_free=2.8, wall=97240
2021-06-19 21:39:37 | INFO | train_inner | epoch 003: 2561 / 3002 loss=2.583, ppl=5.99, wps=5779.5, ups=0.09, wpb=64891, bsz=128, num_updates=8512, lr=9.99399e-05, gnorm=5.471, loss_scale=4, train_wall=11, gb_free=2.8, wall=97252
2021-06-19 21:39:48 | INFO | train_inner | epoch 003: 2562 / 3002 loss=2.771, ppl=6.82, wps=5880.7, ups=0.09, wpb=64775, bsz=128, num_updates=8513, lr=9.99399e-05, gnorm=2.127, loss_scale=4, train_wall=11, gb_free=2.8, wall=97263
2021-06-19 21:39:59 | INFO | train_inner | epoch 003: 2563 / 3002 loss=2.679, ppl=6.4, wps=5869, ups=0.09, wpb=64828, bsz=128, num_updates=8514, lr=9.99399e-05, gnorm=2.337, loss_scale=4, train_wall=11, gb_free=2.8, wall=97274
2021-06-19 21:40:10 | INFO | train_inner | epoch 003: 2564 / 3002 loss=2.473, ppl=5.55, wps=5963.3, ups=0.09, wpb=64907, bsz=128, num_updates=8515, lr=9.99399e-05, gnorm=2.17, loss_scale=4, train_wall=10, gb_free=2.8, wall=97285
2021-06-19 21:40:22 | INFO | train_inner | epoch 003: 2565 / 3002 loss=2.687, ppl=6.44, wps=5726.2, ups=0.09, wpb=64784, bsz=128, num_updates=8516, lr=9.99399e-05, gnorm=2.349, loss_scale=4, train_wall=11, gb_free=2.8, wall=97296
2021-06-19 21:40:33 | INFO | train_inner | epoch 003: 2566 / 3002 loss=2.736, ppl=6.66, wps=5819.8, ups=0.09, wpb=64815, bsz=128, num_updates=8517, lr=9.99399e-05, gnorm=6.681, loss_scale=4, train_wall=11, gb_free=2.8, wall=97307
2021-06-19 21:40:44 | INFO | train_inner | epoch 003: 2567 / 3002 loss=2.592, ppl=6.03, wps=5934.6, ups=0.09, wpb=64808, bsz=128, num_updates=8518, lr=9.99399e-05, gnorm=2.52, loss_scale=4, train_wall=10, gb_free=2.8, wall=97318
2021-06-19 21:40:55 | INFO | train_inner | epoch 003: 2568 / 3002 loss=2.704, ppl=6.52, wps=5778.2, ups=0.09, wpb=64837, bsz=128, num_updates=8519, lr=9.99398e-05, gnorm=2.233, loss_scale=4, train_wall=11, gb_free=2.8, wall=97329
2021-06-19 21:41:06 | INFO | train_inner | epoch 003: 2569 / 3002 loss=2.615, ppl=6.13, wps=5827.9, ups=0.09, wpb=64845, bsz=128, num_updates=8520, lr=9.99398e-05, gnorm=2.265, loss_scale=4, train_wall=11, gb_free=2.8, wall=97340
2021-06-19 21:41:17 | INFO | train_inner | epoch 003: 2570 / 3002 loss=2.694, ppl=6.47, wps=5822, ups=0.09, wpb=64784, bsz=128, num_updates=8521, lr=9.99398e-05, gnorm=3.24, loss_scale=4, train_wall=11, gb_free=2.8, wall=97351
2021-06-19 21:41:28 | INFO | train_inner | epoch 003: 2571 / 3002 loss=2.544, ppl=5.83, wps=5860.5, ups=0.09, wpb=64752, bsz=128, num_updates=8522, lr=9.99398e-05, gnorm=2.433, loss_scale=4, train_wall=11, gb_free=2.8, wall=97363
2021-06-19 21:41:39 | INFO | train_inner | epoch 003: 2572 / 3002 loss=2.709, ppl=6.54, wps=5836.2, ups=0.09, wpb=64729, bsz=128, num_updates=8523, lr=9.99398e-05, gnorm=2.455, loss_scale=4, train_wall=11, gb_free=2.8, wall=97374
2021-06-19 21:41:50 | INFO | train_inner | epoch 003: 2573 / 3002 loss=2.604, ppl=6.08, wps=5892.4, ups=0.09, wpb=64909, bsz=128, num_updates=8524, lr=9.99398e-05, gnorm=10.401, loss_scale=4, train_wall=11, gb_free=2.8, wall=97385
2021-06-19 21:42:02 | INFO | train_inner | epoch 003: 2574 / 3002 loss=2.577, ppl=5.97, wps=5770.5, ups=0.09, wpb=64850, bsz=128, num_updates=8525, lr=9.99398e-05, gnorm=2.423, loss_scale=4, train_wall=11, gb_free=2.8, wall=97396
2021-06-19 21:42:13 | INFO | train_inner | epoch 003: 2575 / 3002 loss=2.801, ppl=6.97, wps=5888.7, ups=0.09, wpb=64762, bsz=128, num_updates=8526, lr=9.99398e-05, gnorm=2.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=97407
2021-06-19 21:42:24 | INFO | train_inner | epoch 003: 2576 / 3002 loss=2.47, ppl=5.54, wps=5847, ups=0.09, wpb=64860, bsz=128, num_updates=8527, lr=9.99398e-05, gnorm=2.272, loss_scale=4, train_wall=11, gb_free=2.8, wall=97418
2021-06-19 21:42:35 | INFO | train_inner | epoch 003: 2577 / 3002 loss=2.354, ppl=5.11, wps=5859.8, ups=0.09, wpb=64818, bsz=128, num_updates=8528, lr=9.99398e-05, gnorm=3.504, loss_scale=4, train_wall=11, gb_free=2.8, wall=97429
2021-06-19 21:42:46 | INFO | train_inner | epoch 003: 2578 / 3002 loss=2.616, ppl=6.13, wps=5888.7, ups=0.09, wpb=64808, bsz=128, num_updates=8529, lr=9.99398e-05, gnorm=2.863, loss_scale=4, train_wall=11, gb_free=2.8, wall=97440
2021-06-19 21:42:57 | INFO | train_inner | epoch 003: 2579 / 3002 loss=2.525, ppl=5.75, wps=5780, ups=0.09, wpb=64777, bsz=128, num_updates=8530, lr=9.99398e-05, gnorm=2.81, loss_scale=4, train_wall=11, gb_free=2.8, wall=97451
2021-06-19 21:43:08 | INFO | train_inner | epoch 003: 2580 / 3002 loss=2.678, ppl=6.4, wps=5852.1, ups=0.09, wpb=64805, bsz=128, num_updates=8531, lr=9.99397e-05, gnorm=2.432, loss_scale=4, train_wall=11, gb_free=2.8, wall=97462
2021-06-19 21:43:19 | INFO | train_inner | epoch 003: 2581 / 3002 loss=2.53, ppl=5.78, wps=5872.3, ups=0.09, wpb=64888, bsz=128, num_updates=8532, lr=9.99397e-05, gnorm=2.322, loss_scale=4, train_wall=11, gb_free=2.8, wall=97473
2021-06-19 21:43:30 | INFO | train_inner | epoch 003: 2582 / 3002 loss=2.645, ppl=6.26, wps=5807, ups=0.09, wpb=64826, bsz=128, num_updates=8533, lr=9.99397e-05, gnorm=2.164, loss_scale=4, train_wall=11, gb_free=2.8, wall=97485
2021-06-19 21:43:41 | INFO | train_inner | epoch 003: 2583 / 3002 loss=2.585, ppl=6, wps=5880.2, ups=0.09, wpb=64832, bsz=128, num_updates=8534, lr=9.99397e-05, gnorm=5.159, loss_scale=4, train_wall=11, gb_free=2.8, wall=97496
2021-06-19 21:43:52 | INFO | train_inner | epoch 003: 2584 / 3002 loss=2.591, ppl=6.02, wps=5909.1, ups=0.09, wpb=64810, bsz=128, num_updates=8535, lr=9.99397e-05, gnorm=13.055, loss_scale=4, train_wall=11, gb_free=2.8, wall=97507
2021-06-19 21:44:03 | INFO | train_inner | epoch 003: 2585 / 3002 loss=2.61, ppl=6.11, wps=5781.7, ups=0.09, wpb=64756, bsz=128, num_updates=8536, lr=9.99397e-05, gnorm=2.806, loss_scale=4, train_wall=11, gb_free=2.8, wall=97518
2021-06-19 21:44:15 | INFO | train_inner | epoch 003: 2586 / 3002 loss=2.492, ppl=5.62, wps=5730.6, ups=0.09, wpb=64811, bsz=128, num_updates=8537, lr=9.99397e-05, gnorm=2.449, loss_scale=4, train_wall=11, gb_free=2.8, wall=97529
2021-06-19 21:44:26 | INFO | train_inner | epoch 003: 2587 / 3002 loss=2.705, ppl=6.52, wps=5719, ups=0.09, wpb=64776, bsz=128, num_updates=8538, lr=9.99397e-05, gnorm=2.404, loss_scale=4, train_wall=11, gb_free=2.8, wall=97540
2021-06-19 21:44:37 | INFO | train_inner | epoch 003: 2588 / 3002 loss=2.652, ppl=6.29, wps=5814.3, ups=0.09, wpb=64886, bsz=128, num_updates=8539, lr=9.99397e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=97552
2021-06-19 21:44:48 | INFO | train_inner | epoch 003: 2589 / 3002 loss=2.72, ppl=6.59, wps=5918, ups=0.09, wpb=64744, bsz=128, num_updates=8540, lr=9.99397e-05, gnorm=2.726, loss_scale=4, train_wall=10, gb_free=2.8, wall=97562
2021-06-19 21:44:59 | INFO | train_inner | epoch 003: 2590 / 3002 loss=2.456, ppl=5.49, wps=5849, ups=0.09, wpb=64797, bsz=128, num_updates=8541, lr=9.99397e-05, gnorm=2.56, loss_scale=4, train_wall=11, gb_free=2.8, wall=97574
2021-06-19 21:45:10 | INFO | train_inner | epoch 003: 2591 / 3002 loss=2.599, ppl=6.06, wps=5773.3, ups=0.09, wpb=64786, bsz=128, num_updates=8542, lr=9.99397e-05, gnorm=4.11, loss_scale=4, train_wall=11, gb_free=2.8, wall=97585
2021-06-19 21:45:22 | INFO | train_inner | epoch 003: 2592 / 3002 loss=2.642, ppl=6.24, wps=5750.3, ups=0.09, wpb=64898, bsz=128, num_updates=8543, lr=9.99397e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=97596
2021-06-19 21:45:33 | INFO | train_inner | epoch 003: 2593 / 3002 loss=2.67, ppl=6.37, wps=5849.5, ups=0.09, wpb=64893, bsz=128, num_updates=8544, lr=9.99396e-05, gnorm=2.34, loss_scale=4, train_wall=11, gb_free=2.8, wall=97607
2021-06-19 21:45:44 | INFO | train_inner | epoch 003: 2594 / 3002 loss=2.511, ppl=5.7, wps=5840.7, ups=0.09, wpb=64852, bsz=128, num_updates=8545, lr=9.99396e-05, gnorm=2.256, loss_scale=4, train_wall=11, gb_free=2.8, wall=97618
2021-06-19 21:45:55 | INFO | train_inner | epoch 003: 2595 / 3002 loss=2.546, ppl=5.84, wps=5921.6, ups=0.09, wpb=64842, bsz=128, num_updates=8546, lr=9.99396e-05, gnorm=2.489, loss_scale=4, train_wall=11, gb_free=2.8, wall=97629
2021-06-19 21:46:06 | INFO | train_inner | epoch 003: 2596 / 3002 loss=2.615, ppl=6.13, wps=5836, ups=0.09, wpb=64852, bsz=128, num_updates=8547, lr=9.99396e-05, gnorm=2.449, loss_scale=4, train_wall=11, gb_free=2.8, wall=97640
2021-06-19 21:46:17 | INFO | train_inner | epoch 003: 2597 / 3002 loss=2.694, ppl=6.47, wps=5820.4, ups=0.09, wpb=64875, bsz=128, num_updates=8548, lr=9.99396e-05, gnorm=2.678, loss_scale=4, train_wall=11, gb_free=2.8, wall=97651
2021-06-19 21:46:28 | INFO | train_inner | epoch 003: 2598 / 3002 loss=2.583, ppl=5.99, wps=5852.4, ups=0.09, wpb=64783, bsz=128, num_updates=8549, lr=9.99396e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=97663
2021-06-19 21:46:39 | INFO | train_inner | epoch 003: 2599 / 3002 loss=2.591, ppl=6.02, wps=5859.8, ups=0.09, wpb=64883, bsz=128, num_updates=8550, lr=9.99396e-05, gnorm=3.609, loss_scale=4, train_wall=11, gb_free=2.8, wall=97674
2021-06-19 21:46:50 | INFO | train_inner | epoch 003: 2600 / 3002 loss=2.71, ppl=6.55, wps=5909.7, ups=0.09, wpb=64848, bsz=128, num_updates=8551, lr=9.99396e-05, gnorm=9.772, loss_scale=4, train_wall=10, gb_free=2.8, wall=97685
2021-06-19 21:47:01 | INFO | train_inner | epoch 003: 2601 / 3002 loss=2.647, ppl=6.26, wps=5770.8, ups=0.09, wpb=64846, bsz=128, num_updates=8552, lr=9.99396e-05, gnorm=2.8, loss_scale=4, train_wall=11, gb_free=2.8, wall=97696
2021-06-19 21:47:13 | INFO | train_inner | epoch 003: 2602 / 3002 loss=2.601, ppl=6.07, wps=5859.7, ups=0.09, wpb=64875, bsz=128, num_updates=8553, lr=9.99396e-05, gnorm=3.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=97707
2021-06-19 21:47:24 | INFO | train_inner | epoch 003: 2603 / 3002 loss=2.809, ppl=7.01, wps=5829.6, ups=0.09, wpb=64835, bsz=128, num_updates=8554, lr=9.99396e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=97718
2021-06-19 21:47:35 | INFO | train_inner | epoch 003: 2604 / 3002 loss=2.505, ppl=5.68, wps=5908.6, ups=0.09, wpb=64769, bsz=128, num_updates=8555, lr=9.99396e-05, gnorm=22.027, loss_scale=4, train_wall=10, gb_free=2.8, wall=97729
2021-06-19 21:47:46 | INFO | train_inner | epoch 003: 2605 / 3002 loss=2.603, ppl=6.07, wps=5891, ups=0.09, wpb=64804, bsz=128, num_updates=8556, lr=9.99395e-05, gnorm=2.206, loss_scale=4, train_wall=11, gb_free=2.8, wall=97740
2021-06-19 21:47:57 | INFO | train_inner | epoch 003: 2606 / 3002 loss=2.626, ppl=6.17, wps=5744.1, ups=0.09, wpb=64783, bsz=128, num_updates=8557, lr=9.99395e-05, gnorm=2.894, loss_scale=4, train_wall=11, gb_free=2.8, wall=97751
2021-06-19 21:48:08 | INFO | train_inner | epoch 003: 2607 / 3002 loss=2.758, ppl=6.76, wps=5893.8, ups=0.09, wpb=64788, bsz=128, num_updates=8558, lr=9.99395e-05, gnorm=3.008, loss_scale=4, train_wall=11, gb_free=2.8, wall=97762
2021-06-19 21:48:19 | INFO | train_inner | epoch 003: 2608 / 3002 loss=2.752, ppl=6.74, wps=5774.4, ups=0.09, wpb=64844, bsz=128, num_updates=8559, lr=9.99395e-05, gnorm=6.844, loss_scale=4, train_wall=11, gb_free=2.8, wall=97773
2021-06-19 21:48:30 | INFO | train_inner | epoch 003: 2609 / 3002 loss=2.799, ppl=6.96, wps=5976.2, ups=0.09, wpb=64824, bsz=128, num_updates=8560, lr=9.99395e-05, gnorm=5.527, loss_scale=4, train_wall=10, gb_free=2.8, wall=97784
2021-06-19 21:48:41 | INFO | train_inner | epoch 003: 2610 / 3002 loss=2.588, ppl=6.01, wps=5908.3, ups=0.09, wpb=64895, bsz=128, num_updates=8561, lr=9.99395e-05, gnorm=2.4, loss_scale=4, train_wall=10, gb_free=2.8, wall=97795
2021-06-19 21:48:52 | INFO | train_inner | epoch 003: 2611 / 3002 loss=2.707, ppl=6.53, wps=5844.9, ups=0.09, wpb=64836, bsz=128, num_updates=8562, lr=9.99395e-05, gnorm=2.437, loss_scale=4, train_wall=11, gb_free=2.8, wall=97806
2021-06-19 21:49:03 | INFO | train_inner | epoch 003: 2612 / 3002 loss=2.601, ppl=6.07, wps=5828, ups=0.09, wpb=64843, bsz=128, num_updates=8563, lr=9.99395e-05, gnorm=2.26, loss_scale=4, train_wall=11, gb_free=2.8, wall=97817
2021-06-19 21:49:14 | INFO | train_inner | epoch 003: 2613 / 3002 loss=2.719, ppl=6.58, wps=5858.2, ups=0.09, wpb=64830, bsz=128, num_updates=8564, lr=9.99395e-05, gnorm=2.439, loss_scale=4, train_wall=11, gb_free=2.8, wall=97829
2021-06-19 21:49:25 | INFO | train_inner | epoch 003: 2614 / 3002 loss=2.71, ppl=6.54, wps=5820, ups=0.09, wpb=64729, bsz=128, num_updates=8565, lr=9.99395e-05, gnorm=2.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=97840
2021-06-19 21:49:37 | INFO | train_inner | epoch 003: 2615 / 3002 loss=2.659, ppl=6.31, wps=5756.8, ups=0.09, wpb=64781, bsz=128, num_updates=8566, lr=9.99395e-05, gnorm=2.738, loss_scale=4, train_wall=11, gb_free=2.8, wall=97851
2021-06-19 21:49:48 | INFO | train_inner | epoch 003: 2616 / 3002 loss=2.832, ppl=7.12, wps=5932.5, ups=0.09, wpb=64853, bsz=128, num_updates=8567, lr=9.99395e-05, gnorm=2.305, loss_scale=4, train_wall=10, gb_free=2.8, wall=97862
2021-06-19 21:49:58 | INFO | train_inner | epoch 003: 2617 / 3002 loss=2.504, ppl=5.67, wps=5961.1, ups=0.09, wpb=64834, bsz=128, num_updates=8568, lr=9.99395e-05, gnorm=2.171, loss_scale=4, train_wall=10, gb_free=2.8, wall=97873
2021-06-19 21:50:09 | INFO | train_inner | epoch 003: 2618 / 3002 loss=2.646, ppl=6.26, wps=5940.7, ups=0.09, wpb=64813, bsz=128, num_updates=8569, lr=9.99394e-05, gnorm=2.242, loss_scale=4, train_wall=10, gb_free=2.8, wall=97884
2021-06-19 21:50:20 | INFO | train_inner | epoch 003: 2619 / 3002 loss=2.854, ppl=7.23, wps=5819.4, ups=0.09, wpb=64745, bsz=128, num_updates=8570, lr=9.99394e-05, gnorm=2.423, loss_scale=4, train_wall=11, gb_free=2.8, wall=97895
2021-06-19 21:50:32 | INFO | train_inner | epoch 003: 2620 / 3002 loss=2.641, ppl=6.24, wps=5834, ups=0.09, wpb=64834, bsz=128, num_updates=8571, lr=9.99394e-05, gnorm=2.173, loss_scale=4, train_wall=11, gb_free=2.8, wall=97906
2021-06-19 21:50:43 | INFO | train_inner | epoch 003: 2621 / 3002 loss=2.677, ppl=6.39, wps=5840.3, ups=0.09, wpb=64838, bsz=128, num_updates=8572, lr=9.99394e-05, gnorm=2.602, loss_scale=4, train_wall=11, gb_free=2.8, wall=97917
2021-06-19 21:50:54 | INFO | train_inner | epoch 003: 2622 / 3002 loss=2.78, ppl=6.87, wps=5762.8, ups=0.09, wpb=64779, bsz=128, num_updates=8573, lr=9.99394e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=97928
2021-06-19 21:51:05 | INFO | train_inner | epoch 003: 2623 / 3002 loss=2.759, ppl=6.77, wps=5853.5, ups=0.09, wpb=64842, bsz=128, num_updates=8574, lr=9.99394e-05, gnorm=2.262, loss_scale=4, train_wall=11, gb_free=2.8, wall=97939
2021-06-19 21:51:16 | INFO | train_inner | epoch 003: 2624 / 3002 loss=2.686, ppl=6.43, wps=5790.3, ups=0.09, wpb=64811, bsz=128, num_updates=8575, lr=9.99394e-05, gnorm=2.207, loss_scale=4, train_wall=11, gb_free=2.8, wall=97951
2021-06-19 21:51:27 | INFO | train_inner | epoch 003: 2625 / 3002 loss=2.545, ppl=5.84, wps=5725.3, ups=0.09, wpb=64767, bsz=128, num_updates=8576, lr=9.99394e-05, gnorm=2.112, loss_scale=4, train_wall=11, gb_free=2.8, wall=97962
2021-06-19 21:51:38 | INFO | train_inner | epoch 003: 2626 / 3002 loss=2.481, ppl=5.58, wps=5937.7, ups=0.09, wpb=64931, bsz=128, num_updates=8577, lr=9.99394e-05, gnorm=2.149, loss_scale=4, train_wall=10, gb_free=2.8, wall=97973
2021-06-19 21:51:50 | INFO | train_inner | epoch 003: 2627 / 3002 loss=2.685, ppl=6.43, wps=5806.7, ups=0.09, wpb=64900, bsz=128, num_updates=8578, lr=9.99394e-05, gnorm=2.345, loss_scale=4, train_wall=11, gb_free=2.8, wall=97984
2021-06-19 21:52:01 | INFO | train_inner | epoch 003: 2628 / 3002 loss=2.613, ppl=6.12, wps=5864.1, ups=0.09, wpb=64829, bsz=128, num_updates=8579, lr=9.99394e-05, gnorm=3.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=97995
2021-06-19 21:52:12 | INFO | train_inner | epoch 003: 2629 / 3002 loss=2.581, ppl=5.98, wps=5807.6, ups=0.09, wpb=64846, bsz=128, num_updates=8580, lr=9.99394e-05, gnorm=2.36, loss_scale=4, train_wall=11, gb_free=2.8, wall=98006
2021-06-19 21:52:23 | INFO | train_inner | epoch 003: 2630 / 3002 loss=2.626, ppl=6.17, wps=5836.2, ups=0.09, wpb=64923, bsz=128, num_updates=8581, lr=9.99393e-05, gnorm=2.406, loss_scale=4, train_wall=11, gb_free=2.8, wall=98017
2021-06-19 21:52:34 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-19 21:52:45 | INFO | train_inner | epoch 003: 2632 / 3002 loss=2.723, ppl=6.6, wps=2910.2, ups=0.04, wpb=64837, bsz=128, num_updates=8582, lr=9.99393e-05, gnorm=2.172, loss_scale=2, train_wall=21, gb_free=2.8, wall=98040
2021-06-19 21:52:56 | INFO | train_inner | epoch 003: 2633 / 3002 loss=2.65, ppl=6.28, wps=5768.6, ups=0.09, wpb=64858, bsz=128, num_updates=8583, lr=9.99393e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=98051
2021-06-19 21:53:08 | INFO | train_inner | epoch 003: 2634 / 3002 loss=2.747, ppl=6.71, wps=5866.8, ups=0.09, wpb=64765, bsz=128, num_updates=8584, lr=9.99393e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=98062
2021-06-19 21:53:19 | INFO | train_inner | epoch 003: 2635 / 3002 loss=2.689, ppl=6.45, wps=5795, ups=0.09, wpb=64818, bsz=128, num_updates=8585, lr=9.99393e-05, gnorm=2.158, loss_scale=2, train_wall=11, gb_free=2.8, wall=98073
2021-06-19 21:53:30 | INFO | train_inner | epoch 003: 2636 / 3002 loss=2.513, ppl=5.71, wps=5821, ups=0.09, wpb=64840, bsz=128, num_updates=8586, lr=9.99393e-05, gnorm=8.682, loss_scale=2, train_wall=11, gb_free=2.8, wall=98084
2021-06-19 21:53:41 | INFO | train_inner | epoch 003: 2637 / 3002 loss=2.648, ppl=6.27, wps=5800.5, ups=0.09, wpb=64797, bsz=128, num_updates=8587, lr=9.99393e-05, gnorm=2.287, loss_scale=2, train_wall=11, gb_free=2.8, wall=98095
2021-06-19 21:53:52 | INFO | train_inner | epoch 003: 2638 / 3002 loss=2.638, ppl=6.23, wps=5810.5, ups=0.09, wpb=64765, bsz=128, num_updates=8588, lr=9.99393e-05, gnorm=4.189, loss_scale=2, train_wall=11, gb_free=2.8, wall=98106
2021-06-19 21:54:03 | INFO | train_inner | epoch 003: 2639 / 3002 loss=2.53, ppl=5.78, wps=5897.4, ups=0.09, wpb=64856, bsz=128, num_updates=8589, lr=9.99393e-05, gnorm=2.203, loss_scale=2, train_wall=11, gb_free=2.8, wall=98117
2021-06-19 21:54:14 | INFO | train_inner | epoch 003: 2640 / 3002 loss=2.495, ppl=5.64, wps=5872.4, ups=0.09, wpb=64866, bsz=128, num_updates=8590, lr=9.99393e-05, gnorm=2.144, loss_scale=2, train_wall=11, gb_free=2.8, wall=98129
2021-06-19 21:54:25 | INFO | train_inner | epoch 003: 2641 / 3002 loss=2.484, ppl=5.59, wps=5906.4, ups=0.09, wpb=64908, bsz=128, num_updates=8591, lr=9.99393e-05, gnorm=2.491, loss_scale=2, train_wall=11, gb_free=2.8, wall=98140
2021-06-19 21:54:36 | INFO | train_inner | epoch 003: 2642 / 3002 loss=2.5, ppl=5.66, wps=5839.2, ups=0.09, wpb=64902, bsz=128, num_updates=8592, lr=9.99393e-05, gnorm=2.098, loss_scale=2, train_wall=11, gb_free=2.8, wall=98151
2021-06-19 21:54:47 | INFO | train_inner | epoch 003: 2643 / 3002 loss=2.551, ppl=5.86, wps=5867.8, ups=0.09, wpb=64795, bsz=128, num_updates=8593, lr=9.99393e-05, gnorm=13.362, loss_scale=2, train_wall=11, gb_free=2.8, wall=98162
2021-06-19 21:54:58 | INFO | train_inner | epoch 003: 2644 / 3002 loss=2.673, ppl=6.38, wps=5857.3, ups=0.09, wpb=64805, bsz=128, num_updates=8594, lr=9.99392e-05, gnorm=2.07, loss_scale=2, train_wall=11, gb_free=2.8, wall=98173
2021-06-19 21:55:09 | INFO | train_inner | epoch 003: 2645 / 3002 loss=2.627, ppl=6.18, wps=5873.3, ups=0.09, wpb=64818, bsz=128, num_updates=8595, lr=9.99392e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=98184
2021-06-19 21:55:20 | INFO | train_inner | epoch 003: 2646 / 3002 loss=2.533, ppl=5.79, wps=5916.6, ups=0.09, wpb=64818, bsz=128, num_updates=8596, lr=9.99392e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=98195
2021-06-19 21:55:32 | INFO | train_inner | epoch 003: 2647 / 3002 loss=2.695, ppl=6.47, wps=5758.5, ups=0.09, wpb=64807, bsz=128, num_updates=8597, lr=9.99392e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=98206
2021-06-19 21:55:43 | INFO | train_inner | epoch 003: 2648 / 3002 loss=2.681, ppl=6.41, wps=5834, ups=0.09, wpb=64822, bsz=128, num_updates=8598, lr=9.99392e-05, gnorm=2.31, loss_scale=2, train_wall=11, gb_free=2.8, wall=98217
2021-06-19 21:55:54 | INFO | train_inner | epoch 003: 2649 / 3002 loss=2.632, ppl=6.2, wps=5939.9, ups=0.09, wpb=64860, bsz=128, num_updates=8599, lr=9.99392e-05, gnorm=2.804, loss_scale=2, train_wall=10, gb_free=2.8, wall=98228
2021-06-19 21:56:05 | INFO | train_inner | epoch 003: 2650 / 3002 loss=2.583, ppl=5.99, wps=5826.1, ups=0.09, wpb=64900, bsz=128, num_updates=8600, lr=9.99392e-05, gnorm=2.664, loss_scale=2, train_wall=11, gb_free=2.8, wall=98239
2021-06-19 21:56:16 | INFO | train_inner | epoch 003: 2651 / 3002 loss=2.768, ppl=6.81, wps=5911.3, ups=0.09, wpb=64785, bsz=128, num_updates=8601, lr=9.99392e-05, gnorm=2.339, loss_scale=2, train_wall=10, gb_free=2.8, wall=98250
2021-06-19 21:56:27 | INFO | train_inner | epoch 003: 2652 / 3002 loss=2.651, ppl=6.28, wps=5879.4, ups=0.09, wpb=64781, bsz=128, num_updates=8602, lr=9.99392e-05, gnorm=19.144, loss_scale=2, train_wall=11, gb_free=2.8, wall=98261
2021-06-19 21:56:38 | INFO | train_inner | epoch 003: 2653 / 3002 loss=2.507, ppl=5.69, wps=5809.4, ups=0.09, wpb=64867, bsz=128, num_updates=8603, lr=9.99392e-05, gnorm=2.375, loss_scale=2, train_wall=11, gb_free=2.8, wall=98272
2021-06-19 21:56:49 | INFO | train_inner | epoch 003: 2654 / 3002 loss=2.488, ppl=5.61, wps=5745.4, ups=0.09, wpb=64858, bsz=128, num_updates=8604, lr=9.99392e-05, gnorm=2.108, loss_scale=2, train_wall=11, gb_free=2.8, wall=98284
2021-06-19 21:57:00 | INFO | train_inner | epoch 003: 2655 / 3002 loss=2.729, ppl=6.63, wps=5778.7, ups=0.09, wpb=64825, bsz=128, num_updates=8605, lr=9.99392e-05, gnorm=2.242, loss_scale=2, train_wall=11, gb_free=2.8, wall=98295
2021-06-19 21:57:11 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-19 21:57:23 | INFO | train_inner | epoch 003: 2657 / 3002 loss=2.566, ppl=5.92, wps=2932.5, ups=0.05, wpb=64855, bsz=128, num_updates=8606, lr=9.99391e-05, gnorm=2.21, loss_scale=1, train_wall=21, gb_free=2.8, wall=98317
2021-06-19 21:57:34 | INFO | train_inner | epoch 003: 2658 / 3002 loss=2.633, ppl=6.2, wps=5851.6, ups=0.09, wpb=64788, bsz=128, num_updates=8607, lr=9.99391e-05, gnorm=4.302, loss_scale=1, train_wall=11, gb_free=2.8, wall=98328
2021-06-19 21:57:45 | INFO | train_inner | epoch 003: 2659 / 3002 loss=2.651, ppl=6.28, wps=5888.9, ups=0.09, wpb=64828, bsz=128, num_updates=8608, lr=9.99391e-05, gnorm=2.142, loss_scale=1, train_wall=11, gb_free=2.8, wall=98339
2021-06-19 21:57:56 | INFO | train_inner | epoch 003: 2660 / 3002 loss=2.559, ppl=5.89, wps=5823.3, ups=0.09, wpb=64843, bsz=128, num_updates=8609, lr=9.99391e-05, gnorm=2.843, loss_scale=1, train_wall=11, gb_free=2.8, wall=98350
2021-06-19 21:58:07 | INFO | train_inner | epoch 003: 2661 / 3002 loss=2.543, ppl=5.83, wps=5770.5, ups=0.09, wpb=64825, bsz=128, num_updates=8610, lr=9.99391e-05, gnorm=2.207, loss_scale=1, train_wall=11, gb_free=2.8, wall=98361
2021-06-19 21:58:18 | INFO | train_inner | epoch 003: 2662 / 3002 loss=2.707, ppl=6.53, wps=5900.6, ups=0.09, wpb=64774, bsz=128, num_updates=8611, lr=9.99391e-05, gnorm=4.007, loss_scale=1, train_wall=11, gb_free=2.8, wall=98372
2021-06-19 21:58:29 | INFO | train_inner | epoch 003: 2663 / 3002 loss=2.731, ppl=6.64, wps=5741.8, ups=0.09, wpb=64782, bsz=128, num_updates=8612, lr=9.99391e-05, gnorm=2.052, loss_scale=1, train_wall=11, gb_free=2.8, wall=98384
2021-06-19 21:58:40 | INFO | train_inner | epoch 003: 2664 / 3002 loss=2.633, ppl=6.2, wps=5809.1, ups=0.09, wpb=64818, bsz=128, num_updates=8613, lr=9.99391e-05, gnorm=2.193, loss_scale=1, train_wall=11, gb_free=2.8, wall=98395
2021-06-19 21:58:52 | INFO | train_inner | epoch 003: 2665 / 3002 loss=2.69, ppl=6.45, wps=5855, ups=0.09, wpb=64866, bsz=128, num_updates=8614, lr=9.99391e-05, gnorm=2.13, loss_scale=1, train_wall=11, gb_free=2.8, wall=98406
2021-06-19 21:59:03 | INFO | train_inner | epoch 003: 2666 / 3002 loss=2.701, ppl=6.5, wps=5751.5, ups=0.09, wpb=64851, bsz=128, num_updates=8615, lr=9.99391e-05, gnorm=2.65, loss_scale=1, train_wall=11, gb_free=2.8, wall=98417
2021-06-19 21:59:14 | INFO | train_inner | epoch 003: 2667 / 3002 loss=2.569, ppl=5.94, wps=5970.9, ups=0.09, wpb=64822, bsz=128, num_updates=8616, lr=9.99391e-05, gnorm=2.297, loss_scale=1, train_wall=10, gb_free=2.8, wall=98428
2021-06-19 21:59:25 | INFO | train_inner | epoch 003: 2668 / 3002 loss=2.621, ppl=6.15, wps=5890.8, ups=0.09, wpb=64787, bsz=128, num_updates=8617, lr=9.99391e-05, gnorm=2.101, loss_scale=1, train_wall=11, gb_free=2.8, wall=98439
2021-06-19 21:59:36 | INFO | train_inner | epoch 003: 2669 / 3002 loss=2.67, ppl=6.37, wps=5769.3, ups=0.09, wpb=64781, bsz=128, num_updates=8618, lr=9.99391e-05, gnorm=2.223, loss_scale=1, train_wall=11, gb_free=2.8, wall=98450
2021-06-19 21:59:47 | INFO | train_inner | epoch 003: 2670 / 3002 loss=2.561, ppl=5.9, wps=5831.5, ups=0.09, wpb=64869, bsz=128, num_updates=8619, lr=9.9939e-05, gnorm=2.129, loss_scale=1, train_wall=11, gb_free=2.8, wall=98461
2021-06-19 21:59:58 | INFO | train_inner | epoch 003: 2671 / 3002 loss=2.738, ppl=6.67, wps=5816.6, ups=0.09, wpb=64763, bsz=128, num_updates=8620, lr=9.9939e-05, gnorm=2.51, loss_scale=1, train_wall=11, gb_free=2.8, wall=98472
2021-06-19 22:00:09 | INFO | train_inner | epoch 003: 2672 / 3002 loss=2.522, ppl=5.74, wps=5797.3, ups=0.09, wpb=64895, bsz=128, num_updates=8621, lr=9.9939e-05, gnorm=2.066, loss_scale=1, train_wall=11, gb_free=2.8, wall=98484
2021-06-19 22:00:21 | INFO | train_inner | epoch 003: 2673 / 3002 loss=2.568, ppl=5.93, wps=5793, ups=0.09, wpb=64859, bsz=128, num_updates=8622, lr=9.9939e-05, gnorm=2.296, loss_scale=1, train_wall=11, gb_free=2.8, wall=98495
2021-06-19 22:00:32 | INFO | train_inner | epoch 003: 2674 / 3002 loss=2.614, ppl=6.12, wps=5910.7, ups=0.09, wpb=64883, bsz=128, num_updates=8623, lr=9.9939e-05, gnorm=2.761, loss_scale=1, train_wall=11, gb_free=2.8, wall=98506
2021-06-19 22:00:43 | INFO | train_inner | epoch 003: 2675 / 3002 loss=2.631, ppl=6.19, wps=5888, ups=0.09, wpb=64822, bsz=128, num_updates=8624, lr=9.9939e-05, gnorm=2.118, loss_scale=1, train_wall=11, gb_free=2.8, wall=98517
2021-06-19 22:00:54 | INFO | train_inner | epoch 003: 2676 / 3002 loss=2.549, ppl=5.85, wps=5778, ups=0.09, wpb=64838, bsz=128, num_updates=8625, lr=9.9939e-05, gnorm=2.612, loss_scale=1, train_wall=11, gb_free=2.8, wall=98528
2021-06-19 22:01:05 | INFO | train_inner | epoch 003: 2677 / 3002 loss=2.634, ppl=6.21, wps=5865.6, ups=0.09, wpb=64862, bsz=128, num_updates=8626, lr=9.9939e-05, gnorm=2.129, loss_scale=1, train_wall=11, gb_free=2.8, wall=98539
2021-06-19 22:01:16 | INFO | train_inner | epoch 003: 2678 / 3002 loss=2.646, ppl=6.26, wps=5743.8, ups=0.09, wpb=64786, bsz=128, num_updates=8627, lr=9.9939e-05, gnorm=2.243, loss_scale=1, train_wall=11, gb_free=2.8, wall=98550
2021-06-19 22:01:27 | INFO | train_inner | epoch 003: 2679 / 3002 loss=2.627, ppl=6.18, wps=5860.7, ups=0.09, wpb=64811, bsz=128, num_updates=8628, lr=9.9939e-05, gnorm=2.362, loss_scale=1, train_wall=11, gb_free=2.8, wall=98561
2021-06-19 22:01:38 | INFO | train_inner | epoch 003: 2680 / 3002 loss=2.587, ppl=6.01, wps=5806.4, ups=0.09, wpb=64789, bsz=128, num_updates=8629, lr=9.9939e-05, gnorm=3.719, loss_scale=1, train_wall=11, gb_free=2.8, wall=98573
2021-06-19 22:01:50 | INFO | train_inner | epoch 003: 2681 / 3002 loss=2.605, ppl=6.08, wps=5794.4, ups=0.09, wpb=64888, bsz=128, num_updates=8630, lr=9.9939e-05, gnorm=2.098, loss_scale=1, train_wall=11, gb_free=2.8, wall=98584
2021-06-19 22:02:01 | INFO | train_inner | epoch 003: 2682 / 3002 loss=2.661, ppl=6.32, wps=5884.3, ups=0.09, wpb=64834, bsz=128, num_updates=8631, lr=9.99389e-05, gnorm=2.593, loss_scale=1, train_wall=11, gb_free=2.8, wall=98595
2021-06-19 22:02:12 | INFO | train_inner | epoch 003: 2683 / 3002 loss=2.626, ppl=6.17, wps=5884.4, ups=0.09, wpb=64868, bsz=128, num_updates=8632, lr=9.99389e-05, gnorm=3.182, loss_scale=1, train_wall=11, gb_free=2.8, wall=98606
2021-06-19 22:02:23 | INFO | train_inner | epoch 003: 2684 / 3002 loss=2.582, ppl=5.99, wps=5666.5, ups=0.09, wpb=64799, bsz=128, num_updates=8633, lr=9.99389e-05, gnorm=2.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=98617
2021-06-19 22:02:34 | INFO | train_inner | epoch 003: 2685 / 3002 loss=2.507, ppl=5.69, wps=5817.6, ups=0.09, wpb=64793, bsz=128, num_updates=8634, lr=9.99389e-05, gnorm=3.09, loss_scale=1, train_wall=11, gb_free=2.8, wall=98628
2021-06-19 22:02:45 | INFO | train_inner | epoch 003: 2686 / 3002 loss=2.625, ppl=6.17, wps=5837.3, ups=0.09, wpb=64831, bsz=128, num_updates=8635, lr=9.99389e-05, gnorm=2.128, loss_scale=1, train_wall=11, gb_free=2.8, wall=98640
2021-06-19 22:02:56 | INFO | train_inner | epoch 003: 2687 / 3002 loss=2.692, ppl=6.46, wps=5799.5, ups=0.09, wpb=64821, bsz=128, num_updates=8636, lr=9.99389e-05, gnorm=2.2, loss_scale=1, train_wall=11, gb_free=2.8, wall=98651
2021-06-19 22:03:07 | INFO | train_inner | epoch 003: 2688 / 3002 loss=2.564, ppl=5.91, wps=5867.1, ups=0.09, wpb=64894, bsz=128, num_updates=8637, lr=9.99389e-05, gnorm=2.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=98662
2021-06-19 22:03:19 | INFO | train_inner | epoch 003: 2689 / 3002 loss=2.675, ppl=6.39, wps=5835.6, ups=0.09, wpb=64824, bsz=128, num_updates=8638, lr=9.99389e-05, gnorm=2.12, loss_scale=1, train_wall=11, gb_free=2.8, wall=98673
2021-06-19 22:03:30 | INFO | train_inner | epoch 003: 2690 / 3002 loss=2.496, ppl=5.64, wps=5877.5, ups=0.09, wpb=64821, bsz=128, num_updates=8639, lr=9.99389e-05, gnorm=2.19, loss_scale=1, train_wall=11, gb_free=2.8, wall=98684
2021-06-19 22:03:41 | INFO | train_inner | epoch 003: 2691 / 3002 loss=2.609, ppl=6.1, wps=5909.9, ups=0.09, wpb=64878, bsz=128, num_updates=8640, lr=9.99389e-05, gnorm=2.845, loss_scale=1, train_wall=11, gb_free=2.8, wall=98695
2021-06-19 22:03:52 | INFO | train_inner | epoch 003: 2692 / 3002 loss=2.493, ppl=5.63, wps=5913.2, ups=0.09, wpb=64869, bsz=128, num_updates=8641, lr=9.99389e-05, gnorm=2.109, loss_scale=1, train_wall=11, gb_free=2.8, wall=98706
2021-06-19 22:04:03 | INFO | train_inner | epoch 003: 2693 / 3002 loss=2.488, ppl=5.61, wps=5836.9, ups=0.09, wpb=64800, bsz=128, num_updates=8642, lr=9.99389e-05, gnorm=2.05, loss_scale=1, train_wall=11, gb_free=2.8, wall=98717
2021-06-19 22:04:14 | INFO | train_inner | epoch 003: 2694 / 3002 loss=2.641, ppl=6.24, wps=5900.9, ups=0.09, wpb=64863, bsz=128, num_updates=8643, lr=9.99389e-05, gnorm=2.183, loss_scale=1, train_wall=11, gb_free=2.8, wall=98728
2021-06-19 22:04:25 | INFO | train_inner | epoch 003: 2695 / 3002 loss=2.571, ppl=5.94, wps=5879.7, ups=0.09, wpb=64832, bsz=128, num_updates=8644, lr=9.99388e-05, gnorm=2.35, loss_scale=1, train_wall=11, gb_free=2.8, wall=98739
2021-06-19 22:04:36 | INFO | train_inner | epoch 003: 2696 / 3002 loss=2.508, ppl=5.69, wps=5795, ups=0.09, wpb=64780, bsz=128, num_updates=8645, lr=9.99388e-05, gnorm=2.067, loss_scale=1, train_wall=11, gb_free=2.8, wall=98750
2021-06-19 22:04:47 | INFO | train_inner | epoch 003: 2697 / 3002 loss=2.552, ppl=5.86, wps=5830.8, ups=0.09, wpb=64836, bsz=128, num_updates=8646, lr=9.99388e-05, gnorm=2.168, loss_scale=1, train_wall=11, gb_free=2.8, wall=98761
2021-06-19 22:04:58 | INFO | train_inner | epoch 003: 2698 / 3002 loss=2.703, ppl=6.51, wps=5866.8, ups=0.09, wpb=64904, bsz=128, num_updates=8647, lr=9.99388e-05, gnorm=4.152, loss_scale=1, train_wall=11, gb_free=2.8, wall=98772
2021-06-19 22:05:09 | INFO | train_inner | epoch 003: 2699 / 3002 loss=2.667, ppl=6.35, wps=5908.1, ups=0.09, wpb=64799, bsz=128, num_updates=8648, lr=9.99388e-05, gnorm=2.304, loss_scale=1, train_wall=11, gb_free=2.8, wall=98783
2021-06-19 22:05:20 | INFO | train_inner | epoch 003: 2700 / 3002 loss=2.605, ppl=6.08, wps=5890.5, ups=0.09, wpb=64892, bsz=128, num_updates=8649, lr=9.99388e-05, gnorm=2.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=98794
2021-06-19 22:05:31 | INFO | train_inner | epoch 003: 2701 / 3002 loss=2.583, ppl=5.99, wps=5760, ups=0.09, wpb=64829, bsz=128, num_updates=8650, lr=9.99388e-05, gnorm=2.34, loss_scale=1, train_wall=11, gb_free=2.8, wall=98806
2021-06-19 22:05:42 | INFO | train_inner | epoch 003: 2702 / 3002 loss=2.561, ppl=5.9, wps=5839.4, ups=0.09, wpb=64878, bsz=128, num_updates=8651, lr=9.99388e-05, gnorm=2.12, loss_scale=1, train_wall=11, gb_free=2.8, wall=98817
2021-06-19 22:05:53 | INFO | train_inner | epoch 003: 2703 / 3002 loss=2.622, ppl=6.16, wps=5864.3, ups=0.09, wpb=64804, bsz=128, num_updates=8652, lr=9.99388e-05, gnorm=2.506, loss_scale=1, train_wall=11, gb_free=2.8, wall=98828
2021-06-19 22:06:04 | INFO | train_inner | epoch 003: 2704 / 3002 loss=2.599, ppl=6.06, wps=5917.4, ups=0.09, wpb=64743, bsz=128, num_updates=8653, lr=9.99388e-05, gnorm=2.295, loss_scale=1, train_wall=10, gb_free=2.8, wall=98839
2021-06-19 22:06:15 | INFO | train_inner | epoch 003: 2705 / 3002 loss=2.685, ppl=6.43, wps=5847, ups=0.09, wpb=64862, bsz=128, num_updates=8654, lr=9.99388e-05, gnorm=2.339, loss_scale=1, train_wall=11, gb_free=2.8, wall=98850
2021-06-19 22:06:27 | INFO | train_inner | epoch 003: 2706 / 3002 loss=2.525, ppl=5.76, wps=5881, ups=0.09, wpb=64834, bsz=128, num_updates=8655, lr=9.99388e-05, gnorm=2.552, loss_scale=1, train_wall=11, gb_free=2.8, wall=98861
2021-06-19 22:06:38 | INFO | train_inner | epoch 003: 2707 / 3002 loss=2.689, ppl=6.45, wps=5772.5, ups=0.09, wpb=64828, bsz=128, num_updates=8656, lr=9.99387e-05, gnorm=2.157, loss_scale=1, train_wall=11, gb_free=2.8, wall=98872
2021-06-19 22:06:49 | INFO | train_inner | epoch 003: 2708 / 3002 loss=2.511, ppl=5.7, wps=5870.7, ups=0.09, wpb=64811, bsz=128, num_updates=8657, lr=9.99387e-05, gnorm=2.087, loss_scale=1, train_wall=11, gb_free=2.8, wall=98883
2021-06-19 22:07:00 | INFO | train_inner | epoch 003: 2709 / 3002 loss=2.639, ppl=6.23, wps=5845.5, ups=0.09, wpb=64826, bsz=128, num_updates=8658, lr=9.99387e-05, gnorm=2.031, loss_scale=1, train_wall=11, gb_free=2.8, wall=98894
2021-06-19 22:07:11 | INFO | train_inner | epoch 003: 2710 / 3002 loss=2.527, ppl=5.77, wps=5971.5, ups=0.09, wpb=64901, bsz=128, num_updates=8659, lr=9.99387e-05, gnorm=12.579, loss_scale=1, train_wall=10, gb_free=2.8, wall=98905
2021-06-19 22:07:22 | INFO | train_inner | epoch 003: 2711 / 3002 loss=2.731, ppl=6.64, wps=5828.1, ups=0.09, wpb=64822, bsz=128, num_updates=8660, lr=9.99387e-05, gnorm=4.434, loss_scale=1, train_wall=11, gb_free=2.8, wall=98916
2021-06-19 22:07:33 | INFO | train_inner | epoch 003: 2712 / 3002 loss=2.706, ppl=6.53, wps=5728.3, ups=0.09, wpb=64844, bsz=128, num_updates=8661, lr=9.99387e-05, gnorm=2.171, loss_scale=1, train_wall=11, gb_free=2.8, wall=98928
2021-06-19 22:07:44 | INFO | train_inner | epoch 003: 2713 / 3002 loss=2.447, ppl=5.45, wps=5886.1, ups=0.09, wpb=64866, bsz=128, num_updates=8662, lr=9.99387e-05, gnorm=2.078, loss_scale=1, train_wall=11, gb_free=2.8, wall=98939
2021-06-19 22:07:55 | INFO | train_inner | epoch 003: 2714 / 3002 loss=2.627, ppl=6.18, wps=5772.9, ups=0.09, wpb=64876, bsz=128, num_updates=8663, lr=9.99387e-05, gnorm=2.298, loss_scale=1, train_wall=11, gb_free=2.8, wall=98950
2021-06-19 22:08:07 | INFO | train_inner | epoch 003: 2715 / 3002 loss=2.664, ppl=6.34, wps=5789.4, ups=0.09, wpb=64847, bsz=128, num_updates=8664, lr=9.99387e-05, gnorm=3.032, loss_scale=1, train_wall=11, gb_free=2.8, wall=98961
2021-06-19 22:08:18 | INFO | train_inner | epoch 003: 2716 / 3002 loss=2.71, ppl=6.54, wps=5802.1, ups=0.09, wpb=64801, bsz=128, num_updates=8665, lr=9.99387e-05, gnorm=2.276, loss_scale=1, train_wall=11, gb_free=2.8, wall=98972
2021-06-19 22:08:29 | INFO | train_inner | epoch 003: 2717 / 3002 loss=2.614, ppl=6.12, wps=5914.9, ups=0.09, wpb=64819, bsz=128, num_updates=8666, lr=9.99387e-05, gnorm=2.114, loss_scale=1, train_wall=11, gb_free=2.8, wall=98983
2021-06-19 22:08:40 | INFO | train_inner | epoch 003: 2718 / 3002 loss=2.63, ppl=6.19, wps=5821.6, ups=0.09, wpb=64822, bsz=128, num_updates=8667, lr=9.99387e-05, gnorm=2.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=98994
2021-06-19 22:08:51 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-19 22:09:02 | INFO | train_inner | epoch 003: 2720 / 3002 loss=2.587, ppl=6.01, wps=2961.2, ups=0.05, wpb=64843, bsz=128, num_updates=8668, lr=9.99387e-05, gnorm=2.18, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=99016
2021-06-19 22:09:13 | INFO | train_inner | epoch 003: 2721 / 3002 loss=2.652, ppl=6.29, wps=5763.9, ups=0.09, wpb=64870, bsz=128, num_updates=8669, lr=9.99386e-05, gnorm=2.517, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99027
2021-06-19 22:09:24 | INFO | train_inner | epoch 003: 2722 / 3002 loss=2.52, ppl=5.74, wps=5977, ups=0.09, wpb=64875, bsz=128, num_updates=8670, lr=9.99386e-05, gnorm=2.111, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99038
2021-06-19 22:09:35 | INFO | train_inner | epoch 003: 2723 / 3002 loss=2.559, ppl=5.89, wps=5913.1, ups=0.09, wpb=64847, bsz=128, num_updates=8671, lr=9.99386e-05, gnorm=2.24, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99049
2021-06-19 22:09:46 | INFO | train_inner | epoch 003: 2724 / 3002 loss=2.692, ppl=6.46, wps=5909.9, ups=0.09, wpb=64849, bsz=128, num_updates=8672, lr=9.99386e-05, gnorm=2.163, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99060
2021-06-19 22:09:57 | INFO | train_inner | epoch 003: 2725 / 3002 loss=2.713, ppl=6.56, wps=5803.3, ups=0.09, wpb=64700, bsz=128, num_updates=8673, lr=9.99386e-05, gnorm=2.258, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99071
2021-06-19 22:10:08 | INFO | train_inner | epoch 003: 2726 / 3002 loss=2.626, ppl=6.17, wps=5877.2, ups=0.09, wpb=64835, bsz=128, num_updates=8674, lr=9.99386e-05, gnorm=2.277, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99082
2021-06-19 22:10:19 | INFO | train_inner | epoch 003: 2727 / 3002 loss=2.61, ppl=6.11, wps=6010.2, ups=0.09, wpb=64883, bsz=128, num_updates=8675, lr=9.99386e-05, gnorm=2.297, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99093
2021-06-19 22:10:30 | INFO | train_inner | epoch 003: 2728 / 3002 loss=2.673, ppl=6.38, wps=5937.5, ups=0.09, wpb=64854, bsz=128, num_updates=8676, lr=9.99386e-05, gnorm=2.182, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99104
2021-06-19 22:10:41 | INFO | train_inner | epoch 003: 2729 / 3002 loss=2.526, ppl=5.76, wps=5843.8, ups=0.09, wpb=64786, bsz=128, num_updates=8677, lr=9.99386e-05, gnorm=2.382, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99115
2021-06-19 22:10:52 | INFO | train_inner | epoch 003: 2730 / 3002 loss=2.721, ppl=6.59, wps=5922.6, ups=0.09, wpb=64802, bsz=128, num_updates=8678, lr=9.99386e-05, gnorm=2.123, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99126
2021-06-19 22:11:03 | INFO | train_inner | epoch 003: 2731 / 3002 loss=2.742, ppl=6.69, wps=5811.9, ups=0.09, wpb=64871, bsz=128, num_updates=8679, lr=9.99386e-05, gnorm=2.199, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99137
2021-06-19 22:11:14 | INFO | train_inner | epoch 003: 2732 / 3002 loss=2.727, ppl=6.62, wps=5796.2, ups=0.09, wpb=64815, bsz=128, num_updates=8680, lr=9.99386e-05, gnorm=2.169, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99148
2021-06-19 22:11:25 | INFO | train_inner | epoch 003: 2733 / 3002 loss=2.652, ppl=6.28, wps=5805.7, ups=0.09, wpb=64795, bsz=128, num_updates=8681, lr=9.99385e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99160
2021-06-19 22:11:36 | INFO | train_inner | epoch 003: 2734 / 3002 loss=2.549, ppl=5.85, wps=5908.2, ups=0.09, wpb=64839, bsz=128, num_updates=8682, lr=9.99385e-05, gnorm=2.186, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99171
2021-06-19 22:11:47 | INFO | train_inner | epoch 003: 2735 / 3002 loss=2.67, ppl=6.36, wps=5928.2, ups=0.09, wpb=64825, bsz=128, num_updates=8683, lr=9.99385e-05, gnorm=2.097, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99182
2021-06-19 22:11:58 | INFO | train_inner | epoch 003: 2736 / 3002 loss=2.585, ppl=6, wps=5894.8, ups=0.09, wpb=64891, bsz=128, num_updates=8684, lr=9.99385e-05, gnorm=2.814, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99193
2021-06-19 22:12:09 | INFO | train_inner | epoch 003: 2737 / 3002 loss=2.705, ppl=6.52, wps=5756.2, ups=0.09, wpb=64745, bsz=128, num_updates=8685, lr=9.99385e-05, gnorm=2.077, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99204
2021-06-19 22:12:21 | INFO | train_inner | epoch 003: 2738 / 3002 loss=2.642, ppl=6.24, wps=5756.1, ups=0.09, wpb=64745, bsz=128, num_updates=8686, lr=9.99385e-05, gnorm=2.244, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99215
2021-06-19 22:12:32 | INFO | train_inner | epoch 003: 2739 / 3002 loss=2.689, ppl=6.45, wps=5850.6, ups=0.09, wpb=64874, bsz=128, num_updates=8687, lr=9.99385e-05, gnorm=2.068, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99226
2021-06-19 22:12:43 | INFO | train_inner | epoch 003: 2740 / 3002 loss=2.646, ppl=6.26, wps=5840.3, ups=0.09, wpb=64773, bsz=128, num_updates=8688, lr=9.99385e-05, gnorm=2.147, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99237
2021-06-19 22:12:54 | INFO | train_inner | epoch 003: 2741 / 3002 loss=2.526, ppl=5.76, wps=5942.7, ups=0.09, wpb=64835, bsz=128, num_updates=8689, lr=9.99385e-05, gnorm=2.083, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99248
2021-06-19 22:13:05 | INFO | train_inner | epoch 003: 2742 / 3002 loss=2.769, ppl=6.82, wps=5830.6, ups=0.09, wpb=64755, bsz=128, num_updates=8690, lr=9.99385e-05, gnorm=2.184, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99259
2021-06-19 22:13:16 | INFO | train_inner | epoch 003: 2743 / 3002 loss=2.502, ppl=5.67, wps=5915.1, ups=0.09, wpb=64831, bsz=128, num_updates=8691, lr=9.99385e-05, gnorm=2.057, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99270
2021-06-19 22:13:27 | INFO | train_inner | epoch 003: 2744 / 3002 loss=2.54, ppl=5.82, wps=5939.4, ups=0.09, wpb=64848, bsz=128, num_updates=8692, lr=9.99385e-05, gnorm=2.184, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99281
2021-06-19 22:13:38 | INFO | train_inner | epoch 003: 2745 / 3002 loss=2.636, ppl=6.21, wps=5801.4, ups=0.09, wpb=64849, bsz=128, num_updates=8693, lr=9.99385e-05, gnorm=2.769, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99292
2021-06-19 22:13:49 | INFO | train_inner | epoch 003: 2746 / 3002 loss=2.628, ppl=6.18, wps=5909.3, ups=0.09, wpb=64920, bsz=128, num_updates=8694, lr=9.99384e-05, gnorm=2.202, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99303
2021-06-19 22:14:00 | INFO | train_inner | epoch 003: 2747 / 3002 loss=2.686, ppl=6.43, wps=5930.2, ups=0.09, wpb=64914, bsz=128, num_updates=8695, lr=9.99384e-05, gnorm=2.126, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99314
2021-06-19 22:14:11 | INFO | train_inner | epoch 003: 2748 / 3002 loss=2.743, ppl=6.7, wps=5814.1, ups=0.09, wpb=64764, bsz=128, num_updates=8696, lr=9.99384e-05, gnorm=2.119, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99325
2021-06-19 22:14:22 | INFO | train_inner | epoch 003: 2749 / 3002 loss=2.629, ppl=6.19, wps=5807, ups=0.09, wpb=64826, bsz=128, num_updates=8697, lr=9.99384e-05, gnorm=2.187, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99337
2021-06-19 22:14:33 | INFO | train_inner | epoch 003: 2750 / 3002 loss=2.547, ppl=5.85, wps=5849.1, ups=0.09, wpb=64894, bsz=128, num_updates=8698, lr=9.99384e-05, gnorm=2.092, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99348
2021-06-19 22:14:44 | INFO | train_inner | epoch 003: 2751 / 3002 loss=2.612, ppl=6.12, wps=5943.5, ups=0.09, wpb=64852, bsz=128, num_updates=8699, lr=9.99384e-05, gnorm=2.457, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99359
2021-06-19 22:14:55 | INFO | train_inner | epoch 003: 2752 / 3002 loss=2.703, ppl=6.51, wps=5961, ups=0.09, wpb=64801, bsz=128, num_updates=8700, lr=9.99384e-05, gnorm=2.162, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99369
2021-06-19 22:15:06 | INFO | train_inner | epoch 003: 2753 / 3002 loss=2.648, ppl=6.27, wps=5896.6, ups=0.09, wpb=64716, bsz=128, num_updates=8701, lr=9.99384e-05, gnorm=2.828, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99380
2021-06-19 22:15:17 | INFO | train_inner | epoch 003: 2754 / 3002 loss=2.658, ppl=6.31, wps=5765.4, ups=0.09, wpb=64716, bsz=128, num_updates=8702, lr=9.99384e-05, gnorm=2.208, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99392
2021-06-19 22:15:28 | INFO | train_inner | epoch 003: 2755 / 3002 loss=2.611, ppl=6.11, wps=5938.1, ups=0.09, wpb=64845, bsz=128, num_updates=8703, lr=9.99384e-05, gnorm=2.12, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99403
2021-06-19 22:15:39 | INFO | train_inner | epoch 003: 2756 / 3002 loss=2.46, ppl=5.5, wps=5853.1, ups=0.09, wpb=64813, bsz=128, num_updates=8704, lr=9.99384e-05, gnorm=2.225, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99414
2021-06-19 22:15:50 | INFO | train_inner | epoch 003: 2757 / 3002 loss=2.663, ppl=6.33, wps=5879.2, ups=0.09, wpb=64812, bsz=128, num_updates=8705, lr=9.99384e-05, gnorm=2.072, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99425
2021-06-19 22:16:01 | INFO | train_inner | epoch 003: 2758 / 3002 loss=2.634, ppl=6.21, wps=5851.9, ups=0.09, wpb=64862, bsz=128, num_updates=8706, lr=9.99383e-05, gnorm=2.282, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99436
2021-06-19 22:16:12 | INFO | train_inner | epoch 003: 2759 / 3002 loss=2.606, ppl=6.09, wps=5938.9, ups=0.09, wpb=64806, bsz=128, num_updates=8707, lr=9.99383e-05, gnorm=2.248, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99447
2021-06-19 22:16:23 | INFO | train_inner | epoch 003: 2760 / 3002 loss=2.553, ppl=5.87, wps=5950.7, ups=0.09, wpb=64867, bsz=128, num_updates=8708, lr=9.99383e-05, gnorm=2.221, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99458
2021-06-19 22:16:34 | INFO | train_inner | epoch 003: 2761 / 3002 loss=2.415, ppl=5.33, wps=5834.2, ups=0.09, wpb=64879, bsz=128, num_updates=8709, lr=9.99383e-05, gnorm=2.278, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99469
2021-06-19 22:16:45 | INFO | train_inner | epoch 003: 2762 / 3002 loss=2.68, ppl=6.41, wps=5800, ups=0.09, wpb=64808, bsz=128, num_updates=8710, lr=9.99383e-05, gnorm=2.2, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99480
2021-06-19 22:16:57 | INFO | train_inner | epoch 003: 2763 / 3002 loss=2.622, ppl=6.16, wps=5819.1, ups=0.09, wpb=64836, bsz=128, num_updates=8711, lr=9.99383e-05, gnorm=2.168, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99491
2021-06-19 22:17:08 | INFO | train_inner | epoch 003: 2764 / 3002 loss=2.652, ppl=6.29, wps=5844.5, ups=0.09, wpb=64807, bsz=128, num_updates=8712, lr=9.99383e-05, gnorm=2.82, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99502
2021-06-19 22:17:19 | INFO | train_inner | epoch 003: 2765 / 3002 loss=2.494, ppl=5.63, wps=5844.9, ups=0.09, wpb=64878, bsz=128, num_updates=8713, lr=9.99383e-05, gnorm=2.181, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99513
2021-06-19 22:17:30 | INFO | train_inner | epoch 003: 2766 / 3002 loss=2.514, ppl=5.71, wps=5823.9, ups=0.09, wpb=64818, bsz=128, num_updates=8714, lr=9.99383e-05, gnorm=2.172, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99524
2021-06-19 22:17:41 | INFO | train_inner | epoch 003: 2767 / 3002 loss=2.731, ppl=6.64, wps=5821.4, ups=0.09, wpb=64844, bsz=128, num_updates=8715, lr=9.99383e-05, gnorm=2.144, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99535
2021-06-19 22:17:52 | INFO | train_inner | epoch 003: 2768 / 3002 loss=2.695, ppl=6.47, wps=5789.4, ups=0.09, wpb=64745, bsz=128, num_updates=8716, lr=9.99383e-05, gnorm=2.072, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99547
2021-06-19 22:18:03 | INFO | train_inner | epoch 003: 2769 / 3002 loss=2.772, ppl=6.83, wps=5790.6, ups=0.09, wpb=64759, bsz=128, num_updates=8717, lr=9.99383e-05, gnorm=2.149, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99558
2021-06-19 22:18:15 | INFO | train_inner | epoch 003: 2770 / 3002 loss=2.466, ppl=5.53, wps=5832.1, ups=0.09, wpb=64843, bsz=128, num_updates=8718, lr=9.99383e-05, gnorm=2.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99569
2021-06-19 22:18:26 | INFO | train_inner | epoch 003: 2771 / 3002 loss=2.485, ppl=5.6, wps=5903.6, ups=0.09, wpb=64808, bsz=128, num_updates=8719, lr=9.99382e-05, gnorm=2.236, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99580
2021-06-19 22:18:37 | INFO | train_inner | epoch 003: 2772 / 3002 loss=2.635, ppl=6.21, wps=5783.6, ups=0.09, wpb=64778, bsz=128, num_updates=8720, lr=9.99382e-05, gnorm=2.331, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99591
2021-06-19 22:18:48 | INFO | train_inner | epoch 003: 2773 / 3002 loss=2.585, ppl=6, wps=5815.6, ups=0.09, wpb=64774, bsz=128, num_updates=8721, lr=9.99382e-05, gnorm=2.648, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99602
2021-06-19 22:18:59 | INFO | train_inner | epoch 003: 2774 / 3002 loss=2.53, ppl=5.78, wps=5884.4, ups=0.09, wpb=64891, bsz=128, num_updates=8722, lr=9.99382e-05, gnorm=9.276, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99613
2021-06-19 22:19:10 | INFO | train_inner | epoch 003: 2775 / 3002 loss=2.664, ppl=6.34, wps=5892.2, ups=0.09, wpb=64877, bsz=128, num_updates=8723, lr=9.99382e-05, gnorm=2.17, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99624
2021-06-19 22:19:21 | INFO | train_inner | epoch 003: 2776 / 3002 loss=2.532, ppl=5.78, wps=5747.7, ups=0.09, wpb=64785, bsz=128, num_updates=8724, lr=9.99382e-05, gnorm=2.088, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99636
2021-06-19 22:19:32 | INFO | train_inner | epoch 003: 2777 / 3002 loss=2.595, ppl=6.04, wps=5775.3, ups=0.09, wpb=64744, bsz=128, num_updates=8725, lr=9.99382e-05, gnorm=2.796, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99647
2021-06-19 22:19:44 | INFO | train_inner | epoch 003: 2778 / 3002 loss=2.608, ppl=6.1, wps=5834.1, ups=0.09, wpb=64797, bsz=128, num_updates=8726, lr=9.99382e-05, gnorm=2.308, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99658
2021-06-19 22:19:55 | INFO | train_inner | epoch 003: 2779 / 3002 loss=2.734, ppl=6.65, wps=5774.2, ups=0.09, wpb=64724, bsz=128, num_updates=8727, lr=9.99382e-05, gnorm=2.172, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99669
2021-06-19 22:20:06 | INFO | train_inner | epoch 003: 2780 / 3002 loss=2.521, ppl=5.74, wps=5819.6, ups=0.09, wpb=64863, bsz=128, num_updates=8728, lr=9.99382e-05, gnorm=2.105, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99680
2021-06-19 22:20:17 | INFO | train_inner | epoch 003: 2781 / 3002 loss=2.759, ppl=6.77, wps=5868.7, ups=0.09, wpb=64810, bsz=128, num_updates=8729, lr=9.99382e-05, gnorm=2.394, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99691
2021-06-19 22:20:28 | INFO | train_inner | epoch 003: 2782 / 3002 loss=2.435, ppl=5.41, wps=5912, ups=0.09, wpb=64860, bsz=128, num_updates=8730, lr=9.99382e-05, gnorm=2.397, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99702
2021-06-19 22:20:39 | INFO | train_inner | epoch 003: 2783 / 3002 loss=2.458, ppl=5.49, wps=5893.5, ups=0.09, wpb=64905, bsz=128, num_updates=8731, lr=9.99381e-05, gnorm=2.086, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99713
2021-06-19 22:20:50 | INFO | train_inner | epoch 003: 2784 / 3002 loss=2.654, ppl=6.29, wps=5895.6, ups=0.09, wpb=64838, bsz=128, num_updates=8732, lr=9.99381e-05, gnorm=2.24, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99724
2021-06-19 22:21:01 | INFO | train_inner | epoch 003: 2785 / 3002 loss=2.607, ppl=6.09, wps=5777.7, ups=0.09, wpb=64862, bsz=128, num_updates=8733, lr=9.99381e-05, gnorm=2.052, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99735
2021-06-19 22:21:12 | INFO | train_inner | epoch 003: 2786 / 3002 loss=2.551, ppl=5.86, wps=5777.9, ups=0.09, wpb=64846, bsz=128, num_updates=8734, lr=9.99381e-05, gnorm=2.022, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99747
2021-06-19 22:21:23 | INFO | train_inner | epoch 003: 2787 / 3002 loss=2.612, ppl=6.12, wps=5864.9, ups=0.09, wpb=64818, bsz=128, num_updates=8735, lr=9.99381e-05, gnorm=17.357, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99758
2021-06-19 22:21:35 | INFO | train_inner | epoch 003: 2788 / 3002 loss=2.678, ppl=6.4, wps=5787.4, ups=0.09, wpb=64839, bsz=128, num_updates=8736, lr=9.99381e-05, gnorm=2.167, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99769
2021-06-19 22:21:46 | INFO | train_inner | epoch 003: 2789 / 3002 loss=2.913, ppl=7.53, wps=5824.7, ups=0.09, wpb=64786, bsz=128, num_updates=8737, lr=9.99381e-05, gnorm=3.465, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99780
2021-06-19 22:21:57 | INFO | train_inner | epoch 003: 2790 / 3002 loss=2.673, ppl=6.38, wps=5907.5, ups=0.09, wpb=64849, bsz=128, num_updates=8738, lr=9.99381e-05, gnorm=2.168, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99791
2021-06-19 22:22:08 | INFO | train_inner | epoch 003: 2791 / 3002 loss=2.569, ppl=5.93, wps=5914.2, ups=0.09, wpb=64876, bsz=128, num_updates=8739, lr=9.99381e-05, gnorm=7.887, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99802
2021-06-19 22:22:19 | INFO | train_inner | epoch 003: 2792 / 3002 loss=2.633, ppl=6.2, wps=5882.7, ups=0.09, wpb=64835, bsz=128, num_updates=8740, lr=9.99381e-05, gnorm=2.848, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99813
2021-06-19 22:22:30 | INFO | train_inner | epoch 003: 2793 / 3002 loss=2.704, ppl=6.52, wps=5801.7, ups=0.09, wpb=64874, bsz=128, num_updates=8741, lr=9.99381e-05, gnorm=2.278, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99824
2021-06-19 22:22:41 | INFO | train_inner | epoch 003: 2794 / 3002 loss=2.616, ppl=6.13, wps=5817.8, ups=0.09, wpb=64832, bsz=128, num_updates=8742, lr=9.99381e-05, gnorm=2.176, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99835
2021-06-19 22:22:52 | INFO | train_inner | epoch 003: 2795 / 3002 loss=2.509, ppl=5.69, wps=5884.1, ups=0.09, wpb=64825, bsz=128, num_updates=8743, lr=9.99381e-05, gnorm=2.048, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99846
2021-06-19 22:23:03 | INFO | train_inner | epoch 003: 2796 / 3002 loss=2.67, ppl=6.36, wps=5787.3, ups=0.09, wpb=64818, bsz=128, num_updates=8744, lr=9.9938e-05, gnorm=2.37, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99858
2021-06-19 22:23:14 | INFO | train_inner | epoch 003: 2797 / 3002 loss=2.516, ppl=5.72, wps=5807.5, ups=0.09, wpb=64860, bsz=128, num_updates=8745, lr=9.9938e-05, gnorm=2.336, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99869
2021-06-19 22:23:26 | INFO | train_inner | epoch 003: 2798 / 3002 loss=2.519, ppl=5.73, wps=5830.1, ups=0.09, wpb=64808, bsz=128, num_updates=8746, lr=9.9938e-05, gnorm=2.277, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99880
2021-06-19 22:23:36 | INFO | train_inner | epoch 003: 2799 / 3002 loss=2.597, ppl=6.05, wps=5925.9, ups=0.09, wpb=64888, bsz=128, num_updates=8747, lr=9.9938e-05, gnorm=2.188, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99891
2021-06-19 22:23:48 | INFO | train_inner | epoch 003: 2800 / 3002 loss=2.793, ppl=6.93, wps=5755.1, ups=0.09, wpb=64769, bsz=128, num_updates=8748, lr=9.9938e-05, gnorm=4.338, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99902
2021-06-19 22:23:59 | INFO | train_inner | epoch 003: 2801 / 3002 loss=2.704, ppl=6.52, wps=5880.5, ups=0.09, wpb=64857, bsz=128, num_updates=8749, lr=9.9938e-05, gnorm=2.265, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99913
2021-06-19 22:24:10 | INFO | train_inner | epoch 003: 2802 / 3002 loss=2.666, ppl=6.35, wps=5787.8, ups=0.09, wpb=64892, bsz=128, num_updates=8750, lr=9.9938e-05, gnorm=2.724, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99924
2021-06-19 22:24:21 | INFO | train_inner | epoch 003: 2803 / 3002 loss=2.607, ppl=6.09, wps=5899.1, ups=0.09, wpb=64844, bsz=128, num_updates=8751, lr=9.9938e-05, gnorm=2.316, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99935
2021-06-19 22:24:32 | INFO | train_inner | epoch 003: 2804 / 3002 loss=2.584, ppl=5.99, wps=5862.2, ups=0.09, wpb=64861, bsz=128, num_updates=8752, lr=9.9938e-05, gnorm=2.271, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99946
2021-06-19 22:24:43 | INFO | train_inner | epoch 003: 2805 / 3002 loss=2.738, ppl=6.67, wps=5921.5, ups=0.09, wpb=64728, bsz=128, num_updates=8753, lr=9.9938e-05, gnorm=2.245, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99957
2021-06-19 22:24:54 | INFO | train_inner | epoch 003: 2806 / 3002 loss=2.657, ppl=6.31, wps=5959.2, ups=0.09, wpb=64846, bsz=128, num_updates=8754, lr=9.9938e-05, gnorm=2.271, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99968
2021-06-19 22:25:05 | INFO | train_inner | epoch 003: 2807 / 3002 loss=2.779, ppl=6.86, wps=5846.9, ups=0.09, wpb=64809, bsz=128, num_updates=8755, lr=9.9938e-05, gnorm=2.213, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=99979
2021-06-19 22:25:16 | INFO | train_inner | epoch 003: 2808 / 3002 loss=2.566, ppl=5.92, wps=5910.9, ups=0.09, wpb=64807, bsz=128, num_updates=8756, lr=9.99379e-05, gnorm=2.757, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=99990
2021-06-19 22:25:27 | INFO | train_inner | epoch 003: 2809 / 3002 loss=2.79, ppl=6.92, wps=5792.7, ups=0.09, wpb=64779, bsz=128, num_updates=8757, lr=9.99379e-05, gnorm=2.324, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100001
2021-06-19 22:25:38 | INFO | train_inner | epoch 003: 2810 / 3002 loss=2.55, ppl=5.86, wps=5780.7, ups=0.09, wpb=64815, bsz=128, num_updates=8758, lr=9.99379e-05, gnorm=2.366, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100013
2021-06-19 22:25:49 | INFO | train_inner | epoch 003: 2811 / 3002 loss=2.487, ppl=5.6, wps=5861.2, ups=0.09, wpb=64839, bsz=128, num_updates=8759, lr=9.99379e-05, gnorm=2.351, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100024
2021-06-19 22:26:00 | INFO | train_inner | epoch 003: 2812 / 3002 loss=2.626, ppl=6.17, wps=5944.1, ups=0.09, wpb=64839, bsz=128, num_updates=8760, lr=9.99379e-05, gnorm=2.292, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100035
2021-06-19 22:26:11 | INFO | train_inner | epoch 003: 2813 / 3002 loss=2.485, ppl=5.6, wps=5813.3, ups=0.09, wpb=64962, bsz=128, num_updates=8761, lr=9.99379e-05, gnorm=3.626, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100046
2021-06-19 22:26:23 | INFO | train_inner | epoch 003: 2814 / 3002 loss=2.52, ppl=5.74, wps=5825.4, ups=0.09, wpb=64847, bsz=128, num_updates=8762, lr=9.99379e-05, gnorm=2.371, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100057
2021-06-19 22:26:34 | INFO | train_inner | epoch 003: 2815 / 3002 loss=2.685, ppl=6.43, wps=5855.7, ups=0.09, wpb=64786, bsz=128, num_updates=8763, lr=9.99379e-05, gnorm=2.531, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100068
2021-06-19 22:26:45 | INFO | train_inner | epoch 003: 2816 / 3002 loss=2.538, ppl=5.81, wps=5755.7, ups=0.09, wpb=64754, bsz=128, num_updates=8764, lr=9.99379e-05, gnorm=9.554, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100079
2021-06-19 22:26:56 | INFO | train_inner | epoch 003: 2817 / 3002 loss=2.529, ppl=5.77, wps=5780.2, ups=0.09, wpb=64781, bsz=128, num_updates=8765, lr=9.99379e-05, gnorm=2.387, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100090
2021-06-19 22:27:07 | INFO | train_inner | epoch 003: 2818 / 3002 loss=2.53, ppl=5.78, wps=5777.5, ups=0.09, wpb=64789, bsz=128, num_updates=8766, lr=9.99379e-05, gnorm=2.183, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100102
2021-06-19 22:27:18 | INFO | train_inner | epoch 003: 2819 / 3002 loss=2.576, ppl=5.96, wps=5929.2, ups=0.09, wpb=64822, bsz=128, num_updates=8767, lr=9.99379e-05, gnorm=5.255, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100113
2021-06-19 22:27:29 | INFO | train_inner | epoch 003: 2820 / 3002 loss=2.554, ppl=5.87, wps=5869.1, ups=0.09, wpb=64810, bsz=128, num_updates=8768, lr=9.99379e-05, gnorm=2.483, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100124
2021-06-19 22:27:40 | INFO | train_inner | epoch 003: 2821 / 3002 loss=2.597, ppl=6.05, wps=5837.9, ups=0.09, wpb=64814, bsz=128, num_updates=8769, lr=9.99378e-05, gnorm=2.558, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100135
2021-06-19 22:27:51 | INFO | train_inner | epoch 003: 2822 / 3002 loss=2.763, ppl=6.79, wps=5827.5, ups=0.09, wpb=64754, bsz=128, num_updates=8770, lr=9.99378e-05, gnorm=2.365, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100146
2021-06-19 22:28:02 | INFO | train_inner | epoch 003: 2823 / 3002 loss=2.574, ppl=5.95, wps=5891, ups=0.09, wpb=64867, bsz=128, num_updates=8771, lr=9.99378e-05, gnorm=2.305, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100157
2021-06-19 22:28:14 | INFO | train_inner | epoch 003: 2824 / 3002 loss=2.574, ppl=5.96, wps=5777.7, ups=0.09, wpb=64812, bsz=128, num_updates=8772, lr=9.99378e-05, gnorm=4.563, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100168
2021-06-19 22:28:25 | INFO | train_inner | epoch 003: 2825 / 3002 loss=2.575, ppl=5.96, wps=5836.7, ups=0.09, wpb=64770, bsz=128, num_updates=8773, lr=9.99378e-05, gnorm=2.788, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100179
2021-06-19 22:28:36 | INFO | train_inner | epoch 003: 2826 / 3002 loss=2.551, ppl=5.86, wps=5866, ups=0.09, wpb=64871, bsz=128, num_updates=8774, lr=9.99378e-05, gnorm=2.363, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100190
2021-06-19 22:28:47 | INFO | train_inner | epoch 003: 2827 / 3002 loss=2.793, ppl=6.93, wps=5911.4, ups=0.09, wpb=64949, bsz=128, num_updates=8775, lr=9.99378e-05, gnorm=2.859, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100201
2021-06-19 22:28:58 | INFO | train_inner | epoch 003: 2828 / 3002 loss=2.833, ppl=7.12, wps=5915.4, ups=0.09, wpb=64795, bsz=128, num_updates=8776, lr=9.99378e-05, gnorm=2.344, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100212
2021-06-19 22:29:09 | INFO | train_inner | epoch 003: 2829 / 3002 loss=2.712, ppl=6.55, wps=5857.3, ups=0.09, wpb=64840, bsz=128, num_updates=8777, lr=9.99378e-05, gnorm=2.224, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100223
2021-06-19 22:29:20 | INFO | train_inner | epoch 003: 2830 / 3002 loss=2.794, ppl=6.94, wps=5933, ups=0.09, wpb=64888, bsz=128, num_updates=8778, lr=9.99378e-05, gnorm=2.211, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100234
2021-06-19 22:29:31 | INFO | train_inner | epoch 003: 2831 / 3002 loss=2.877, ppl=7.34, wps=5793.5, ups=0.09, wpb=64801, bsz=128, num_updates=8779, lr=9.99378e-05, gnorm=14.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100245
2021-06-19 22:29:42 | INFO | train_inner | epoch 003: 2832 / 3002 loss=2.537, ppl=5.8, wps=5926.8, ups=0.09, wpb=64827, bsz=128, num_updates=8780, lr=9.99378e-05, gnorm=3.103, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100256
2021-06-19 22:29:53 | INFO | train_inner | epoch 003: 2833 / 3002 loss=2.656, ppl=6.3, wps=5824.7, ups=0.09, wpb=64850, bsz=128, num_updates=8781, lr=9.99377e-05, gnorm=3.005, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100267
2021-06-19 22:30:04 | INFO | train_inner | epoch 003: 2834 / 3002 loss=2.626, ppl=6.17, wps=5832, ups=0.09, wpb=64824, bsz=128, num_updates=8782, lr=9.99377e-05, gnorm=2.365, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100279
2021-06-19 22:30:15 | INFO | train_inner | epoch 003: 2835 / 3002 loss=2.743, ppl=6.7, wps=5918.4, ups=0.09, wpb=64934, bsz=128, num_updates=8783, lr=9.99377e-05, gnorm=2.752, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100289
2021-06-19 22:30:26 | INFO | train_inner | epoch 003: 2836 / 3002 loss=2.566, ppl=5.92, wps=5849.3, ups=0.09, wpb=64853, bsz=128, num_updates=8784, lr=9.99377e-05, gnorm=3.166, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100301
2021-06-19 22:30:37 | INFO | train_inner | epoch 003: 2837 / 3002 loss=2.685, ppl=6.43, wps=5901.7, ups=0.09, wpb=64887, bsz=128, num_updates=8785, lr=9.99377e-05, gnorm=2.266, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100312
2021-06-19 22:30:48 | INFO | train_inner | epoch 003: 2838 / 3002 loss=2.665, ppl=6.34, wps=5808.6, ups=0.09, wpb=64752, bsz=128, num_updates=8786, lr=9.99377e-05, gnorm=2.421, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100323
2021-06-19 22:30:59 | INFO | train_inner | epoch 003: 2839 / 3002 loss=2.62, ppl=6.15, wps=5841.4, ups=0.09, wpb=64833, bsz=128, num_updates=8787, lr=9.99377e-05, gnorm=2.191, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100334
2021-06-19 22:31:11 | INFO | train_inner | epoch 003: 2840 / 3002 loss=2.592, ppl=6.03, wps=5828.2, ups=0.09, wpb=64892, bsz=128, num_updates=8788, lr=9.99377e-05, gnorm=2.192, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100345
2021-06-19 22:31:22 | INFO | train_inner | epoch 003: 2841 / 3002 loss=2.588, ppl=6.01, wps=5841.3, ups=0.09, wpb=64747, bsz=128, num_updates=8789, lr=9.99377e-05, gnorm=3.304, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100356
2021-06-19 22:31:33 | INFO | train_inner | epoch 003: 2842 / 3002 loss=2.551, ppl=5.86, wps=5944.2, ups=0.09, wpb=64825, bsz=128, num_updates=8790, lr=9.99377e-05, gnorm=2.316, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100367
2021-06-19 22:31:44 | INFO | train_inner | epoch 003: 2843 / 3002 loss=2.638, ppl=6.22, wps=5789.6, ups=0.09, wpb=64849, bsz=128, num_updates=8791, lr=9.99377e-05, gnorm=2.413, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100378
2021-06-19 22:31:55 | INFO | train_inner | epoch 003: 2844 / 3002 loss=2.653, ppl=6.29, wps=5791.2, ups=0.09, wpb=64831, bsz=128, num_updates=8792, lr=9.99377e-05, gnorm=3.429, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100389
2021-06-19 22:32:06 | INFO | train_inner | epoch 003: 2845 / 3002 loss=2.576, ppl=5.96, wps=5891.1, ups=0.09, wpb=64919, bsz=128, num_updates=8793, lr=9.99377e-05, gnorm=2.187, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100400
2021-06-19 22:32:17 | INFO | train_inner | epoch 003: 2846 / 3002 loss=2.662, ppl=6.33, wps=5833.9, ups=0.09, wpb=64779, bsz=128, num_updates=8794, lr=9.99376e-05, gnorm=2.283, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100411
2021-06-19 22:32:28 | INFO | train_inner | epoch 003: 2847 / 3002 loss=2.602, ppl=6.07, wps=5734, ups=0.09, wpb=64798, bsz=128, num_updates=8795, lr=9.99376e-05, gnorm=2.321, loss_scale=1, train_wall=11, gb_free=2.8, wall=100423
2021-06-19 22:32:40 | INFO | train_inner | epoch 003: 2848 / 3002 loss=2.833, ppl=7.12, wps=5770.2, ups=0.09, wpb=64774, bsz=128, num_updates=8796, lr=9.99376e-05, gnorm=4.234, loss_scale=1, train_wall=11, gb_free=2.8, wall=100434
2021-06-19 22:32:51 | INFO | train_inner | epoch 003: 2849 / 3002 loss=2.551, ppl=5.86, wps=5800.5, ups=0.09, wpb=64864, bsz=128, num_updates=8797, lr=9.99376e-05, gnorm=2.254, loss_scale=1, train_wall=11, gb_free=2.8, wall=100445
2021-06-19 22:33:02 | INFO | train_inner | epoch 003: 2850 / 3002 loss=2.539, ppl=5.81, wps=5720.7, ups=0.09, wpb=64830, bsz=128, num_updates=8798, lr=9.99376e-05, gnorm=2.13, loss_scale=1, train_wall=11, gb_free=2.8, wall=100457
2021-06-19 22:33:13 | INFO | train_inner | epoch 003: 2851 / 3002 loss=2.561, ppl=5.9, wps=5890.7, ups=0.09, wpb=64887, bsz=128, num_updates=8799, lr=9.99376e-05, gnorm=2.091, loss_scale=1, train_wall=11, gb_free=2.8, wall=100468
2021-06-19 22:33:24 | INFO | train_inner | epoch 003: 2852 / 3002 loss=2.715, ppl=6.57, wps=5777.7, ups=0.09, wpb=64797, bsz=128, num_updates=8800, lr=9.99376e-05, gnorm=4.371, loss_scale=1, train_wall=11, gb_free=2.8, wall=100479
2021-06-19 22:33:36 | INFO | train_inner | epoch 003: 2853 / 3002 loss=2.533, ppl=5.79, wps=5800.3, ups=0.09, wpb=64833, bsz=128, num_updates=8801, lr=9.99376e-05, gnorm=2.16, loss_scale=1, train_wall=11, gb_free=2.8, wall=100490
2021-06-19 22:33:47 | INFO | train_inner | epoch 003: 2854 / 3002 loss=2.538, ppl=5.81, wps=5844.7, ups=0.09, wpb=64835, bsz=128, num_updates=8802, lr=9.99376e-05, gnorm=2.828, loss_scale=1, train_wall=11, gb_free=2.8, wall=100501
2021-06-19 22:33:58 | INFO | train_inner | epoch 003: 2855 / 3002 loss=2.766, ppl=6.8, wps=5797.2, ups=0.09, wpb=64824, bsz=128, num_updates=8803, lr=9.99376e-05, gnorm=2.217, loss_scale=1, train_wall=11, gb_free=2.8, wall=100512
2021-06-19 22:34:09 | INFO | train_inner | epoch 003: 2856 / 3002 loss=2.748, ppl=6.72, wps=5761.9, ups=0.09, wpb=64898, bsz=128, num_updates=8804, lr=9.99376e-05, gnorm=2.722, loss_scale=1, train_wall=11, gb_free=2.8, wall=100523
2021-06-19 22:34:20 | INFO | train_inner | epoch 003: 2857 / 3002 loss=2.65, ppl=6.28, wps=5757, ups=0.09, wpb=64788, bsz=128, num_updates=8805, lr=9.99376e-05, gnorm=10.385, loss_scale=1, train_wall=11, gb_free=2.8, wall=100535
2021-06-19 22:34:31 | INFO | train_inner | epoch 003: 2858 / 3002 loss=2.632, ppl=6.2, wps=5855.1, ups=0.09, wpb=64816, bsz=128, num_updates=8806, lr=9.99375e-05, gnorm=2.156, loss_scale=1, train_wall=11, gb_free=2.8, wall=100546
2021-06-19 22:34:43 | INFO | train_inner | epoch 003: 2859 / 3002 loss=2.726, ppl=6.62, wps=5846.7, ups=0.09, wpb=64789, bsz=128, num_updates=8807, lr=9.99375e-05, gnorm=2.181, loss_scale=1, train_wall=11, gb_free=2.8, wall=100557
2021-06-19 22:34:54 | INFO | train_inner | epoch 003: 2860 / 3002 loss=2.665, ppl=6.34, wps=5897.1, ups=0.09, wpb=64800, bsz=128, num_updates=8808, lr=9.99375e-05, gnorm=2.197, loss_scale=1, train_wall=10, gb_free=2.8, wall=100568
2021-06-19 22:35:05 | INFO | train_inner | epoch 003: 2861 / 3002 loss=2.571, ppl=5.94, wps=5866.7, ups=0.09, wpb=64848, bsz=128, num_updates=8809, lr=9.99375e-05, gnorm=2.181, loss_scale=1, train_wall=11, gb_free=2.8, wall=100579
2021-06-19 22:35:16 | INFO | train_inner | epoch 003: 2862 / 3002 loss=2.634, ppl=6.21, wps=5826.3, ups=0.09, wpb=64795, bsz=128, num_updates=8810, lr=9.99375e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=100590
2021-06-19 22:35:27 | INFO | train_inner | epoch 003: 2863 / 3002 loss=2.608, ppl=6.1, wps=5751.9, ups=0.09, wpb=64811, bsz=128, num_updates=8811, lr=9.99375e-05, gnorm=2.585, loss_scale=1, train_wall=11, gb_free=2.8, wall=100601
2021-06-19 22:35:38 | INFO | train_inner | epoch 003: 2864 / 3002 loss=2.772, ppl=6.83, wps=5935, ups=0.09, wpb=64853, bsz=128, num_updates=8812, lr=9.99375e-05, gnorm=6.563, loss_scale=1, train_wall=10, gb_free=2.8, wall=100612
2021-06-19 22:35:49 | INFO | train_inner | epoch 003: 2865 / 3002 loss=2.657, ppl=6.31, wps=5738.5, ups=0.09, wpb=64915, bsz=128, num_updates=8813, lr=9.99375e-05, gnorm=2.596, loss_scale=1, train_wall=11, gb_free=2.8, wall=100624
2021-06-19 22:36:00 | INFO | train_inner | epoch 003: 2866 / 3002 loss=2.602, ppl=6.07, wps=5841.9, ups=0.09, wpb=64899, bsz=128, num_updates=8814, lr=9.99375e-05, gnorm=2.365, loss_scale=1, train_wall=11, gb_free=2.8, wall=100635
2021-06-19 22:36:11 | INFO | train_inner | epoch 003: 2867 / 3002 loss=2.613, ppl=6.12, wps=5888, ups=0.09, wpb=64881, bsz=128, num_updates=8815, lr=9.99375e-05, gnorm=2.248, loss_scale=1, train_wall=11, gb_free=2.8, wall=100646
2021-06-19 22:36:22 | INFO | train_inner | epoch 003: 2868 / 3002 loss=2.74, ppl=6.68, wps=5950.2, ups=0.09, wpb=64860, bsz=128, num_updates=8816, lr=9.99375e-05, gnorm=2.724, loss_scale=1, train_wall=10, gb_free=2.8, wall=100657
2021-06-19 22:36:33 | INFO | train_inner | epoch 003: 2869 / 3002 loss=2.524, ppl=5.75, wps=5880.6, ups=0.09, wpb=64896, bsz=128, num_updates=8817, lr=9.99375e-05, gnorm=3.268, loss_scale=1, train_wall=11, gb_free=2.8, wall=100668
2021-06-19 22:36:44 | INFO | train_inner | epoch 003: 2870 / 3002 loss=2.705, ppl=6.52, wps=5832, ups=0.09, wpb=64823, bsz=128, num_updates=8818, lr=9.99375e-05, gnorm=2.204, loss_scale=1, train_wall=11, gb_free=2.8, wall=100679
2021-06-19 22:36:55 | INFO | train_inner | epoch 003: 2871 / 3002 loss=2.656, ppl=6.3, wps=5951.3, ups=0.09, wpb=64835, bsz=128, num_updates=8819, lr=9.99374e-05, gnorm=3.697, loss_scale=1, train_wall=10, gb_free=2.8, wall=100690
2021-06-19 22:37:06 | INFO | train_inner | epoch 003: 2872 / 3002 loss=2.775, ppl=6.84, wps=5857, ups=0.09, wpb=64736, bsz=128, num_updates=8820, lr=9.99374e-05, gnorm=2.24, loss_scale=1, train_wall=11, gb_free=2.8, wall=100701
2021-06-19 22:37:17 | INFO | train_inner | epoch 003: 2873 / 3002 loss=2.474, ppl=5.56, wps=5899.7, ups=0.09, wpb=64852, bsz=128, num_updates=8821, lr=9.99374e-05, gnorm=4.818, loss_scale=1, train_wall=11, gb_free=2.8, wall=100712
2021-06-19 22:37:29 | INFO | train_inner | epoch 003: 2874 / 3002 loss=2.536, ppl=5.8, wps=5774, ups=0.09, wpb=64863, bsz=128, num_updates=8822, lr=9.99374e-05, gnorm=2.296, loss_scale=1, train_wall=11, gb_free=2.8, wall=100723
2021-06-19 22:37:40 | INFO | train_inner | epoch 003: 2875 / 3002 loss=2.508, ppl=5.69, wps=5758.4, ups=0.09, wpb=64867, bsz=128, num_updates=8823, lr=9.99374e-05, gnorm=2.447, loss_scale=1, train_wall=11, gb_free=2.8, wall=100734
2021-06-19 22:37:51 | INFO | train_inner | epoch 003: 2876 / 3002 loss=2.655, ppl=6.3, wps=5835.6, ups=0.09, wpb=64862, bsz=128, num_updates=8824, lr=9.99374e-05, gnorm=2.202, loss_scale=1, train_wall=11, gb_free=2.8, wall=100745
2021-06-19 22:38:02 | INFO | train_inner | epoch 003: 2877 / 3002 loss=2.629, ppl=6.18, wps=5881.4, ups=0.09, wpb=64801, bsz=128, num_updates=8825, lr=9.99374e-05, gnorm=2.257, loss_scale=1, train_wall=11, gb_free=2.8, wall=100756
2021-06-19 22:38:13 | INFO | train_inner | epoch 003: 2878 / 3002 loss=2.484, ppl=5.6, wps=5780.4, ups=0.09, wpb=64821, bsz=128, num_updates=8826, lr=9.99374e-05, gnorm=2.531, loss_scale=1, train_wall=11, gb_free=2.8, wall=100768
2021-06-19 22:38:24 | INFO | train_inner | epoch 003: 2879 / 3002 loss=2.634, ppl=6.21, wps=5858.8, ups=0.09, wpb=64795, bsz=128, num_updates=8827, lr=9.99374e-05, gnorm=2.605, loss_scale=1, train_wall=11, gb_free=2.8, wall=100779
2021-06-19 22:38:35 | INFO | train_inner | epoch 003: 2880 / 3002 loss=2.687, ppl=6.44, wps=5813.5, ups=0.09, wpb=64747, bsz=128, num_updates=8828, lr=9.99374e-05, gnorm=2.254, loss_scale=1, train_wall=11, gb_free=2.8, wall=100790
2021-06-19 22:38:46 | INFO | train_inner | epoch 003: 2881 / 3002 loss=2.676, ppl=6.39, wps=5844.9, ups=0.09, wpb=64837, bsz=128, num_updates=8829, lr=9.99374e-05, gnorm=2.29, loss_scale=1, train_wall=11, gb_free=2.8, wall=100801
2021-06-19 22:38:58 | INFO | train_inner | epoch 003: 2882 / 3002 loss=2.653, ppl=6.29, wps=5847.6, ups=0.09, wpb=64901, bsz=128, num_updates=8830, lr=9.99374e-05, gnorm=12.284, loss_scale=1, train_wall=11, gb_free=2.8, wall=100812
2021-06-19 22:39:09 | INFO | train_inner | epoch 003: 2883 / 3002 loss=2.86, ppl=7.26, wps=5819.8, ups=0.09, wpb=64768, bsz=128, num_updates=8831, lr=9.99373e-05, gnorm=2.257, loss_scale=1, train_wall=11, gb_free=2.8, wall=100823
2021-06-19 22:39:20 | INFO | train_inner | epoch 003: 2884 / 3002 loss=2.712, ppl=6.55, wps=5864.5, ups=0.09, wpb=64876, bsz=128, num_updates=8832, lr=9.99373e-05, gnorm=2.186, loss_scale=1, train_wall=11, gb_free=2.8, wall=100834
2021-06-19 22:39:31 | INFO | train_inner | epoch 003: 2885 / 3002 loss=2.507, ppl=5.68, wps=5944.6, ups=0.09, wpb=64891, bsz=128, num_updates=8833, lr=9.99373e-05, gnorm=2.092, loss_scale=1, train_wall=10, gb_free=2.8, wall=100845
2021-06-19 22:39:42 | INFO | train_inner | epoch 003: 2886 / 3002 loss=2.548, ppl=5.85, wps=5767.3, ups=0.09, wpb=64826, bsz=128, num_updates=8834, lr=9.99373e-05, gnorm=2.039, loss_scale=1, train_wall=11, gb_free=2.8, wall=100856
2021-06-19 22:39:53 | INFO | train_inner | epoch 003: 2887 / 3002 loss=2.752, ppl=6.74, wps=5879, ups=0.09, wpb=64851, bsz=128, num_updates=8835, lr=9.99373e-05, gnorm=4.367, loss_scale=1, train_wall=11, gb_free=2.8, wall=100867
2021-06-19 22:40:04 | INFO | train_inner | epoch 003: 2888 / 3002 loss=2.767, ppl=6.81, wps=5851.9, ups=0.09, wpb=64906, bsz=128, num_updates=8836, lr=9.99373e-05, gnorm=2.318, loss_scale=1, train_wall=11, gb_free=2.8, wall=100878
2021-06-19 22:40:15 | INFO | train_inner | epoch 003: 2889 / 3002 loss=2.557, ppl=5.89, wps=5753, ups=0.09, wpb=64878, bsz=128, num_updates=8837, lr=9.99373e-05, gnorm=2.181, loss_scale=1, train_wall=11, gb_free=2.8, wall=100890
2021-06-19 22:40:26 | INFO | train_inner | epoch 003: 2890 / 3002 loss=2.616, ppl=6.13, wps=5868.3, ups=0.09, wpb=64909, bsz=128, num_updates=8838, lr=9.99373e-05, gnorm=3.001, loss_scale=1, train_wall=11, gb_free=2.8, wall=100901
2021-06-19 22:40:37 | INFO | train_inner | epoch 003: 2891 / 3002 loss=2.598, ppl=6.05, wps=5939.9, ups=0.09, wpb=64894, bsz=128, num_updates=8839, lr=9.99373e-05, gnorm=2.73, loss_scale=1, train_wall=10, gb_free=2.8, wall=100912
2021-06-19 22:40:49 | INFO | train_inner | epoch 003: 2892 / 3002 loss=2.499, ppl=5.65, wps=5746.8, ups=0.09, wpb=64761, bsz=128, num_updates=8840, lr=9.99373e-05, gnorm=2.312, loss_scale=1, train_wall=11, gb_free=2.8, wall=100923
2021-06-19 22:41:00 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-19 22:41:11 | INFO | train_inner | epoch 003: 2894 / 3002 loss=2.531, ppl=5.78, wps=2887.9, ups=0.04, wpb=64805, bsz=128, num_updates=8841, lr=9.99373e-05, gnorm=2.676, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=100945
2021-06-19 22:41:22 | INFO | train_inner | epoch 003: 2895 / 3002 loss=2.659, ppl=6.32, wps=5848.8, ups=0.09, wpb=64823, bsz=128, num_updates=8842, lr=9.99373e-05, gnorm=2.904, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100956
2021-06-19 22:41:33 | INFO | train_inner | epoch 003: 2896 / 3002 loss=2.556, ppl=5.88, wps=5751.1, ups=0.09, wpb=64819, bsz=128, num_updates=8843, lr=9.99373e-05, gnorm=2.542, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100968
2021-06-19 22:41:44 | INFO | train_inner | epoch 003: 2897 / 3002 loss=2.712, ppl=6.55, wps=5962.4, ups=0.09, wpb=64875, bsz=128, num_updates=8844, lr=9.99372e-05, gnorm=17.082, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=100979
2021-06-19 22:41:55 | INFO | train_inner | epoch 003: 2898 / 3002 loss=2.502, ppl=5.67, wps=5776.6, ups=0.09, wpb=64831, bsz=128, num_updates=8845, lr=9.99372e-05, gnorm=2.777, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=100990
2021-06-19 22:42:06 | INFO | train_inner | epoch 003: 2899 / 3002 loss=2.74, ppl=6.68, wps=5960, ups=0.09, wpb=64825, bsz=128, num_updates=8846, lr=9.99372e-05, gnorm=2.337, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101001
2021-06-19 22:42:18 | INFO | train_inner | epoch 003: 2900 / 3002 loss=2.634, ppl=6.21, wps=5808.4, ups=0.09, wpb=64819, bsz=128, num_updates=8847, lr=9.99372e-05, gnorm=2.839, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101012
2021-06-19 22:42:29 | INFO | train_inner | epoch 003: 2901 / 3002 loss=2.703, ppl=6.51, wps=5805.1, ups=0.09, wpb=64787, bsz=128, num_updates=8848, lr=9.99372e-05, gnorm=2.238, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101023
2021-06-19 22:42:40 | INFO | train_inner | epoch 003: 2902 / 3002 loss=2.598, ppl=6.06, wps=5840.7, ups=0.09, wpb=64855, bsz=128, num_updates=8849, lr=9.99372e-05, gnorm=2.751, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101034
2021-06-19 22:42:51 | INFO | train_inner | epoch 003: 2903 / 3002 loss=2.683, ppl=6.42, wps=5763.4, ups=0.09, wpb=64777, bsz=128, num_updates=8850, lr=9.99372e-05, gnorm=2.516, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101045
2021-06-19 22:43:02 | INFO | train_inner | epoch 003: 2904 / 3002 loss=2.774, ppl=6.84, wps=5870.9, ups=0.09, wpb=64822, bsz=128, num_updates=8851, lr=9.99372e-05, gnorm=2.515, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101056
2021-06-19 22:43:13 | INFO | train_inner | epoch 003: 2905 / 3002 loss=2.561, ppl=5.9, wps=5743.5, ups=0.09, wpb=64783, bsz=128, num_updates=8852, lr=9.99372e-05, gnorm=18.514, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101068
2021-06-19 22:43:24 | INFO | train_inner | epoch 003: 2906 / 3002 loss=2.6, ppl=6.06, wps=5864.5, ups=0.09, wpb=64883, bsz=128, num_updates=8853, lr=9.99372e-05, gnorm=2.461, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101079
2021-06-19 22:43:36 | INFO | train_inner | epoch 003: 2907 / 3002 loss=2.704, ppl=6.52, wps=5825.9, ups=0.09, wpb=64851, bsz=128, num_updates=8854, lr=9.99372e-05, gnorm=2.301, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101090
2021-06-19 22:43:47 | INFO | train_inner | epoch 003: 2908 / 3002 loss=2.693, ppl=6.47, wps=5753.6, ups=0.09, wpb=64755, bsz=128, num_updates=8855, lr=9.99372e-05, gnorm=2.233, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101101
2021-06-19 22:43:58 | INFO | train_inner | epoch 003: 2909 / 3002 loss=2.563, ppl=5.91, wps=5836.1, ups=0.09, wpb=64794, bsz=128, num_updates=8856, lr=9.99371e-05, gnorm=2.34, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101112
2021-06-19 22:44:09 | INFO | train_inner | epoch 003: 2910 / 3002 loss=2.664, ppl=6.34, wps=5827, ups=0.09, wpb=64798, bsz=128, num_updates=8857, lr=9.99371e-05, gnorm=2.386, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101123
2021-06-19 22:44:20 | INFO | train_inner | epoch 003: 2911 / 3002 loss=2.729, ppl=6.63, wps=5841.5, ups=0.09, wpb=64823, bsz=128, num_updates=8858, lr=9.99371e-05, gnorm=2.211, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101134
2021-06-19 22:44:31 | INFO | train_inner | epoch 003: 2912 / 3002 loss=2.679, ppl=6.41, wps=5911.4, ups=0.09, wpb=64843, bsz=128, num_updates=8859, lr=9.99371e-05, gnorm=2.34, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101145
2021-06-19 22:44:42 | INFO | train_inner | epoch 003: 2913 / 3002 loss=2.611, ppl=6.11, wps=5803.3, ups=0.09, wpb=64823, bsz=128, num_updates=8860, lr=9.99371e-05, gnorm=2.153, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101157
2021-06-19 22:44:53 | INFO | train_inner | epoch 003: 2914 / 3002 loss=2.624, ppl=6.16, wps=5772.6, ups=0.09, wpb=64792, bsz=128, num_updates=8861, lr=9.99371e-05, gnorm=2.29, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101168
2021-06-19 22:45:05 | INFO | train_inner | epoch 003: 2915 / 3002 loss=2.618, ppl=6.14, wps=5792.7, ups=0.09, wpb=64792, bsz=128, num_updates=8862, lr=9.99371e-05, gnorm=2.433, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101179
2021-06-19 22:45:16 | INFO | train_inner | epoch 003: 2916 / 3002 loss=2.552, ppl=5.86, wps=5904, ups=0.09, wpb=64790, bsz=128, num_updates=8863, lr=9.99371e-05, gnorm=2.414, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101190
2021-06-19 22:45:27 | INFO | train_inner | epoch 003: 2917 / 3002 loss=2.599, ppl=6.06, wps=5792.2, ups=0.09, wpb=64757, bsz=128, num_updates=8864, lr=9.99371e-05, gnorm=2.196, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101201
2021-06-19 22:45:38 | INFO | train_inner | epoch 003: 2918 / 3002 loss=2.594, ppl=6.04, wps=5785.9, ups=0.09, wpb=64871, bsz=128, num_updates=8865, lr=9.99371e-05, gnorm=2.254, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101212
2021-06-19 22:45:49 | INFO | train_inner | epoch 003: 2919 / 3002 loss=2.572, ppl=5.95, wps=5748.4, ups=0.09, wpb=64861, bsz=128, num_updates=8866, lr=9.99371e-05, gnorm=2.314, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101224
2021-06-19 22:46:00 | INFO | train_inner | epoch 003: 2920 / 3002 loss=2.635, ppl=6.21, wps=5916.2, ups=0.09, wpb=64889, bsz=128, num_updates=8867, lr=9.99371e-05, gnorm=2.193, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101235
2021-06-19 22:46:11 | INFO | train_inner | epoch 003: 2921 / 3002 loss=2.625, ppl=6.17, wps=5897.9, ups=0.09, wpb=64855, bsz=128, num_updates=8868, lr=9.99371e-05, gnorm=2.451, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101246
2021-06-19 22:46:22 | INFO | train_inner | epoch 003: 2922 / 3002 loss=2.577, ppl=5.97, wps=5832, ups=0.09, wpb=64867, bsz=128, num_updates=8869, lr=9.9937e-05, gnorm=2.623, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101257
2021-06-19 22:46:34 | INFO | train_inner | epoch 003: 2923 / 3002 loss=2.619, ppl=6.15, wps=5773.9, ups=0.09, wpb=64852, bsz=128, num_updates=8870, lr=9.9937e-05, gnorm=2.506, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101268
2021-06-19 22:46:45 | INFO | train_inner | epoch 003: 2924 / 3002 loss=2.594, ppl=6.04, wps=5818.1, ups=0.09, wpb=64856, bsz=128, num_updates=8871, lr=9.9937e-05, gnorm=32.179, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101279
2021-06-19 22:46:56 | INFO | train_inner | epoch 003: 2925 / 3002 loss=2.652, ppl=6.28, wps=5855.7, ups=0.09, wpb=64861, bsz=128, num_updates=8872, lr=9.9937e-05, gnorm=2.33, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101290
2021-06-19 22:47:07 | INFO | train_inner | epoch 003: 2926 / 3002 loss=2.79, ppl=6.92, wps=5867.8, ups=0.09, wpb=64864, bsz=128, num_updates=8873, lr=9.9937e-05, gnorm=2.746, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101301
2021-06-19 22:47:18 | INFO | train_inner | epoch 003: 2927 / 3002 loss=2.619, ppl=6.14, wps=5830.3, ups=0.09, wpb=64739, bsz=128, num_updates=8874, lr=9.9937e-05, gnorm=2.964, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101312
2021-06-19 22:47:29 | INFO | train_inner | epoch 003: 2928 / 3002 loss=2.661, ppl=6.32, wps=5792.4, ups=0.09, wpb=64823, bsz=128, num_updates=8875, lr=9.9937e-05, gnorm=2.226, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101324
2021-06-19 22:47:40 | INFO | train_inner | epoch 003: 2929 / 3002 loss=2.62, ppl=6.15, wps=5832.3, ups=0.09, wpb=64814, bsz=128, num_updates=8876, lr=9.9937e-05, gnorm=2.976, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101335
2021-06-19 22:47:51 | INFO | train_inner | epoch 003: 2930 / 3002 loss=2.564, ppl=5.91, wps=5873, ups=0.09, wpb=64763, bsz=128, num_updates=8877, lr=9.9937e-05, gnorm=2.241, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101346
2021-06-19 22:48:02 | INFO | train_inner | epoch 003: 2931 / 3002 loss=2.63, ppl=6.19, wps=5934, ups=0.09, wpb=64896, bsz=128, num_updates=8878, lr=9.9937e-05, gnorm=3.389, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101357
2021-06-19 22:48:14 | INFO | train_inner | epoch 003: 2932 / 3002 loss=2.735, ppl=6.66, wps=5766, ups=0.09, wpb=64783, bsz=128, num_updates=8879, lr=9.9937e-05, gnorm=2.996, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101368
2021-06-19 22:48:25 | INFO | train_inner | epoch 003: 2933 / 3002 loss=2.664, ppl=6.34, wps=5824.1, ups=0.09, wpb=64817, bsz=128, num_updates=8880, lr=9.9937e-05, gnorm=2.402, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101379
2021-06-19 22:48:36 | INFO | train_inner | epoch 003: 2934 / 3002 loss=2.634, ppl=6.21, wps=5804.2, ups=0.09, wpb=64785, bsz=128, num_updates=8881, lr=9.99369e-05, gnorm=5.055, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101390
2021-06-19 22:48:47 | INFO | train_inner | epoch 003: 2935 / 3002 loss=2.66, ppl=6.32, wps=5784.7, ups=0.09, wpb=64798, bsz=128, num_updates=8882, lr=9.99369e-05, gnorm=2.32, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101401
2021-06-19 22:48:58 | INFO | train_inner | epoch 003: 2936 / 3002 loss=2.563, ppl=5.91, wps=5941.7, ups=0.09, wpb=64845, bsz=128, num_updates=8883, lr=9.99369e-05, gnorm=2.741, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101412
2021-06-19 22:49:09 | INFO | train_inner | epoch 003: 2937 / 3002 loss=2.626, ppl=6.17, wps=5909.2, ups=0.09, wpb=64887, bsz=128, num_updates=8884, lr=9.99369e-05, gnorm=3.078, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101423
2021-06-19 22:49:20 | INFO | train_inner | epoch 003: 2938 / 3002 loss=2.915, ppl=7.54, wps=5799.6, ups=0.09, wpb=64790, bsz=128, num_updates=8885, lr=9.99369e-05, gnorm=4.825, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101434
2021-06-19 22:49:31 | INFO | train_inner | epoch 003: 2939 / 3002 loss=2.61, ppl=6.1, wps=5900.6, ups=0.09, wpb=64889, bsz=128, num_updates=8886, lr=9.99369e-05, gnorm=2.414, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101445
2021-06-19 22:49:42 | INFO | train_inner | epoch 003: 2940 / 3002 loss=2.554, ppl=5.87, wps=5816.8, ups=0.09, wpb=64802, bsz=128, num_updates=8887, lr=9.99369e-05, gnorm=5.978, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101457
2021-06-19 22:49:53 | INFO | train_inner | epoch 003: 2941 / 3002 loss=2.494, ppl=5.63, wps=5859.8, ups=0.09, wpb=64911, bsz=128, num_updates=8888, lr=9.99369e-05, gnorm=2.14, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101468
2021-06-19 22:50:04 | INFO | train_inner | epoch 003: 2942 / 3002 loss=2.602, ppl=6.07, wps=5778.7, ups=0.09, wpb=64781, bsz=128, num_updates=8889, lr=9.99369e-05, gnorm=2.261, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101479
2021-06-19 22:50:16 | INFO | train_inner | epoch 003: 2943 / 3002 loss=2.743, ppl=6.7, wps=5822.2, ups=0.09, wpb=64791, bsz=128, num_updates=8890, lr=9.99369e-05, gnorm=33.718, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101490
2021-06-19 22:50:26 | INFO | train_inner | epoch 003: 2944 / 3002 loss=2.841, ppl=7.16, wps=5981.3, ups=0.09, wpb=64867, bsz=128, num_updates=8891, lr=9.99369e-05, gnorm=2.905, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101501
2021-06-19 22:50:37 | INFO | train_inner | epoch 003: 2945 / 3002 loss=2.67, ppl=6.36, wps=5974.8, ups=0.09, wpb=64828, bsz=128, num_updates=8892, lr=9.99369e-05, gnorm=3.419, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101512
2021-06-19 22:50:48 | INFO | train_inner | epoch 003: 2946 / 3002 loss=2.784, ppl=6.89, wps=5827, ups=0.09, wpb=64850, bsz=128, num_updates=8893, lr=9.99369e-05, gnorm=3.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101523
2021-06-19 22:51:00 | INFO | train_inner | epoch 003: 2947 / 3002 loss=2.579, ppl=5.97, wps=5851.4, ups=0.09, wpb=64741, bsz=128, num_updates=8894, lr=9.99368e-05, gnorm=3.804, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101534
2021-06-19 22:51:10 | INFO | train_inner | epoch 003: 2948 / 3002 loss=2.456, ppl=5.49, wps=5944.7, ups=0.09, wpb=64788, bsz=128, num_updates=8895, lr=9.99368e-05, gnorm=2.427, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101545
2021-06-19 22:51:22 | INFO | train_inner | epoch 003: 2949 / 3002 loss=2.718, ppl=6.58, wps=5792.7, ups=0.09, wpb=64746, bsz=128, num_updates=8896, lr=9.99368e-05, gnorm=2.47, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101556
2021-06-19 22:51:33 | INFO | train_inner | epoch 003: 2950 / 3002 loss=2.557, ppl=5.89, wps=5846.6, ups=0.09, wpb=64894, bsz=128, num_updates=8897, lr=9.99368e-05, gnorm=2.324, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101567
2021-06-19 22:51:44 | INFO | train_inner | epoch 003: 2951 / 3002 loss=2.704, ppl=6.51, wps=5798.3, ups=0.09, wpb=64859, bsz=128, num_updates=8898, lr=9.99368e-05, gnorm=4.024, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101578
2021-06-19 22:51:55 | INFO | train_inner | epoch 003: 2952 / 3002 loss=2.698, ppl=6.49, wps=5795.3, ups=0.09, wpb=64813, bsz=128, num_updates=8899, lr=9.99368e-05, gnorm=2.353, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101589
2021-06-19 22:52:06 | INFO | train_inner | epoch 003: 2953 / 3002 loss=2.652, ppl=6.29, wps=5820.7, ups=0.09, wpb=64831, bsz=128, num_updates=8900, lr=9.99368e-05, gnorm=2.337, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101601
2021-06-19 22:52:17 | INFO | train_inner | epoch 003: 2954 / 3002 loss=2.616, ppl=6.13, wps=5976.5, ups=0.09, wpb=64844, bsz=128, num_updates=8901, lr=9.99368e-05, gnorm=2.475, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101611
2021-06-19 22:52:28 | INFO | train_inner | epoch 003: 2955 / 3002 loss=2.653, ppl=6.29, wps=5863, ups=0.09, wpb=64839, bsz=128, num_updates=8902, lr=9.99368e-05, gnorm=2.573, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101622
2021-06-19 22:52:39 | INFO | train_inner | epoch 003: 2956 / 3002 loss=2.777, ppl=6.85, wps=5902.9, ups=0.09, wpb=64863, bsz=128, num_updates=8903, lr=9.99368e-05, gnorm=2.459, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101633
2021-06-19 22:52:50 | INFO | train_inner | epoch 003: 2957 / 3002 loss=2.871, ppl=7.32, wps=5982.7, ups=0.09, wpb=64785, bsz=128, num_updates=8904, lr=9.99368e-05, gnorm=2.242, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101644
2021-06-19 22:53:01 | INFO | train_inner | epoch 003: 2958 / 3002 loss=2.765, ppl=6.8, wps=5878.3, ups=0.09, wpb=64779, bsz=128, num_updates=8905, lr=9.99368e-05, gnorm=2.355, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101655
2021-06-19 22:53:12 | INFO | train_inner | epoch 003: 2959 / 3002 loss=2.555, ppl=5.88, wps=5878.3, ups=0.09, wpb=64807, bsz=128, num_updates=8906, lr=9.99367e-05, gnorm=3.162, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101666
2021-06-19 22:53:23 | INFO | train_inner | epoch 003: 2960 / 3002 loss=2.497, ppl=5.64, wps=5880.2, ups=0.09, wpb=64843, bsz=128, num_updates=8907, lr=9.99367e-05, gnorm=2.258, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101677
2021-06-19 22:53:34 | INFO | train_inner | epoch 003: 2961 / 3002 loss=2.671, ppl=6.37, wps=5929.6, ups=0.09, wpb=64810, bsz=128, num_updates=8908, lr=9.99367e-05, gnorm=3.416, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101688
2021-06-19 22:53:45 | INFO | train_inner | epoch 003: 2962 / 3002 loss=2.604, ppl=6.08, wps=5784, ups=0.09, wpb=64860, bsz=128, num_updates=8909, lr=9.99367e-05, gnorm=2.676, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101699
2021-06-19 22:53:56 | INFO | train_inner | epoch 003: 2963 / 3002 loss=2.608, ppl=6.1, wps=5921.4, ups=0.09, wpb=64887, bsz=128, num_updates=8910, lr=9.99367e-05, gnorm=2.51, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101710
2021-06-19 22:54:07 | INFO | train_inner | epoch 003: 2964 / 3002 loss=2.63, ppl=6.19, wps=5824.1, ups=0.09, wpb=64894, bsz=128, num_updates=8911, lr=9.99367e-05, gnorm=2.609, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101722
2021-06-19 22:54:18 | INFO | train_inner | epoch 003: 2965 / 3002 loss=2.603, ppl=6.07, wps=5970, ups=0.09, wpb=64789, bsz=128, num_updates=8912, lr=9.99367e-05, gnorm=2.187, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101732
2021-06-19 22:54:29 | INFO | train_inner | epoch 003: 2966 / 3002 loss=2.619, ppl=6.14, wps=5781.3, ups=0.09, wpb=64781, bsz=128, num_updates=8913, lr=9.99367e-05, gnorm=3.717, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101744
2021-06-19 22:54:40 | INFO | train_inner | epoch 003: 2967 / 3002 loss=2.608, ppl=6.1, wps=5801.5, ups=0.09, wpb=64893, bsz=128, num_updates=8914, lr=9.99367e-05, gnorm=2.264, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101755
2021-06-19 22:54:52 | INFO | train_inner | epoch 003: 2968 / 3002 loss=2.557, ppl=5.89, wps=5789.7, ups=0.09, wpb=64828, bsz=128, num_updates=8915, lr=9.99367e-05, gnorm=2.13, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101766
2021-06-19 22:55:03 | INFO | train_inner | epoch 003: 2969 / 3002 loss=2.741, ppl=6.68, wps=5834.2, ups=0.09, wpb=64817, bsz=128, num_updates=8916, lr=9.99367e-05, gnorm=2.809, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101777
2021-06-19 22:55:14 | INFO | train_inner | epoch 003: 2970 / 3002 loss=2.533, ppl=5.79, wps=5865.6, ups=0.09, wpb=64805, bsz=128, num_updates=8917, lr=9.99367e-05, gnorm=2.489, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101788
2021-06-19 22:55:25 | INFO | train_inner | epoch 003: 2971 / 3002 loss=2.708, ppl=6.54, wps=5711.8, ups=0.09, wpb=64790, bsz=128, num_updates=8918, lr=9.99367e-05, gnorm=2.098, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101800
2021-06-19 22:55:36 | INFO | train_inner | epoch 003: 2972 / 3002 loss=2.597, ppl=6.05, wps=5864.4, ups=0.09, wpb=64815, bsz=128, num_updates=8919, lr=9.99366e-05, gnorm=3.585, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101811
2021-06-19 22:55:47 | INFO | train_inner | epoch 003: 2973 / 3002 loss=2.692, ppl=6.46, wps=5813.5, ups=0.09, wpb=64833, bsz=128, num_updates=8920, lr=9.99366e-05, gnorm=4.659, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101822
2021-06-19 22:55:59 | INFO | train_inner | epoch 003: 2974 / 3002 loss=2.637, ppl=6.22, wps=5731.9, ups=0.09, wpb=64724, bsz=128, num_updates=8921, lr=9.99366e-05, gnorm=2.291, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101833
2021-06-19 22:56:10 | INFO | train_inner | epoch 003: 2975 / 3002 loss=2.795, ppl=6.94, wps=5912.6, ups=0.09, wpb=64759, bsz=128, num_updates=8922, lr=9.99366e-05, gnorm=3.96, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101844
2021-06-19 22:56:21 | INFO | train_inner | epoch 003: 2976 / 3002 loss=2.688, ppl=6.44, wps=5754.9, ups=0.09, wpb=64793, bsz=128, num_updates=8923, lr=9.99366e-05, gnorm=5.874, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101855
2021-06-19 22:56:32 | INFO | train_inner | epoch 003: 2977 / 3002 loss=2.721, ppl=6.59, wps=5862.5, ups=0.09, wpb=64816, bsz=128, num_updates=8924, lr=9.99366e-05, gnorm=2.467, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101866
2021-06-19 22:56:43 | INFO | train_inner | epoch 003: 2978 / 3002 loss=2.583, ppl=5.99, wps=5871.2, ups=0.09, wpb=64809, bsz=128, num_updates=8925, lr=9.99366e-05, gnorm=2.524, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101877
2021-06-19 22:56:54 | INFO | train_inner | epoch 003: 2979 / 3002 loss=2.617, ppl=6.14, wps=5864.8, ups=0.09, wpb=64814, bsz=128, num_updates=8926, lr=9.99366e-05, gnorm=2.152, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101888
2021-06-19 22:57:05 | INFO | train_inner | epoch 003: 2980 / 3002 loss=2.604, ppl=6.08, wps=5965, ups=0.09, wpb=64823, bsz=128, num_updates=8927, lr=9.99366e-05, gnorm=2.369, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=101899
2021-06-19 22:57:16 | INFO | train_inner | epoch 003: 2981 / 3002 loss=2.663, ppl=6.33, wps=5875.6, ups=0.09, wpb=64850, bsz=128, num_updates=8928, lr=9.99366e-05, gnorm=2.299, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101910
2021-06-19 22:57:27 | INFO | train_inner | epoch 003: 2982 / 3002 loss=2.634, ppl=6.21, wps=5856.3, ups=0.09, wpb=64819, bsz=128, num_updates=8929, lr=9.99366e-05, gnorm=2.309, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101921
2021-06-19 22:57:38 | INFO | train_inner | epoch 003: 2983 / 3002 loss=2.687, ppl=6.44, wps=5861.8, ups=0.09, wpb=64839, bsz=128, num_updates=8930, lr=9.99366e-05, gnorm=6.849, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101932
2021-06-19 22:57:49 | INFO | train_inner | epoch 003: 2984 / 3002 loss=2.699, ppl=6.49, wps=5917.3, ups=0.09, wpb=64756, bsz=128, num_updates=8931, lr=9.99365e-05, gnorm=8.182, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101943
2021-06-19 22:58:00 | INFO | train_inner | epoch 003: 2985 / 3002 loss=2.527, ppl=5.76, wps=5849.8, ups=0.09, wpb=64791, bsz=128, num_updates=8932, lr=9.99365e-05, gnorm=2.198, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101954
2021-06-19 22:58:11 | INFO | train_inner | epoch 003: 2986 / 3002 loss=2.517, ppl=5.72, wps=5811.5, ups=0.09, wpb=64870, bsz=128, num_updates=8933, lr=9.99365e-05, gnorm=2.299, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101966
2021-06-19 22:58:23 | INFO | train_inner | epoch 003: 2987 / 3002 loss=2.637, ppl=6.22, wps=5744.7, ups=0.09, wpb=64767, bsz=128, num_updates=8934, lr=9.99365e-05, gnorm=4.498, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101977
2021-06-19 22:58:34 | INFO | train_inner | epoch 003: 2988 / 3002 loss=2.538, ppl=5.81, wps=5728.8, ups=0.09, wpb=64728, bsz=128, num_updates=8935, lr=9.99365e-05, gnorm=2.22, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101988
2021-06-19 22:58:45 | INFO | train_inner | epoch 003: 2989 / 3002 loss=2.541, ppl=5.82, wps=5768.6, ups=0.09, wpb=64809, bsz=128, num_updates=8936, lr=9.99365e-05, gnorm=2.311, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=101999
2021-06-19 22:58:56 | INFO | train_inner | epoch 003: 2990 / 3002 loss=2.693, ppl=6.47, wps=5897.1, ups=0.09, wpb=64847, bsz=128, num_updates=8937, lr=9.99365e-05, gnorm=4.031, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102010
2021-06-19 22:59:07 | INFO | train_inner | epoch 003: 2991 / 3002 loss=2.73, ppl=6.63, wps=5740.5, ups=0.09, wpb=64884, bsz=128, num_updates=8938, lr=9.99365e-05, gnorm=2.271, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102022
2021-06-19 22:59:18 | INFO | train_inner | epoch 003: 2992 / 3002 loss=2.605, ppl=6.08, wps=5830, ups=0.09, wpb=64803, bsz=128, num_updates=8939, lr=9.99365e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102033
2021-06-19 22:59:29 | INFO | train_inner | epoch 003: 2993 / 3002 loss=2.722, ppl=6.6, wps=5935.6, ups=0.09, wpb=64823, bsz=128, num_updates=8940, lr=9.99365e-05, gnorm=3.385, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=102044
2021-06-19 22:59:40 | INFO | train_inner | epoch 003: 2994 / 3002 loss=2.772, ppl=6.83, wps=5858.1, ups=0.09, wpb=64848, bsz=128, num_updates=8941, lr=9.99365e-05, gnorm=2.775, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102055
2021-06-19 22:59:52 | INFO | train_inner | epoch 003: 2995 / 3002 loss=2.792, ppl=6.93, wps=5882.4, ups=0.09, wpb=64875, bsz=128, num_updates=8942, lr=9.99365e-05, gnorm=20.74, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102066
2021-06-19 23:00:03 | INFO | train_inner | epoch 003: 2996 / 3002 loss=2.547, ppl=5.84, wps=5780.7, ups=0.09, wpb=64793, bsz=128, num_updates=8943, lr=9.99365e-05, gnorm=2.143, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102077
2021-06-19 23:00:14 | INFO | train_inner | epoch 003: 2997 / 3002 loss=2.694, ppl=6.47, wps=5748.3, ups=0.09, wpb=64779, bsz=128, num_updates=8944, lr=9.99364e-05, gnorm=36.553, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102088
2021-06-19 23:00:25 | INFO | train_inner | epoch 003: 2998 / 3002 loss=2.703, ppl=6.51, wps=5960.5, ups=0.09, wpb=64854, bsz=128, num_updates=8945, lr=9.99364e-05, gnorm=2.163, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=102099
2021-06-19 23:00:36 | INFO | train_inner | epoch 003: 2999 / 3002 loss=2.533, ppl=5.79, wps=5875.5, ups=0.09, wpb=64819, bsz=128, num_updates=8946, lr=9.99364e-05, gnorm=2.165, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102110
2021-06-19 23:00:47 | INFO | train_inner | epoch 003: 3000 / 3002 loss=2.629, ppl=6.19, wps=5885.1, ups=0.09, wpb=64823, bsz=128, num_updates=8947, lr=9.99364e-05, gnorm=2.207, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102121
2021-06-19 23:00:58 | INFO | train_inner | epoch 003: 3001 / 3002 loss=2.635, ppl=6.21, wps=5781.7, ups=0.09, wpb=64897, bsz=128, num_updates=8948, lr=9.99364e-05, gnorm=2.209, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=102132
2021-06-19 23:01:04 | INFO | train_inner | epoch 003: 3002 / 3002 loss=2.67, ppl=6.36, wps=5843.2, ups=0.16, wpb=36454, bsz=72, num_updates=8949, lr=9.99364e-05, gnorm=3.212, loss_scale=0.5, train_wall=6, gb_free=2.8, wall=102139
2021-06-19 23:01:04 | INFO | fairseq_cli.train | begin validation on "valid" subset
2021-06-19 23:16:01 | INFO | valid | epoch 003 | valid on 'valid' subset | loss 2.505 | ppl 5.68 | wps 19673.8 | wpb 506.5 | bsz 1 | num_updates 8949 | best_loss 2.505
2021-06-19 23:16:01 | INFO | fairseq.checkpoint_utils | Preparing to save checkpoint for epoch 3 @ 8949 updates
2021-06-19 23:16:01 | INFO | fairseq.trainer | Saving checkpoint to checkpoints/checkpoint3.pt
2021-06-19 23:16:13 | INFO | fairseq.trainer | Finished saving checkpoint to checkpoints/checkpoint3.pt
2021-06-19 23:21:10 | INFO | fairseq.checkpoint_utils | Saved checkpoint checkpoints/checkpoint3.pt (epoch 3 @ 8949 updates, score 2.505) (writing took 308.19168733900005 seconds)
2021-06-19 23:21:10 | INFO | fairseq_cli.train | end of epoch 3 (average epoch stats below)
2021-06-19 23:21:10 | INFO | train | epoch 003 | loss 2.648 | ppl 6.27 | wps 5606.4 | ups 0.09 | wpb 64819.5 | bsz 128 | num_updates 8949 | lr 9.99364e-05 | gnorm 2.655 | loss_scale 0.5 | train_wall 31896 | gb_free 2.8 | wall 103344
2021-06-19 23:21:10 | INFO | fairseq.trainer | begin training epoch 4
2021-06-19 23:21:10 | INFO | fairseq_cli.train | Start iterating over samples
2021-06-19 23:21:20 | INFO | train_inner | epoch 004: 1 / 3002 loss=2.704, ppl=6.52, wps=53.3, ups=0, wpb=64845, bsz=128, num_updates=8950, lr=9.99364e-05, gnorm=2.272, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103355
2021-06-19 23:21:31 | INFO | train_inner | epoch 004: 2 / 3002 loss=2.657, ppl=6.31, wps=6228.2, ups=0.1, wpb=64758, bsz=128, num_updates=8951, lr=9.99364e-05, gnorm=17.478, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103365
2021-06-19 23:21:41 | INFO | train_inner | epoch 004: 3 / 3002 loss=2.761, ppl=6.78, wps=6278.9, ups=0.1, wpb=64796, bsz=128, num_updates=8952, lr=9.99364e-05, gnorm=11.167, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103376
2021-06-19 23:21:52 | INFO | train_inner | epoch 004: 4 / 3002 loss=2.732, ppl=6.65, wps=6087.7, ups=0.09, wpb=64764, bsz=128, num_updates=8953, lr=9.99364e-05, gnorm=2.747, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103386
2021-06-19 23:22:03 | INFO | train_inner | epoch 004: 5 / 3002 loss=2.562, ppl=5.9, wps=6007.8, ups=0.09, wpb=64775, bsz=128, num_updates=8954, lr=9.99364e-05, gnorm=2.415, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103397
2021-06-19 23:22:14 | INFO | train_inner | epoch 004: 6 / 3002 loss=2.595, ppl=6.04, wps=5963.8, ups=0.09, wpb=64830, bsz=128, num_updates=8955, lr=9.99364e-05, gnorm=2.41, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103408
2021-06-19 23:22:24 | INFO | train_inner | epoch 004: 7 / 3002 loss=2.74, ppl=6.68, wps=6054.6, ups=0.09, wpb=64850, bsz=128, num_updates=8956, lr=9.99363e-05, gnorm=2.373, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103419
2021-06-19 23:22:35 | INFO | train_inner | epoch 004: 8 / 3002 loss=2.688, ppl=6.44, wps=6159.6, ups=0.09, wpb=64889, bsz=128, num_updates=8957, lr=9.99363e-05, gnorm=2.358, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103429
2021-06-19 23:22:46 | INFO | train_inner | epoch 004: 9 / 3002 loss=2.574, ppl=5.96, wps=5979.8, ups=0.09, wpb=64797, bsz=128, num_updates=8958, lr=9.99363e-05, gnorm=2.197, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103440
2021-06-19 23:22:56 | INFO | train_inner | epoch 004: 10 / 3002 loss=2.754, ppl=6.75, wps=5992.8, ups=0.09, wpb=64814, bsz=128, num_updates=8959, lr=9.99363e-05, gnorm=2.367, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103451
2021-06-19 23:23:07 | INFO | train_inner | epoch 004: 11 / 3002 loss=2.68, ppl=6.41, wps=6002.1, ups=0.09, wpb=64840, bsz=128, num_updates=8960, lr=9.99363e-05, gnorm=2.258, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103462
2021-06-19 23:23:18 | INFO | train_inner | epoch 004: 12 / 3002 loss=2.703, ppl=6.51, wps=5917, ups=0.09, wpb=64796, bsz=128, num_updates=8961, lr=9.99363e-05, gnorm=2.211, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=103472
2021-06-19 23:23:29 | INFO | train_inner | epoch 004: 13 / 3002 loss=2.534, ppl=5.79, wps=5939.8, ups=0.09, wpb=64838, bsz=128, num_updates=8962, lr=9.99363e-05, gnorm=2.314, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103483
2021-06-19 23:23:40 | INFO | train_inner | epoch 004: 14 / 3002 loss=2.704, ppl=6.52, wps=5727.8, ups=0.09, wpb=64776, bsz=128, num_updates=8963, lr=9.99363e-05, gnorm=2.399, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=103495
2021-06-19 23:23:51 | INFO | train_inner | epoch 004: 15 / 3002 loss=2.549, ppl=5.85, wps=5983.9, ups=0.09, wpb=64925, bsz=128, num_updates=8964, lr=9.99363e-05, gnorm=2.398, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=103506
2021-06-19 23:24:02 | INFO | train_inner | epoch 004: 16 / 3002 loss=2.569, ppl=5.94, wps=5903.2, ups=0.09, wpb=64911, bsz=128, num_updates=8965, lr=9.99363e-05, gnorm=2.15, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=103517
2021-06-19 23:24:14 | INFO | train_inner | epoch 004: 17 / 3002 loss=2.681, ppl=6.41, wps=5720.6, ups=0.09, wpb=64762, bsz=128, num_updates=8966, lr=9.99363e-05, gnorm=2.183, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=103528
2021-06-19 23:24:25 | INFO | train_inner | epoch 004: 18 / 3002 loss=2.724, ppl=6.61, wps=5835.4, ups=0.09, wpb=64889, bsz=128, num_updates=8967, lr=9.99363e-05, gnorm=2.292, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=103539
2021-06-19 23:24:36 | INFO | train_inner | epoch 004: 19 / 3002 loss=2.623, ppl=6.16, wps=5811.9, ups=0.09, wpb=64871, bsz=128, num_updates=8968, lr=9.99363e-05, gnorm=2.377, loss_scale=1, train_wall=11, gb_free=2.8, wall=103550
2021-06-19 23:24:47 | INFO | train_inner | epoch 004: 20 / 3002 loss=2.557, ppl=5.88, wps=5712.8, ups=0.09, wpb=64884, bsz=128, num_updates=8969, lr=9.99362e-05, gnorm=11.215, loss_scale=1, train_wall=11, gb_free=2.8, wall=103562
2021-06-19 23:24:58 | INFO | train_inner | epoch 004: 21 / 3002 loss=2.473, ppl=5.55, wps=5813.1, ups=0.09, wpb=64800, bsz=128, num_updates=8970, lr=9.99362e-05, gnorm=2.146, loss_scale=1, train_wall=11, gb_free=2.8, wall=103573
2021-06-19 23:25:09 | INFO | train_inner | epoch 004: 22 / 3002 loss=2.652, ppl=6.28, wps=5928.3, ups=0.09, wpb=64820, bsz=128, num_updates=8971, lr=9.99362e-05, gnorm=2.104, loss_scale=1, train_wall=10, gb_free=2.8, wall=103584
2021-06-19 23:25:20 | INFO | train_inner | epoch 004: 23 / 3002 loss=2.743, ppl=6.7, wps=5899.2, ups=0.09, wpb=64812, bsz=128, num_updates=8972, lr=9.99362e-05, gnorm=2.312, loss_scale=1, train_wall=11, gb_free=2.8, wall=103595
2021-06-19 23:25:31 | INFO | train_inner | epoch 004: 24 / 3002 loss=2.755, ppl=6.75, wps=5837, ups=0.09, wpb=64794, bsz=128, num_updates=8973, lr=9.99362e-05, gnorm=2.32, loss_scale=1, train_wall=11, gb_free=2.8, wall=103606
2021-06-19 23:25:42 | INFO | train_inner | epoch 004: 25 / 3002 loss=2.61, ppl=6.1, wps=5884.8, ups=0.09, wpb=64806, bsz=128, num_updates=8974, lr=9.99362e-05, gnorm=2.151, loss_scale=1, train_wall=11, gb_free=2.8, wall=103617
2021-06-19 23:25:53 | INFO | train_inner | epoch 004: 26 / 3002 loss=2.685, ppl=6.43, wps=5828.6, ups=0.09, wpb=64852, bsz=128, num_updates=8975, lr=9.99362e-05, gnorm=2.948, loss_scale=1, train_wall=11, gb_free=2.8, wall=103628
2021-06-19 23:26:04 | INFO | train_inner | epoch 004: 27 / 3002 loss=2.609, ppl=6.1, wps=5943.7, ups=0.09, wpb=64850, bsz=128, num_updates=8976, lr=9.99362e-05, gnorm=2.107, loss_scale=1, train_wall=10, gb_free=2.8, wall=103639
2021-06-19 23:26:15 | INFO | train_inner | epoch 004: 28 / 3002 loss=2.569, ppl=5.94, wps=5992.1, ups=0.09, wpb=64737, bsz=128, num_updates=8977, lr=9.99362e-05, gnorm=2.354, loss_scale=1, train_wall=10, gb_free=2.8, wall=103650
2021-06-19 23:26:26 | INFO | train_inner | epoch 004: 29 / 3002 loss=2.659, ppl=6.32, wps=5800.5, ups=0.09, wpb=64834, bsz=128, num_updates=8978, lr=9.99362e-05, gnorm=2.16, loss_scale=1, train_wall=11, gb_free=2.8, wall=103661
2021-06-19 23:26:38 | INFO | train_inner | epoch 004: 30 / 3002 loss=2.661, ppl=6.32, wps=5790.3, ups=0.09, wpb=64830, bsz=128, num_updates=8979, lr=9.99362e-05, gnorm=2.178, loss_scale=1, train_wall=11, gb_free=2.8, wall=103672
2021-06-19 23:26:49 | INFO | train_inner | epoch 004: 31 / 3002 loss=2.704, ppl=6.52, wps=5784.4, ups=0.09, wpb=64768, bsz=128, num_updates=8980, lr=9.99362e-05, gnorm=2.212, loss_scale=1, train_wall=11, gb_free=2.8, wall=103683
2021-06-19 23:27:00 | INFO | train_inner | epoch 004: 32 / 3002 loss=2.588, ppl=6.01, wps=5953.8, ups=0.09, wpb=64831, bsz=128, num_updates=8981, lr=9.99361e-05, gnorm=2.253, loss_scale=1, train_wall=10, gb_free=2.8, wall=103694
2021-06-19 23:27:11 | INFO | train_inner | epoch 004: 33 / 3002 loss=2.642, ppl=6.24, wps=5842.8, ups=0.09, wpb=64864, bsz=128, num_updates=8982, lr=9.99361e-05, gnorm=2.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=103705
2021-06-19 23:27:22 | INFO | train_inner | epoch 004: 34 / 3002 loss=2.707, ppl=6.53, wps=5930.1, ups=0.09, wpb=64865, bsz=128, num_updates=8983, lr=9.99361e-05, gnorm=2.485, loss_scale=1, train_wall=10, gb_free=2.8, wall=103716
2021-06-19 23:27:33 | INFO | train_inner | epoch 004: 35 / 3002 loss=2.673, ppl=6.38, wps=5744.8, ups=0.09, wpb=64829, bsz=128, num_updates=8984, lr=9.99361e-05, gnorm=2.32, loss_scale=1, train_wall=11, gb_free=2.8, wall=103727
2021-06-19 23:27:44 | INFO | train_inner | epoch 004: 36 / 3002 loss=2.61, ppl=6.1, wps=5910.8, ups=0.09, wpb=64806, bsz=128, num_updates=8985, lr=9.99361e-05, gnorm=6.062, loss_scale=1, train_wall=10, gb_free=2.8, wall=103738
2021-06-19 23:27:55 | INFO | train_inner | epoch 004: 37 / 3002 loss=2.611, ppl=6.11, wps=5885.2, ups=0.09, wpb=64792, bsz=128, num_updates=8986, lr=9.99361e-05, gnorm=2.155, loss_scale=1, train_wall=11, gb_free=2.8, wall=103749
2021-06-19 23:28:06 | INFO | train_inner | epoch 004: 38 / 3002 loss=2.684, ppl=6.42, wps=5885.6, ups=0.09, wpb=64885, bsz=128, num_updates=8987, lr=9.99361e-05, gnorm=2.193, loss_scale=1, train_wall=11, gb_free=2.8, wall=103760
2021-06-19 23:28:17 | INFO | train_inner | epoch 004: 39 / 3002 loss=2.709, ppl=6.54, wps=5847.9, ups=0.09, wpb=64746, bsz=128, num_updates=8988, lr=9.99361e-05, gnorm=2.298, loss_scale=1, train_wall=11, gb_free=2.8, wall=103771
2021-06-19 23:28:28 | INFO | train_inner | epoch 004: 40 / 3002 loss=2.635, ppl=6.21, wps=5870, ups=0.09, wpb=64778, bsz=128, num_updates=8989, lr=9.99361e-05, gnorm=2.321, loss_scale=1, train_wall=11, gb_free=2.8, wall=103782
2021-06-19 23:28:39 | INFO | train_inner | epoch 004: 41 / 3002 loss=2.833, ppl=7.13, wps=5982.6, ups=0.09, wpb=64863, bsz=128, num_updates=8990, lr=9.99361e-05, gnorm=2.24, loss_scale=1, train_wall=10, gb_free=2.8, wall=103793
2021-06-19 23:28:50 | INFO | train_inner | epoch 004: 42 / 3002 loss=2.596, ppl=6.05, wps=5855.6, ups=0.09, wpb=64923, bsz=128, num_updates=8991, lr=9.99361e-05, gnorm=2.131, loss_scale=1, train_wall=11, gb_free=2.8, wall=103804
2021-06-19 23:29:01 | INFO | train_inner | epoch 004: 43 / 3002 loss=2.736, ppl=6.66, wps=5802.2, ups=0.09, wpb=64865, bsz=128, num_updates=8992, lr=9.99361e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=103816
2021-06-19 23:29:12 | INFO | train_inner | epoch 004: 44 / 3002 loss=2.784, ppl=6.89, wps=5959.9, ups=0.09, wpb=64924, bsz=128, num_updates=8993, lr=9.99361e-05, gnorm=12.927, loss_scale=1, train_wall=10, gb_free=2.8, wall=103826
2021-06-19 23:29:23 | INFO | train_inner | epoch 004: 45 / 3002 loss=2.467, ppl=5.53, wps=6011, ups=0.09, wpb=64848, bsz=128, num_updates=8994, lr=9.9936e-05, gnorm=2.311, loss_scale=1, train_wall=10, gb_free=2.8, wall=103837
2021-06-19 23:29:34 | INFO | train_inner | epoch 004: 46 / 3002 loss=2.762, ppl=6.79, wps=5888, ups=0.09, wpb=64839, bsz=128, num_updates=8995, lr=9.9936e-05, gnorm=2.434, loss_scale=1, train_wall=11, gb_free=2.8, wall=103848
2021-06-19 23:29:45 | INFO | train_inner | epoch 004: 47 / 3002 loss=2.792, ppl=6.93, wps=5835.2, ups=0.09, wpb=64775, bsz=128, num_updates=8996, lr=9.9936e-05, gnorm=2.978, loss_scale=1, train_wall=11, gb_free=2.8, wall=103859
2021-06-19 23:29:56 | INFO | train_inner | epoch 004: 48 / 3002 loss=2.672, ppl=6.37, wps=5818.5, ups=0.09, wpb=64754, bsz=128, num_updates=8997, lr=9.9936e-05, gnorm=2.188, loss_scale=1, train_wall=11, gb_free=2.8, wall=103870
2021-06-19 23:30:07 | INFO | train_inner | epoch 004: 49 / 3002 loss=2.523, ppl=5.75, wps=5777.6, ups=0.09, wpb=64546, bsz=128, num_updates=8998, lr=9.9936e-05, gnorm=2.236, loss_scale=1, train_wall=11, gb_free=2.8, wall=103882
2021-06-19 23:30:18 | INFO | train_inner | epoch 004: 50 / 3002 loss=2.663, ppl=6.33, wps=5898.3, ups=0.09, wpb=64798, bsz=128, num_updates=8999, lr=9.9936e-05, gnorm=2.998, loss_scale=1, train_wall=11, gb_free=2.8, wall=103893
2021-06-19 23:30:29 | INFO | train_inner | epoch 004: 51 / 3002 loss=2.504, ppl=5.67, wps=5965, ups=0.09, wpb=64928, bsz=128, num_updates=9000, lr=9.9936e-05, gnorm=2.26, loss_scale=1, train_wall=10, gb_free=2.8, wall=103904
2021-06-19 23:30:40 | INFO | train_inner | epoch 004: 52 / 3002 loss=2.743, ppl=6.69, wps=5950.2, ups=0.09, wpb=64825, bsz=128, num_updates=9001, lr=9.9936e-05, gnorm=2.215, loss_scale=1, train_wall=10, gb_free=2.8, wall=103914
2021-06-19 23:30:51 | INFO | train_inner | epoch 004: 53 / 3002 loss=2.736, ppl=6.66, wps=5819.2, ups=0.09, wpb=64713, bsz=128, num_updates=9002, lr=9.9936e-05, gnorm=2.547, loss_scale=1, train_wall=11, gb_free=2.8, wall=103926
2021-06-19 23:31:02 | INFO | train_inner | epoch 004: 54 / 3002 loss=2.729, ppl=6.63, wps=5795.7, ups=0.09, wpb=64788, bsz=128, num_updates=9003, lr=9.9936e-05, gnorm=2.198, loss_scale=1, train_wall=11, gb_free=2.8, wall=103937
2021-06-19 23:31:13 | INFO | train_inner | epoch 004: 55 / 3002 loss=2.659, ppl=6.32, wps=5883.5, ups=0.09, wpb=64792, bsz=128, num_updates=9004, lr=9.9936e-05, gnorm=3.554, loss_scale=1, train_wall=11, gb_free=2.8, wall=103948
2021-06-19 23:31:24 | INFO | train_inner | epoch 004: 56 / 3002 loss=2.613, ppl=6.12, wps=5943.2, ups=0.09, wpb=64840, bsz=128, num_updates=9005, lr=9.9936e-05, gnorm=2.234, loss_scale=1, train_wall=10, gb_free=2.8, wall=103959
2021-06-19 23:31:36 | INFO | train_inner | epoch 004: 57 / 3002 loss=2.649, ppl=6.27, wps=5720.6, ups=0.09, wpb=64776, bsz=128, num_updates=9006, lr=9.99359e-05, gnorm=2.113, loss_scale=1, train_wall=11, gb_free=2.8, wall=103970
2021-06-19 23:31:47 | INFO | train_inner | epoch 004: 58 / 3002 loss=2.579, ppl=5.97, wps=5821, ups=0.09, wpb=64847, bsz=128, num_updates=9007, lr=9.99359e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=103981
2021-06-19 23:31:58 | INFO | train_inner | epoch 004: 59 / 3002 loss=2.592, ppl=6.03, wps=5971, ups=0.09, wpb=64885, bsz=128, num_updates=9008, lr=9.99359e-05, gnorm=2.23, loss_scale=1, train_wall=10, gb_free=2.8, wall=103992
2021-06-19 23:32:09 | INFO | train_inner | epoch 004: 60 / 3002 loss=2.738, ppl=6.67, wps=5813.2, ups=0.09, wpb=64837, bsz=128, num_updates=9009, lr=9.99359e-05, gnorm=2.197, loss_scale=1, train_wall=11, gb_free=2.8, wall=104003
2021-06-19 23:32:20 | INFO | train_inner | epoch 004: 61 / 3002 loss=2.587, ppl=6.01, wps=5803.4, ups=0.09, wpb=64829, bsz=128, num_updates=9010, lr=9.99359e-05, gnorm=2.168, loss_scale=1, train_wall=11, gb_free=2.8, wall=104014
2021-06-19 23:32:31 | INFO | train_inner | epoch 004: 62 / 3002 loss=2.631, ppl=6.2, wps=5834.9, ups=0.09, wpb=64811, bsz=128, num_updates=9011, lr=9.99359e-05, gnorm=2.767, loss_scale=1, train_wall=11, gb_free=2.8, wall=104025
2021-06-19 23:32:42 | INFO | train_inner | epoch 004: 63 / 3002 loss=2.482, ppl=5.59, wps=6025.2, ups=0.09, wpb=64915, bsz=128, num_updates=9012, lr=9.99359e-05, gnorm=2.68, loss_scale=1, train_wall=10, gb_free=2.8, wall=104036
2021-06-19 23:32:53 | INFO | train_inner | epoch 004: 64 / 3002 loss=2.57, ppl=5.94, wps=5924.3, ups=0.09, wpb=64960, bsz=128, num_updates=9013, lr=9.99359e-05, gnorm=2.065, loss_scale=1, train_wall=11, gb_free=2.8, wall=104047
2021-06-19 23:33:04 | INFO | train_inner | epoch 004: 65 / 3002 loss=2.584, ppl=6, wps=5907.4, ups=0.09, wpb=64842, bsz=128, num_updates=9014, lr=9.99359e-05, gnorm=2.771, loss_scale=1, train_wall=11, gb_free=2.8, wall=104058
2021-06-19 23:33:15 | INFO | train_inner | epoch 004: 66 / 3002 loss=2.705, ppl=6.52, wps=5804.2, ups=0.09, wpb=64813, bsz=128, num_updates=9015, lr=9.99359e-05, gnorm=2.197, loss_scale=1, train_wall=11, gb_free=2.8, wall=104069
2021-06-19 23:33:26 | INFO | train_inner | epoch 004: 67 / 3002 loss=2.616, ppl=6.13, wps=5820.2, ups=0.09, wpb=64839, bsz=128, num_updates=9016, lr=9.99359e-05, gnorm=2.226, loss_scale=1, train_wall=11, gb_free=2.8, wall=104080
2021-06-19 23:33:37 | INFO | train_inner | epoch 004: 68 / 3002 loss=2.64, ppl=6.24, wps=5885.3, ups=0.09, wpb=64824, bsz=128, num_updates=9017, lr=9.99359e-05, gnorm=2.128, loss_scale=1, train_wall=11, gb_free=2.8, wall=104091
2021-06-19 23:33:48 | INFO | train_inner | epoch 004: 69 / 3002 loss=2.647, ppl=6.26, wps=5984.6, ups=0.09, wpb=64889, bsz=128, num_updates=9018, lr=9.99359e-05, gnorm=2.268, loss_scale=1, train_wall=10, gb_free=2.8, wall=104102
2021-06-19 23:33:59 | INFO | train_inner | epoch 004: 70 / 3002 loss=2.564, ppl=5.91, wps=5834, ups=0.09, wpb=64866, bsz=128, num_updates=9019, lr=9.99358e-05, gnorm=2.115, loss_scale=1, train_wall=11, gb_free=2.8, wall=104113
2021-06-19 23:34:10 | INFO | train_inner | epoch 004: 71 / 3002 loss=2.599, ppl=6.06, wps=5825.9, ups=0.09, wpb=64460, bsz=128, num_updates=9020, lr=9.99358e-05, gnorm=3.296, loss_scale=1, train_wall=11, gb_free=2.8, wall=104124
2021-06-19 23:34:21 | INFO | train_inner | epoch 004: 72 / 3002 loss=2.638, ppl=6.23, wps=5876.9, ups=0.09, wpb=64916, bsz=128, num_updates=9021, lr=9.99358e-05, gnorm=2.119, loss_scale=1, train_wall=11, gb_free=2.8, wall=104136
2021-06-19 23:34:32 | INFO | train_inner | epoch 004: 73 / 3002 loss=2.621, ppl=6.15, wps=5805.6, ups=0.09, wpb=64835, bsz=128, num_updates=9022, lr=9.99358e-05, gnorm=2.827, loss_scale=1, train_wall=11, gb_free=2.8, wall=104147
2021-06-19 23:34:43 | INFO | train_inner | epoch 004: 74 / 3002 loss=2.672, ppl=6.37, wps=5834.4, ups=0.09, wpb=64833, bsz=128, num_updates=9023, lr=9.99358e-05, gnorm=2.164, loss_scale=1, train_wall=11, gb_free=2.8, wall=104158
2021-06-19 23:34:55 | INFO | train_inner | epoch 004: 75 / 3002 loss=2.58, ppl=5.98, wps=5818.5, ups=0.09, wpb=64847, bsz=128, num_updates=9024, lr=9.99358e-05, gnorm=2.166, loss_scale=1, train_wall=11, gb_free=2.8, wall=104169
2021-06-19 23:35:06 | INFO | train_inner | epoch 004: 76 / 3002 loss=2.666, ppl=6.35, wps=5855.4, ups=0.09, wpb=64854, bsz=128, num_updates=9025, lr=9.99358e-05, gnorm=2.709, loss_scale=1, train_wall=11, gb_free=2.8, wall=104180
2021-06-19 23:35:17 | INFO | train_inner | epoch 004: 77 / 3002 loss=2.611, ppl=6.11, wps=5907, ups=0.09, wpb=64813, bsz=128, num_updates=9026, lr=9.99358e-05, gnorm=2.272, loss_scale=1, train_wall=10, gb_free=2.8, wall=104191
2021-06-19 23:35:28 | INFO | train_inner | epoch 004: 78 / 3002 loss=2.618, ppl=6.14, wps=5913.1, ups=0.09, wpb=64852, bsz=128, num_updates=9027, lr=9.99358e-05, gnorm=2.327, loss_scale=1, train_wall=10, gb_free=2.8, wall=104202
2021-06-19 23:35:39 | INFO | train_inner | epoch 004: 79 / 3002 loss=2.578, ppl=5.97, wps=5776.6, ups=0.09, wpb=64841, bsz=128, num_updates=9028, lr=9.99358e-05, gnorm=2.404, loss_scale=1, train_wall=11, gb_free=2.8, wall=104213
2021-06-19 23:35:50 | INFO | train_inner | epoch 004: 80 / 3002 loss=2.686, ppl=6.44, wps=5825, ups=0.09, wpb=64767, bsz=128, num_updates=9029, lr=9.99358e-05, gnorm=2.307, loss_scale=1, train_wall=11, gb_free=2.8, wall=104224
2021-06-19 23:36:01 | INFO | train_inner | epoch 004: 81 / 3002 loss=2.463, ppl=5.51, wps=5915.6, ups=0.09, wpb=64910, bsz=128, num_updates=9030, lr=9.99358e-05, gnorm=2.507, loss_scale=1, train_wall=11, gb_free=2.8, wall=104235
2021-06-19 23:36:12 | INFO | train_inner | epoch 004: 82 / 3002 loss=2.645, ppl=6.26, wps=5860.8, ups=0.09, wpb=64710, bsz=128, num_updates=9031, lr=9.99357e-05, gnorm=2.244, loss_scale=1, train_wall=11, gb_free=2.8, wall=104246
2021-06-19 23:36:23 | INFO | train_inner | epoch 004: 83 / 3002 loss=2.651, ppl=6.28, wps=5885.5, ups=0.09, wpb=64871, bsz=128, num_updates=9032, lr=9.99357e-05, gnorm=2.154, loss_scale=1, train_wall=11, gb_free=2.8, wall=104257
2021-06-19 23:36:34 | INFO | train_inner | epoch 004: 84 / 3002 loss=2.63, ppl=6.19, wps=5884.1, ups=0.09, wpb=64825, bsz=128, num_updates=9033, lr=9.99357e-05, gnorm=2.079, loss_scale=1, train_wall=11, gb_free=2.8, wall=104268
2021-06-19 23:36:45 | INFO | train_inner | epoch 004: 85 / 3002 loss=2.674, ppl=6.38, wps=5810.3, ups=0.09, wpb=64797, bsz=128, num_updates=9034, lr=9.99357e-05, gnorm=2.273, loss_scale=1, train_wall=11, gb_free=2.8, wall=104280
2021-06-19 23:36:56 | INFO | train_inner | epoch 004: 86 / 3002 loss=2.588, ppl=6.01, wps=5899.6, ups=0.09, wpb=64930, bsz=128, num_updates=9035, lr=9.99357e-05, gnorm=2.273, loss_scale=1, train_wall=11, gb_free=2.8, wall=104291
2021-06-19 23:37:07 | INFO | train_inner | epoch 004: 87 / 3002 loss=2.638, ppl=6.23, wps=5844.8, ups=0.09, wpb=64734, bsz=128, num_updates=9036, lr=9.99357e-05, gnorm=2.061, loss_scale=1, train_wall=11, gb_free=2.8, wall=104302
2021-06-19 23:37:18 | INFO | train_inner | epoch 004: 88 / 3002 loss=2.697, ppl=6.48, wps=5892.6, ups=0.09, wpb=64854, bsz=128, num_updates=9037, lr=9.99357e-05, gnorm=2.477, loss_scale=1, train_wall=11, gb_free=2.8, wall=104313
2021-06-19 23:37:30 | INFO | train_inner | epoch 004: 89 / 3002 loss=2.493, ppl=5.63, wps=5700, ups=0.09, wpb=64754, bsz=128, num_updates=9038, lr=9.99357e-05, gnorm=2.394, loss_scale=1, train_wall=11, gb_free=2.8, wall=104324
2021-06-19 23:37:41 | INFO | train_inner | epoch 004: 90 / 3002 loss=2.611, ppl=6.11, wps=5893.5, ups=0.09, wpb=64728, bsz=128, num_updates=9039, lr=9.99357e-05, gnorm=2.143, loss_scale=1, train_wall=11, gb_free=2.8, wall=104335
2021-06-19 23:37:52 | INFO | train_inner | epoch 004: 91 / 3002 loss=2.637, ppl=6.22, wps=5931.5, ups=0.09, wpb=64836, bsz=128, num_updates=9040, lr=9.99357e-05, gnorm=2.564, loss_scale=1, train_wall=10, gb_free=2.8, wall=104346
2021-06-19 23:38:03 | INFO | train_inner | epoch 004: 92 / 3002 loss=2.548, ppl=5.85, wps=5825.9, ups=0.09, wpb=64875, bsz=128, num_updates=9041, lr=9.99357e-05, gnorm=2.31, loss_scale=1, train_wall=11, gb_free=2.8, wall=104357
2021-06-19 23:38:14 | INFO | train_inner | epoch 004: 93 / 3002 loss=2.735, ppl=6.66, wps=5810.6, ups=0.09, wpb=64850, bsz=128, num_updates=9042, lr=9.99357e-05, gnorm=2.266, loss_scale=1, train_wall=11, gb_free=2.8, wall=104368
2021-06-19 23:38:25 | INFO | train_inner | epoch 004: 94 / 3002 loss=2.518, ppl=5.73, wps=5943.5, ups=0.09, wpb=64934, bsz=128, num_updates=9043, lr=9.99357e-05, gnorm=2.146, loss_scale=1, train_wall=10, gb_free=2.8, wall=104379
2021-06-19 23:38:36 | INFO | train_inner | epoch 004: 95 / 3002 loss=2.767, ppl=6.81, wps=5851.2, ups=0.09, wpb=64859, bsz=128, num_updates=9044, lr=9.99356e-05, gnorm=6.565, loss_scale=1, train_wall=11, gb_free=2.8, wall=104390
2021-06-19 23:38:47 | INFO | train_inner | epoch 004: 96 / 3002 loss=2.661, ppl=6.32, wps=5979.7, ups=0.09, wpb=64927, bsz=128, num_updates=9045, lr=9.99356e-05, gnorm=2.947, loss_scale=1, train_wall=10, gb_free=2.8, wall=104401
2021-06-19 23:38:58 | INFO | train_inner | epoch 004: 97 / 3002 loss=2.526, ppl=5.76, wps=5887.9, ups=0.09, wpb=64885, bsz=128, num_updates=9046, lr=9.99356e-05, gnorm=2.259, loss_scale=1, train_wall=11, gb_free=2.8, wall=104412
2021-06-19 23:39:09 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-19 23:39:20 | INFO | train_inner | epoch 004: 99 / 3002 loss=2.673, ppl=6.38, wps=2950.5, ups=0.05, wpb=64798, bsz=128, num_updates=9047, lr=9.99356e-05, gnorm=2.185, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=104434
2021-06-19 23:39:31 | INFO | train_inner | epoch 004: 100 / 3002 loss=2.627, ppl=6.18, wps=5856.1, ups=0.09, wpb=64839, bsz=128, num_updates=9048, lr=9.99356e-05, gnorm=3.124, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104445
2021-06-19 23:39:42 | INFO | train_inner | epoch 004: 101 / 3002 loss=2.425, ppl=5.37, wps=5803.3, ups=0.09, wpb=64815, bsz=128, num_updates=9049, lr=9.99356e-05, gnorm=16.203, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104456
2021-06-19 23:39:53 | INFO | train_inner | epoch 004: 102 / 3002 loss=2.598, ppl=6.05, wps=5908.5, ups=0.09, wpb=64814, bsz=128, num_updates=9050, lr=9.99356e-05, gnorm=2.317, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104467
2021-06-19 23:40:04 | INFO | train_inner | epoch 004: 103 / 3002 loss=2.471, ppl=5.55, wps=5957.5, ups=0.09, wpb=64835, bsz=128, num_updates=9051, lr=9.99356e-05, gnorm=2.024, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104478
2021-06-19 23:40:15 | INFO | train_inner | epoch 004: 104 / 3002 loss=2.588, ppl=6.01, wps=5965.8, ups=0.09, wpb=64898, bsz=128, num_updates=9052, lr=9.99356e-05, gnorm=2.102, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104489
2021-06-19 23:40:26 | INFO | train_inner | epoch 004: 105 / 3002 loss=2.546, ppl=5.84, wps=5953.7, ups=0.09, wpb=64871, bsz=128, num_updates=9053, lr=9.99356e-05, gnorm=2.088, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104500
2021-06-19 23:40:37 | INFO | train_inner | epoch 004: 106 / 3002 loss=2.502, ppl=5.66, wps=5791.6, ups=0.09, wpb=64822, bsz=128, num_updates=9054, lr=9.99356e-05, gnorm=2.223, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104511
2021-06-19 23:40:48 | INFO | train_inner | epoch 004: 107 / 3002 loss=2.688, ppl=6.44, wps=5897.9, ups=0.09, wpb=64801, bsz=128, num_updates=9055, lr=9.99356e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104522
2021-06-19 23:40:59 | INFO | train_inner | epoch 004: 108 / 3002 loss=2.759, ppl=6.77, wps=5925.5, ups=0.09, wpb=64812, bsz=128, num_updates=9056, lr=9.99355e-05, gnorm=2.251, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104533
2021-06-19 23:41:10 | INFO | train_inner | epoch 004: 109 / 3002 loss=2.573, ppl=5.95, wps=5910.6, ups=0.09, wpb=64782, bsz=128, num_updates=9057, lr=9.99355e-05, gnorm=2.107, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104544
2021-06-19 23:41:21 | INFO | train_inner | epoch 004: 110 / 3002 loss=2.521, ppl=5.74, wps=5906.5, ups=0.09, wpb=64878, bsz=128, num_updates=9058, lr=9.99355e-05, gnorm=2.31, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104555
2021-06-19 23:41:32 | INFO | train_inner | epoch 004: 111 / 3002 loss=2.785, ppl=6.89, wps=5829.2, ups=0.09, wpb=64817, bsz=128, num_updates=9059, lr=9.99355e-05, gnorm=2.246, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104566
2021-06-19 23:41:43 | INFO | train_inner | epoch 004: 112 / 3002 loss=2.537, ppl=5.8, wps=5844.3, ups=0.09, wpb=64856, bsz=128, num_updates=9060, lr=9.99355e-05, gnorm=2.294, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104577
2021-06-19 23:41:54 | INFO | train_inner | epoch 004: 113 / 3002 loss=2.569, ppl=5.94, wps=5912, ups=0.09, wpb=64830, bsz=128, num_updates=9061, lr=9.99355e-05, gnorm=2.259, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104588
2021-06-19 23:42:05 | INFO | train_inner | epoch 004: 114 / 3002 loss=2.851, ppl=7.21, wps=5894.1, ups=0.09, wpb=64829, bsz=128, num_updates=9062, lr=9.99355e-05, gnorm=2.429, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104599
2021-06-19 23:42:16 | INFO | train_inner | epoch 004: 115 / 3002 loss=2.477, ppl=5.57, wps=5812.8, ups=0.09, wpb=64912, bsz=128, num_updates=9063, lr=9.99355e-05, gnorm=3.431, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104610
2021-06-19 23:42:27 | INFO | train_inner | epoch 004: 116 / 3002 loss=2.738, ppl=6.67, wps=5960.9, ups=0.09, wpb=64851, bsz=128, num_updates=9064, lr=9.99355e-05, gnorm=3.455, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104621
2021-06-19 23:42:38 | INFO | train_inner | epoch 004: 117 / 3002 loss=2.501, ppl=5.66, wps=5723.4, ups=0.09, wpb=64832, bsz=128, num_updates=9065, lr=9.99355e-05, gnorm=2.389, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104633
2021-06-19 23:42:49 | INFO | train_inner | epoch 004: 118 / 3002 loss=2.518, ppl=5.73, wps=5918.6, ups=0.09, wpb=64902, bsz=128, num_updates=9066, lr=9.99355e-05, gnorm=3.039, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104643
2021-06-19 23:43:00 | INFO | train_inner | epoch 004: 119 / 3002 loss=2.645, ppl=6.25, wps=5943.3, ups=0.09, wpb=64893, bsz=128, num_updates=9067, lr=9.99355e-05, gnorm=2.128, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104654
2021-06-19 23:43:11 | INFO | train_inner | epoch 004: 120 / 3002 loss=2.515, ppl=5.72, wps=5834.2, ups=0.09, wpb=64871, bsz=128, num_updates=9068, lr=9.99355e-05, gnorm=74.549, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104666
2021-06-19 23:43:22 | INFO | train_inner | epoch 004: 121 / 3002 loss=2.523, ppl=5.75, wps=5768.1, ups=0.09, wpb=64816, bsz=128, num_updates=9069, lr=9.99354e-05, gnorm=2.278, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104677
2021-06-19 23:43:33 | INFO | train_inner | epoch 004: 122 / 3002 loss=2.681, ppl=6.41, wps=5937.9, ups=0.09, wpb=64908, bsz=128, num_updates=9070, lr=9.99354e-05, gnorm=3.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104688
2021-06-19 23:43:44 | INFO | train_inner | epoch 004: 123 / 3002 loss=2.614, ppl=6.12, wps=5871.5, ups=0.09, wpb=64804, bsz=128, num_updates=9071, lr=9.99354e-05, gnorm=2.481, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104699
2021-06-19 23:43:55 | INFO | train_inner | epoch 004: 124 / 3002 loss=2.618, ppl=6.14, wps=5854.3, ups=0.09, wpb=64825, bsz=128, num_updates=9072, lr=9.99354e-05, gnorm=2.554, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104710
2021-06-19 23:44:07 | INFO | train_inner | epoch 004: 125 / 3002 loss=2.66, ppl=6.32, wps=5804.8, ups=0.09, wpb=64793, bsz=128, num_updates=9073, lr=9.99354e-05, gnorm=2.86, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104721
2021-06-19 23:44:17 | INFO | train_inner | epoch 004: 126 / 3002 loss=2.749, ppl=6.72, wps=5992, ups=0.09, wpb=64830, bsz=128, num_updates=9074, lr=9.99354e-05, gnorm=3.559, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104732
2021-06-19 23:44:29 | INFO | train_inner | epoch 004: 127 / 3002 loss=2.695, ppl=6.47, wps=5759.5, ups=0.09, wpb=64802, bsz=128, num_updates=9075, lr=9.99354e-05, gnorm=3.054, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104743
2021-06-19 23:44:40 | INFO | train_inner | epoch 004: 128 / 3002 loss=2.724, ppl=6.61, wps=5820.2, ups=0.09, wpb=64815, bsz=128, num_updates=9076, lr=9.99354e-05, gnorm=2.782, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104754
2021-06-19 23:44:51 | INFO | train_inner | epoch 004: 129 / 3002 loss=2.654, ppl=6.3, wps=5835, ups=0.09, wpb=64829, bsz=128, num_updates=9077, lr=9.99354e-05, gnorm=2.752, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104765
2021-06-19 23:45:02 | INFO | train_inner | epoch 004: 130 / 3002 loss=2.707, ppl=6.53, wps=5807.8, ups=0.09, wpb=64837, bsz=128, num_updates=9078, lr=9.99354e-05, gnorm=2.458, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104776
2021-06-19 23:45:13 | INFO | train_inner | epoch 004: 131 / 3002 loss=2.657, ppl=6.31, wps=5893.7, ups=0.09, wpb=64789, bsz=128, num_updates=9079, lr=9.99354e-05, gnorm=2.345, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104787
2021-06-19 23:45:24 | INFO | train_inner | epoch 004: 132 / 3002 loss=2.64, ppl=6.23, wps=5729.5, ups=0.09, wpb=64858, bsz=128, num_updates=9080, lr=9.99354e-05, gnorm=2.533, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104799
2021-06-19 23:45:35 | INFO | train_inner | epoch 004: 133 / 3002 loss=2.494, ppl=5.63, wps=5940.2, ups=0.09, wpb=64876, bsz=128, num_updates=9081, lr=9.99353e-05, gnorm=3.12, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104810
2021-06-19 23:45:46 | INFO | train_inner | epoch 004: 134 / 3002 loss=2.83, ppl=7.11, wps=5858.8, ups=0.09, wpb=64811, bsz=128, num_updates=9082, lr=9.99353e-05, gnorm=2.272, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104821
2021-06-19 23:45:58 | INFO | train_inner | epoch 004: 135 / 3002 loss=2.692, ppl=6.46, wps=5784.6, ups=0.09, wpb=64867, bsz=128, num_updates=9083, lr=9.99353e-05, gnorm=2.172, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104832
2021-06-19 23:46:09 | INFO | train_inner | epoch 004: 136 / 3002 loss=2.471, ppl=5.55, wps=5850.2, ups=0.09, wpb=64820, bsz=128, num_updates=9084, lr=9.99353e-05, gnorm=2.187, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104843
2021-06-19 23:46:20 | INFO | train_inner | epoch 004: 137 / 3002 loss=2.624, ppl=6.16, wps=5807.5, ups=0.09, wpb=64914, bsz=128, num_updates=9085, lr=9.99353e-05, gnorm=2.287, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104854
2021-06-19 23:46:31 | INFO | train_inner | epoch 004: 138 / 3002 loss=2.542, ppl=5.82, wps=5858.7, ups=0.09, wpb=64871, bsz=128, num_updates=9086, lr=9.99353e-05, gnorm=2.109, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104865
2021-06-19 23:46:42 | INFO | train_inner | epoch 004: 139 / 3002 loss=2.659, ppl=6.32, wps=5764.8, ups=0.09, wpb=64870, bsz=128, num_updates=9087, lr=9.99353e-05, gnorm=8.641, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104877
2021-06-19 23:46:53 | INFO | train_inner | epoch 004: 140 / 3002 loss=2.694, ppl=6.47, wps=5818.2, ups=0.09, wpb=64762, bsz=128, num_updates=9088, lr=9.99353e-05, gnorm=2.27, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104888
2021-06-19 23:47:04 | INFO | train_inner | epoch 004: 141 / 3002 loss=2.602, ppl=6.07, wps=5960.1, ups=0.09, wpb=64827, bsz=128, num_updates=9089, lr=9.99353e-05, gnorm=2.164, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104899
2021-06-19 23:47:15 | INFO | train_inner | epoch 004: 142 / 3002 loss=2.818, ppl=7.05, wps=5827.7, ups=0.09, wpb=64827, bsz=128, num_updates=9090, lr=9.99353e-05, gnorm=2.791, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104910
2021-06-19 23:47:27 | INFO | train_inner | epoch 004: 143 / 3002 loss=2.608, ppl=6.1, wps=5782.2, ups=0.09, wpb=64844, bsz=128, num_updates=9091, lr=9.99353e-05, gnorm=2.614, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104921
2021-06-19 23:47:38 | INFO | train_inner | epoch 004: 144 / 3002 loss=2.578, ppl=5.97, wps=5886.5, ups=0.09, wpb=64805, bsz=128, num_updates=9092, lr=9.99353e-05, gnorm=5.421, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104932
2021-06-19 23:47:49 | INFO | train_inner | epoch 004: 145 / 3002 loss=2.622, ppl=6.16, wps=5917.9, ups=0.09, wpb=64838, bsz=128, num_updates=9093, lr=9.99353e-05, gnorm=2.109, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=104943
2021-06-19 23:48:00 | INFO | train_inner | epoch 004: 146 / 3002 loss=2.581, ppl=5.98, wps=5867.1, ups=0.09, wpb=64838, bsz=128, num_updates=9094, lr=9.99352e-05, gnorm=2.207, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104954
2021-06-19 23:48:11 | INFO | train_inner | epoch 004: 147 / 3002 loss=2.509, ppl=5.69, wps=5768.5, ups=0.09, wpb=64801, bsz=128, num_updates=9095, lr=9.99352e-05, gnorm=3.608, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104965
2021-06-19 23:48:22 | INFO | train_inner | epoch 004: 148 / 3002 loss=2.756, ppl=6.75, wps=5839.5, ups=0.09, wpb=64807, bsz=128, num_updates=9096, lr=9.99352e-05, gnorm=2.577, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104976
2021-06-19 23:48:33 | INFO | train_inner | epoch 004: 149 / 3002 loss=2.57, ppl=5.94, wps=5858.5, ups=0.09, wpb=64762, bsz=128, num_updates=9097, lr=9.99352e-05, gnorm=2.279, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104987
2021-06-19 23:48:44 | INFO | train_inner | epoch 004: 150 / 3002 loss=2.549, ppl=5.85, wps=5808.4, ups=0.09, wpb=64828, bsz=128, num_updates=9098, lr=9.99352e-05, gnorm=2.214, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=104998
2021-06-19 23:48:55 | INFO | train_inner | epoch 004: 151 / 3002 loss=2.634, ppl=6.21, wps=5868.7, ups=0.09, wpb=64882, bsz=128, num_updates=9099, lr=9.99352e-05, gnorm=2.117, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105009
2021-06-19 23:49:06 | INFO | train_inner | epoch 004: 152 / 3002 loss=2.593, ppl=6.03, wps=5872.5, ups=0.09, wpb=64775, bsz=128, num_updates=9100, lr=9.99352e-05, gnorm=2.207, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105021
2021-06-19 23:49:17 | INFO | train_inner | epoch 004: 153 / 3002 loss=2.575, ppl=5.96, wps=5870.1, ups=0.09, wpb=64797, bsz=128, num_updates=9101, lr=9.99352e-05, gnorm=2.226, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105032
2021-06-19 23:49:28 | INFO | train_inner | epoch 004: 154 / 3002 loss=2.571, ppl=5.94, wps=5911.1, ups=0.09, wpb=64838, bsz=128, num_updates=9102, lr=9.99352e-05, gnorm=7.422, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105043
2021-06-19 23:49:39 | INFO | train_inner | epoch 004: 155 / 3002 loss=2.662, ppl=6.33, wps=5884.3, ups=0.09, wpb=64847, bsz=128, num_updates=9103, lr=9.99352e-05, gnorm=2.215, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105054
2021-06-19 23:49:50 | INFO | train_inner | epoch 004: 156 / 3002 loss=2.654, ppl=6.3, wps=5885.5, ups=0.09, wpb=64793, bsz=128, num_updates=9104, lr=9.99352e-05, gnorm=2.217, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105065
2021-06-19 23:50:01 | INFO | train_inner | epoch 004: 157 / 3002 loss=2.774, ppl=6.84, wps=5813.1, ups=0.09, wpb=64766, bsz=128, num_updates=9105, lr=9.99352e-05, gnorm=6.37, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105076
2021-06-19 23:50:12 | INFO | train_inner | epoch 004: 158 / 3002 loss=2.71, ppl=6.54, wps=5839.7, ups=0.09, wpb=64847, bsz=128, num_updates=9106, lr=9.99351e-05, gnorm=2.103, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105087
2021-06-19 23:50:24 | INFO | train_inner | epoch 004: 159 / 3002 loss=2.623, ppl=6.16, wps=5868.8, ups=0.09, wpb=64847, bsz=128, num_updates=9107, lr=9.99351e-05, gnorm=2.198, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105098
2021-06-19 23:50:35 | INFO | train_inner | epoch 004: 160 / 3002 loss=2.523, ppl=5.75, wps=5794.5, ups=0.09, wpb=64842, bsz=128, num_updates=9108, lr=9.99351e-05, gnorm=2.066, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105109
2021-06-19 23:50:46 | INFO | train_inner | epoch 004: 161 / 3002 loss=2.539, ppl=5.81, wps=5886.2, ups=0.09, wpb=64863, bsz=128, num_updates=9109, lr=9.99351e-05, gnorm=2.234, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105120
2021-06-19 23:50:57 | INFO | train_inner | epoch 004: 162 / 3002 loss=2.52, ppl=5.73, wps=5753.4, ups=0.09, wpb=64829, bsz=128, num_updates=9110, lr=9.99351e-05, gnorm=2.192, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105131
2021-06-19 23:51:08 | INFO | train_inner | epoch 004: 163 / 3002 loss=2.616, ppl=6.13, wps=5935.8, ups=0.09, wpb=64714, bsz=128, num_updates=9111, lr=9.99351e-05, gnorm=2.27, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105142
2021-06-19 23:51:19 | INFO | train_inner | epoch 004: 164 / 3002 loss=2.663, ppl=6.33, wps=5802.9, ups=0.09, wpb=64795, bsz=128, num_updates=9112, lr=9.99351e-05, gnorm=2.654, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105153
2021-06-19 23:51:30 | INFO | train_inner | epoch 004: 165 / 3002 loss=2.446, ppl=5.45, wps=5805.9, ups=0.09, wpb=64816, bsz=128, num_updates=9113, lr=9.99351e-05, gnorm=13.197, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105165
2021-06-19 23:51:41 | INFO | train_inner | epoch 004: 166 / 3002 loss=2.546, ppl=5.84, wps=5803.3, ups=0.09, wpb=64885, bsz=128, num_updates=9114, lr=9.99351e-05, gnorm=3.237, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105176
2021-06-19 23:51:52 | INFO | train_inner | epoch 004: 167 / 3002 loss=2.727, ppl=6.62, wps=5868.6, ups=0.09, wpb=64811, bsz=128, num_updates=9115, lr=9.99351e-05, gnorm=2.237, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105187
2021-06-19 23:52:04 | INFO | train_inner | epoch 004: 168 / 3002 loss=2.559, ppl=5.89, wps=5818.7, ups=0.09, wpb=64848, bsz=128, num_updates=9116, lr=9.99351e-05, gnorm=2.15, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105198
2021-06-19 23:52:15 | INFO | train_inner | epoch 004: 169 / 3002 loss=2.627, ppl=6.18, wps=5859.9, ups=0.09, wpb=64814, bsz=128, num_updates=9117, lr=9.99351e-05, gnorm=6.975, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105209
2021-06-19 23:52:26 | INFO | train_inner | epoch 004: 170 / 3002 loss=2.57, ppl=5.94, wps=5753.1, ups=0.09, wpb=64784, bsz=128, num_updates=9118, lr=9.99351e-05, gnorm=2.153, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105220
2021-06-19 23:52:37 | INFO | train_inner | epoch 004: 171 / 3002 loss=2.613, ppl=6.12, wps=5902.7, ups=0.09, wpb=64895, bsz=128, num_updates=9119, lr=9.9935e-05, gnorm=2.156, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105231
2021-06-19 23:52:48 | INFO | train_inner | epoch 004: 172 / 3002 loss=2.667, ppl=6.35, wps=5752.7, ups=0.09, wpb=64763, bsz=128, num_updates=9120, lr=9.9935e-05, gnorm=2.391, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105243
2021-06-19 23:52:59 | INFO | train_inner | epoch 004: 173 / 3002 loss=2.543, ppl=5.83, wps=5842.5, ups=0.09, wpb=64862, bsz=128, num_updates=9121, lr=9.9935e-05, gnorm=2.123, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105254
2021-06-19 23:53:10 | INFO | train_inner | epoch 004: 174 / 3002 loss=2.645, ppl=6.26, wps=5962.4, ups=0.09, wpb=64819, bsz=128, num_updates=9122, lr=9.9935e-05, gnorm=2.196, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105264
2021-06-19 23:53:21 | INFO | train_inner | epoch 004: 175 / 3002 loss=2.43, ppl=5.39, wps=5858.5, ups=0.09, wpb=64863, bsz=128, num_updates=9123, lr=9.9935e-05, gnorm=2.217, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105276
2021-06-19 23:53:32 | INFO | train_inner | epoch 004: 176 / 3002 loss=2.513, ppl=5.71, wps=5910.5, ups=0.09, wpb=64856, bsz=128, num_updates=9124, lr=9.9935e-05, gnorm=2.215, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105287
2021-06-19 23:53:43 | INFO | train_inner | epoch 004: 177 / 3002 loss=2.588, ppl=6.01, wps=5837.7, ups=0.09, wpb=64793, bsz=128, num_updates=9125, lr=9.9935e-05, gnorm=2.889, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105298
2021-06-19 23:53:55 | INFO | train_inner | epoch 004: 178 / 3002 loss=2.662, ppl=6.33, wps=5755.7, ups=0.09, wpb=64763, bsz=128, num_updates=9126, lr=9.9935e-05, gnorm=2.177, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105309
2021-06-19 23:54:06 | INFO | train_inner | epoch 004: 179 / 3002 loss=2.641, ppl=6.24, wps=5841.6, ups=0.09, wpb=64830, bsz=128, num_updates=9127, lr=9.9935e-05, gnorm=2.113, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105320
2021-06-19 23:54:17 | INFO | train_inner | epoch 004: 180 / 3002 loss=2.738, ppl=6.67, wps=5878.5, ups=0.09, wpb=64803, bsz=128, num_updates=9128, lr=9.9935e-05, gnorm=2.21, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105331
2021-06-19 23:54:28 | INFO | train_inner | epoch 004: 181 / 3002 loss=2.489, ppl=5.61, wps=5937.8, ups=0.09, wpb=64854, bsz=128, num_updates=9129, lr=9.9935e-05, gnorm=2.077, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105342
2021-06-19 23:54:39 | INFO | train_inner | epoch 004: 182 / 3002 loss=2.5, ppl=5.66, wps=5912.9, ups=0.09, wpb=64861, bsz=128, num_updates=9130, lr=9.9935e-05, gnorm=2.764, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105353
2021-06-19 23:54:50 | INFO | train_inner | epoch 004: 183 / 3002 loss=2.582, ppl=5.99, wps=5874.4, ups=0.09, wpb=64821, bsz=128, num_updates=9131, lr=9.99349e-05, gnorm=2.185, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105364
2021-06-19 23:55:01 | INFO | train_inner | epoch 004: 184 / 3002 loss=2.502, ppl=5.66, wps=5843.9, ups=0.09, wpb=64911, bsz=128, num_updates=9132, lr=9.99349e-05, gnorm=2.171, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105375
2021-06-19 23:55:12 | INFO | train_inner | epoch 004: 185 / 3002 loss=2.412, ppl=5.32, wps=5839.1, ups=0.09, wpb=64831, bsz=128, num_updates=9133, lr=9.99349e-05, gnorm=2.135, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105386
2021-06-19 23:55:23 | INFO | train_inner | epoch 004: 186 / 3002 loss=2.555, ppl=5.88, wps=5902, ups=0.09, wpb=64817, bsz=128, num_updates=9134, lr=9.99349e-05, gnorm=2.151, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105397
2021-06-19 23:55:34 | INFO | train_inner | epoch 004: 187 / 3002 loss=2.709, ppl=6.54, wps=5862.6, ups=0.09, wpb=64722, bsz=128, num_updates=9135, lr=9.99349e-05, gnorm=2.27, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105408
2021-06-19 23:55:45 | INFO | train_inner | epoch 004: 188 / 3002 loss=2.625, ppl=6.17, wps=5822.1, ups=0.09, wpb=64842, bsz=128, num_updates=9136, lr=9.99349e-05, gnorm=2.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105419
2021-06-19 23:55:56 | INFO | train_inner | epoch 004: 189 / 3002 loss=2.45, ppl=5.47, wps=5922, ups=0.09, wpb=64821, bsz=128, num_updates=9137, lr=9.99349e-05, gnorm=3.674, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105430
2021-06-19 23:56:07 | INFO | train_inner | epoch 004: 190 / 3002 loss=2.688, ppl=6.45, wps=5890.9, ups=0.09, wpb=64855, bsz=128, num_updates=9138, lr=9.99349e-05, gnorm=2.208, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105441
2021-06-19 23:56:18 | INFO | train_inner | epoch 004: 191 / 3002 loss=2.61, ppl=6.11, wps=5905.1, ups=0.09, wpb=64819, bsz=128, num_updates=9139, lr=9.99349e-05, gnorm=2.089, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105452
2021-06-19 23:56:29 | INFO | train_inner | epoch 004: 192 / 3002 loss=2.567, ppl=5.93, wps=6049.3, ups=0.09, wpb=64900, bsz=128, num_updates=9140, lr=9.99349e-05, gnorm=2.201, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105463
2021-06-19 23:56:40 | INFO | train_inner | epoch 004: 193 / 3002 loss=2.707, ppl=6.53, wps=5812.4, ups=0.09, wpb=64863, bsz=128, num_updates=9141, lr=9.99349e-05, gnorm=2.074, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105474
2021-06-19 23:56:51 | INFO | train_inner | epoch 004: 194 / 3002 loss=2.686, ppl=6.43, wps=5955.1, ups=0.09, wpb=64832, bsz=128, num_updates=9142, lr=9.99349e-05, gnorm=2.079, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105485
2021-06-19 23:57:02 | INFO | train_inner | epoch 004: 195 / 3002 loss=2.619, ppl=6.14, wps=5857.3, ups=0.09, wpb=64826, bsz=128, num_updates=9143, lr=9.99349e-05, gnorm=2.126, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105496
2021-06-19 23:57:13 | INFO | train_inner | epoch 004: 196 / 3002 loss=2.549, ppl=5.85, wps=5897.9, ups=0.09, wpb=64830, bsz=128, num_updates=9144, lr=9.99348e-05, gnorm=2.149, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105507
2021-06-19 23:57:24 | INFO | train_inner | epoch 004: 197 / 3002 loss=2.601, ppl=6.07, wps=5853.1, ups=0.09, wpb=64699, bsz=128, num_updates=9145, lr=9.99348e-05, gnorm=2.066, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105518
2021-06-19 23:57:35 | INFO | train_inner | epoch 004: 198 / 3002 loss=2.611, ppl=6.11, wps=5780.7, ups=0.09, wpb=64870, bsz=128, num_updates=9146, lr=9.99348e-05, gnorm=2.159, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105529
2021-06-19 23:57:46 | INFO | train_inner | epoch 004: 199 / 3002 loss=2.626, ppl=6.17, wps=5836.8, ups=0.09, wpb=64817, bsz=128, num_updates=9147, lr=9.99348e-05, gnorm=2.02, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105540
2021-06-19 23:57:57 | INFO | train_inner | epoch 004: 200 / 3002 loss=2.643, ppl=6.24, wps=5732.1, ups=0.09, wpb=64804, bsz=128, num_updates=9148, lr=9.99348e-05, gnorm=2.219, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105552
2021-06-19 23:58:08 | INFO | train_inner | epoch 004: 201 / 3002 loss=2.696, ppl=6.48, wps=5919.1, ups=0.09, wpb=64843, bsz=128, num_updates=9149, lr=9.99348e-05, gnorm=2.243, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105563
2021-06-19 23:58:19 | INFO | train_inner | epoch 004: 202 / 3002 loss=2.574, ppl=5.96, wps=5920.8, ups=0.09, wpb=64828, bsz=128, num_updates=9150, lr=9.99348e-05, gnorm=2.152, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105574
2021-06-19 23:58:30 | INFO | train_inner | epoch 004: 203 / 3002 loss=2.572, ppl=5.95, wps=5859.2, ups=0.09, wpb=64891, bsz=128, num_updates=9151, lr=9.99348e-05, gnorm=2.121, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105585
2021-06-19 23:58:41 | INFO | train_inner | epoch 004: 204 / 3002 loss=2.514, ppl=5.71, wps=5844, ups=0.09, wpb=64762, bsz=128, num_updates=9152, lr=9.99348e-05, gnorm=2.583, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105596
2021-06-19 23:58:53 | INFO | train_inner | epoch 004: 205 / 3002 loss=2.621, ppl=6.15, wps=5827, ups=0.09, wpb=64835, bsz=128, num_updates=9153, lr=9.99348e-05, gnorm=2.233, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105607
2021-06-19 23:59:03 | INFO | train_inner | epoch 004: 206 / 3002 loss=2.621, ppl=6.15, wps=5995.2, ups=0.09, wpb=64714, bsz=128, num_updates=9154, lr=9.99348e-05, gnorm=2.061, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105618
2021-06-19 23:59:15 | INFO | train_inner | epoch 004: 207 / 3002 loss=2.49, ppl=5.62, wps=5735.6, ups=0.09, wpb=64793, bsz=128, num_updates=9155, lr=9.99348e-05, gnorm=2.189, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105629
2021-06-19 23:59:26 | INFO | train_inner | epoch 004: 208 / 3002 loss=2.561, ppl=5.9, wps=5774.1, ups=0.09, wpb=64871, bsz=128, num_updates=9156, lr=9.99347e-05, gnorm=2.122, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105640
2021-06-19 23:59:37 | INFO | train_inner | epoch 004: 209 / 3002 loss=2.508, ppl=5.69, wps=5771.6, ups=0.09, wpb=64929, bsz=128, num_updates=9157, lr=9.99347e-05, gnorm=2.4, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105652
2021-06-19 23:59:48 | INFO | train_inner | epoch 004: 210 / 3002 loss=2.679, ppl=6.41, wps=5871, ups=0.09, wpb=64847, bsz=128, num_updates=9158, lr=9.99347e-05, gnorm=2.183, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105663
2021-06-19 23:59:59 | INFO | train_inner | epoch 004: 211 / 3002 loss=2.614, ppl=6.12, wps=5787.9, ups=0.09, wpb=64863, bsz=128, num_updates=9159, lr=9.99347e-05, gnorm=2.144, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105674
2021-06-20 00:00:11 | INFO | train_inner | epoch 004: 212 / 3002 loss=2.595, ppl=6.04, wps=5787.9, ups=0.09, wpb=64900, bsz=128, num_updates=9160, lr=9.99347e-05, gnorm=2.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105685
2021-06-20 00:00:22 | INFO | train_inner | epoch 004: 213 / 3002 loss=2.563, ppl=5.91, wps=5870.9, ups=0.09, wpb=64911, bsz=128, num_updates=9161, lr=9.99347e-05, gnorm=2.372, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105696
2021-06-20 00:00:33 | INFO | train_inner | epoch 004: 214 / 3002 loss=2.579, ppl=5.98, wps=5780.4, ups=0.09, wpb=64856, bsz=128, num_updates=9162, lr=9.99347e-05, gnorm=2.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105707
2021-06-20 00:00:44 | INFO | train_inner | epoch 004: 215 / 3002 loss=2.537, ppl=5.8, wps=5843.6, ups=0.09, wpb=64823, bsz=128, num_updates=9163, lr=9.99347e-05, gnorm=2.042, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105718
2021-06-20 00:00:55 | INFO | train_inner | epoch 004: 216 / 3002 loss=2.567, ppl=5.93, wps=5809.2, ups=0.09, wpb=64910, bsz=128, num_updates=9164, lr=9.99347e-05, gnorm=2.237, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105730
2021-06-20 00:01:06 | INFO | train_inner | epoch 004: 217 / 3002 loss=2.707, ppl=6.53, wps=5966.1, ups=0.09, wpb=64833, bsz=128, num_updates=9165, lr=9.99347e-05, gnorm=2.134, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105740
2021-06-20 00:01:17 | INFO | train_inner | epoch 004: 218 / 3002 loss=2.502, ppl=5.66, wps=5923.6, ups=0.09, wpb=64791, bsz=128, num_updates=9166, lr=9.99347e-05, gnorm=2.296, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105751
2021-06-20 00:01:28 | INFO | train_inner | epoch 004: 219 / 3002 loss=2.484, ppl=5.59, wps=5956, ups=0.09, wpb=64811, bsz=128, num_updates=9167, lr=9.99347e-05, gnorm=2.079, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=105762
2021-06-20 00:01:39 | INFO | train_inner | epoch 004: 220 / 3002 loss=2.503, ppl=5.67, wps=5861.6, ups=0.09, wpb=64745, bsz=128, num_updates=9168, lr=9.99347e-05, gnorm=2.165, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105773
2021-06-20 00:01:50 | INFO | train_inner | epoch 004: 221 / 3002 loss=2.69, ppl=6.45, wps=5871.2, ups=0.09, wpb=64823, bsz=128, num_updates=9169, lr=9.99346e-05, gnorm=2.069, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105784
2021-06-20 00:02:01 | INFO | train_inner | epoch 004: 222 / 3002 loss=2.496, ppl=5.64, wps=5859.8, ups=0.09, wpb=64773, bsz=128, num_updates=9170, lr=9.99346e-05, gnorm=2.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105795
2021-06-20 00:02:12 | INFO | train_inner | epoch 004: 223 / 3002 loss=2.618, ppl=6.14, wps=5761, ups=0.09, wpb=64850, bsz=128, num_updates=9171, lr=9.99346e-05, gnorm=2.265, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105807
2021-06-20 00:02:23 | INFO | train_inner | epoch 004: 224 / 3002 loss=2.77, ppl=6.82, wps=5914.5, ups=0.09, wpb=64812, bsz=128, num_updates=9172, lr=9.99346e-05, gnorm=2.13, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105818
2021-06-20 00:02:34 | INFO | train_inner | epoch 004: 225 / 3002 loss=2.47, ppl=5.54, wps=5884.1, ups=0.09, wpb=64882, bsz=128, num_updates=9173, lr=9.99346e-05, gnorm=2.026, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=105829
2021-06-20 00:02:45 | INFO | train_inner | epoch 004: 226 / 3002 loss=2.576, ppl=5.96, wps=5885.8, ups=0.09, wpb=64834, bsz=128, num_updates=9174, lr=9.99346e-05, gnorm=2.026, loss_scale=1, train_wall=11, gb_free=2.8, wall=105840
2021-06-20 00:02:57 | INFO | train_inner | epoch 004: 227 / 3002 loss=2.651, ppl=6.28, wps=5788.2, ups=0.09, wpb=64903, bsz=128, num_updates=9175, lr=9.99346e-05, gnorm=2.095, loss_scale=1, train_wall=11, gb_free=2.8, wall=105851
2021-06-20 00:03:07 | INFO | train_inner | epoch 004: 228 / 3002 loss=2.468, ppl=5.53, wps=6010.3, ups=0.09, wpb=64812, bsz=128, num_updates=9176, lr=9.99346e-05, gnorm=2.174, loss_scale=1, train_wall=10, gb_free=2.8, wall=105862
2021-06-20 00:03:19 | INFO | train_inner | epoch 004: 229 / 3002 loss=2.5, ppl=5.66, wps=5759.2, ups=0.09, wpb=64826, bsz=128, num_updates=9177, lr=9.99346e-05, gnorm=2.714, loss_scale=1, train_wall=11, gb_free=2.8, wall=105873
2021-06-20 00:03:30 | INFO | train_inner | epoch 004: 230 / 3002 loss=2.629, ppl=6.19, wps=5815.7, ups=0.09, wpb=64791, bsz=128, num_updates=9178, lr=9.99346e-05, gnorm=2.208, loss_scale=1, train_wall=11, gb_free=2.8, wall=105884
2021-06-20 00:03:41 | INFO | train_inner | epoch 004: 231 / 3002 loss=2.544, ppl=5.83, wps=5819.2, ups=0.09, wpb=64832, bsz=128, num_updates=9179, lr=9.99346e-05, gnorm=2.163, loss_scale=1, train_wall=11, gb_free=2.8, wall=105895
2021-06-20 00:03:52 | INFO | train_inner | epoch 004: 232 / 3002 loss=2.638, ppl=6.22, wps=5813.5, ups=0.09, wpb=64756, bsz=128, num_updates=9180, lr=9.99346e-05, gnorm=2.234, loss_scale=1, train_wall=11, gb_free=2.8, wall=105906
2021-06-20 00:04:03 | INFO | train_inner | epoch 004: 233 / 3002 loss=2.556, ppl=5.88, wps=5855.5, ups=0.09, wpb=64863, bsz=128, num_updates=9181, lr=9.99345e-05, gnorm=2.203, loss_scale=1, train_wall=11, gb_free=2.8, wall=105917
2021-06-20 00:04:14 | INFO | train_inner | epoch 004: 234 / 3002 loss=2.723, ppl=6.6, wps=5840.9, ups=0.09, wpb=64888, bsz=128, num_updates=9182, lr=9.99345e-05, gnorm=2.23, loss_scale=1, train_wall=11, gb_free=2.8, wall=105928
2021-06-20 00:04:25 | INFO | train_inner | epoch 004: 235 / 3002 loss=2.607, ppl=6.09, wps=5724.1, ups=0.09, wpb=64888, bsz=128, num_updates=9183, lr=9.99345e-05, gnorm=2.141, loss_scale=1, train_wall=11, gb_free=2.8, wall=105940
2021-06-20 00:04:37 | INFO | train_inner | epoch 004: 236 / 3002 loss=2.504, ppl=5.67, wps=5791.6, ups=0.09, wpb=64892, bsz=128, num_updates=9184, lr=9.99345e-05, gnorm=2.238, loss_scale=1, train_wall=11, gb_free=2.8, wall=105951
2021-06-20 00:04:48 | INFO | train_inner | epoch 004: 237 / 3002 loss=2.707, ppl=6.53, wps=5820.5, ups=0.09, wpb=64825, bsz=128, num_updates=9185, lr=9.99345e-05, gnorm=2.281, loss_scale=1, train_wall=11, gb_free=2.8, wall=105962
2021-06-20 00:04:59 | INFO | train_inner | epoch 004: 238 / 3002 loss=2.58, ppl=5.98, wps=5854.3, ups=0.09, wpb=64826, bsz=128, num_updates=9186, lr=9.99345e-05, gnorm=2.102, loss_scale=1, train_wall=11, gb_free=2.8, wall=105973
2021-06-20 00:05:10 | INFO | train_inner | epoch 004: 239 / 3002 loss=2.649, ppl=6.27, wps=5849.7, ups=0.09, wpb=64822, bsz=128, num_updates=9187, lr=9.99345e-05, gnorm=2.462, loss_scale=1, train_wall=11, gb_free=2.8, wall=105984
2021-06-20 00:05:21 | INFO | train_inner | epoch 004: 240 / 3002 loss=2.518, ppl=5.73, wps=5787.5, ups=0.09, wpb=64780, bsz=128, num_updates=9188, lr=9.99345e-05, gnorm=2.064, loss_scale=1, train_wall=11, gb_free=2.8, wall=105996
2021-06-20 00:05:32 | INFO | train_inner | epoch 004: 241 / 3002 loss=2.585, ppl=6, wps=5795.5, ups=0.09, wpb=64792, bsz=128, num_updates=9189, lr=9.99345e-05, gnorm=2.285, loss_scale=1, train_wall=11, gb_free=2.8, wall=106007
2021-06-20 00:05:43 | INFO | train_inner | epoch 004: 242 / 3002 loss=2.498, ppl=5.65, wps=5860.6, ups=0.09, wpb=64859, bsz=128, num_updates=9190, lr=9.99345e-05, gnorm=2.116, loss_scale=1, train_wall=11, gb_free=2.8, wall=106018
2021-06-20 00:05:54 | INFO | train_inner | epoch 004: 243 / 3002 loss=2.476, ppl=5.56, wps=5899.3, ups=0.09, wpb=64857, bsz=128, num_updates=9191, lr=9.99345e-05, gnorm=2.086, loss_scale=1, train_wall=11, gb_free=2.8, wall=106029
2021-06-20 00:06:05 | INFO | train_inner | epoch 004: 244 / 3002 loss=2.482, ppl=5.59, wps=5865.5, ups=0.09, wpb=64830, bsz=128, num_updates=9192, lr=9.99345e-05, gnorm=5.791, loss_scale=1, train_wall=11, gb_free=2.8, wall=106040
2021-06-20 00:06:17 | INFO | train_inner | epoch 004: 245 / 3002 loss=2.514, ppl=5.71, wps=5798.1, ups=0.09, wpb=64776, bsz=128, num_updates=9193, lr=9.99345e-05, gnorm=2.148, loss_scale=1, train_wall=11, gb_free=2.8, wall=106051
2021-06-20 00:06:27 | INFO | train_inner | epoch 004: 246 / 3002 loss=2.606, ppl=6.09, wps=6002.4, ups=0.09, wpb=64864, bsz=128, num_updates=9194, lr=9.99344e-05, gnorm=2.286, loss_scale=1, train_wall=10, gb_free=2.8, wall=106062
2021-06-20 00:06:38 | INFO | train_inner | epoch 004: 247 / 3002 loss=2.662, ppl=6.33, wps=5904, ups=0.09, wpb=64839, bsz=128, num_updates=9195, lr=9.99344e-05, gnorm=4.868, loss_scale=1, train_wall=11, gb_free=2.8, wall=106073
2021-06-20 00:06:50 | INFO | train_inner | epoch 004: 248 / 3002 loss=2.529, ppl=5.77, wps=5826, ups=0.09, wpb=64792, bsz=128, num_updates=9196, lr=9.99344e-05, gnorm=2.099, loss_scale=1, train_wall=11, gb_free=2.8, wall=106084
2021-06-20 00:07:01 | INFO | train_inner | epoch 004: 249 / 3002 loss=2.667, ppl=6.35, wps=5842.3, ups=0.09, wpb=64771, bsz=128, num_updates=9197, lr=9.99344e-05, gnorm=2.239, loss_scale=1, train_wall=11, gb_free=2.8, wall=106095
2021-06-20 00:07:11 | INFO | train_inner | epoch 004: 250 / 3002 loss=2.586, ppl=6.01, wps=5991, ups=0.09, wpb=64889, bsz=128, num_updates=9198, lr=9.99344e-05, gnorm=2.136, loss_scale=1, train_wall=10, gb_free=2.8, wall=106106
2021-06-20 00:07:23 | INFO | train_inner | epoch 004: 251 / 3002 loss=2.583, ppl=5.99, wps=5816.1, ups=0.09, wpb=64884, bsz=128, num_updates=9199, lr=9.99344e-05, gnorm=9.029, loss_scale=1, train_wall=11, gb_free=2.8, wall=106117
2021-06-20 00:07:34 | INFO | train_inner | epoch 004: 252 / 3002 loss=2.513, ppl=5.71, wps=5760.1, ups=0.09, wpb=64876, bsz=128, num_updates=9200, lr=9.99344e-05, gnorm=2.105, loss_scale=1, train_wall=11, gb_free=2.8, wall=106128
2021-06-20 00:07:45 | INFO | train_inner | epoch 004: 253 / 3002 loss=2.74, ppl=6.68, wps=5780.8, ups=0.09, wpb=64871, bsz=128, num_updates=9201, lr=9.99344e-05, gnorm=2.164, loss_scale=1, train_wall=11, gb_free=2.8, wall=106139
2021-06-20 00:07:56 | INFO | train_inner | epoch 004: 254 / 3002 loss=2.587, ppl=6.01, wps=5852.7, ups=0.09, wpb=64879, bsz=128, num_updates=9202, lr=9.99344e-05, gnorm=2.01, loss_scale=1, train_wall=11, gb_free=2.8, wall=106151
2021-06-20 00:08:07 | INFO | train_inner | epoch 004: 255 / 3002 loss=2.643, ppl=6.24, wps=5901.5, ups=0.09, wpb=64807, bsz=128, num_updates=9203, lr=9.99344e-05, gnorm=2.075, loss_scale=1, train_wall=11, gb_free=2.8, wall=106162
2021-06-20 00:08:18 | INFO | train_inner | epoch 004: 256 / 3002 loss=2.598, ppl=6.05, wps=5751.4, ups=0.09, wpb=64773, bsz=128, num_updates=9204, lr=9.99344e-05, gnorm=2.206, loss_scale=1, train_wall=11, gb_free=2.8, wall=106173
2021-06-20 00:08:30 | INFO | train_inner | epoch 004: 257 / 3002 loss=2.533, ppl=5.79, wps=5855.4, ups=0.09, wpb=64862, bsz=128, num_updates=9205, lr=9.99344e-05, gnorm=2.072, loss_scale=1, train_wall=11, gb_free=2.8, wall=106184
2021-06-20 00:08:41 | INFO | train_inner | epoch 004: 258 / 3002 loss=2.64, ppl=6.23, wps=5861.3, ups=0.09, wpb=64792, bsz=128, num_updates=9206, lr=9.99343e-05, gnorm=2.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=106195
2021-06-20 00:08:52 | INFO | train_inner | epoch 004: 259 / 3002 loss=2.74, ppl=6.68, wps=5899.7, ups=0.09, wpb=64816, bsz=128, num_updates=9207, lr=9.99343e-05, gnorm=3.521, loss_scale=1, train_wall=11, gb_free=2.8, wall=106206
2021-06-20 00:09:03 | INFO | train_inner | epoch 004: 260 / 3002 loss=2.61, ppl=6.11, wps=5843.6, ups=0.09, wpb=64771, bsz=128, num_updates=9208, lr=9.99343e-05, gnorm=2.215, loss_scale=1, train_wall=11, gb_free=2.8, wall=106217
2021-06-20 00:09:14 | INFO | train_inner | epoch 004: 261 / 3002 loss=2.507, ppl=5.69, wps=5732.9, ups=0.09, wpb=64805, bsz=128, num_updates=9209, lr=9.99343e-05, gnorm=2.022, loss_scale=1, train_wall=11, gb_free=2.8, wall=106228
2021-06-20 00:09:25 | INFO | train_inner | epoch 004: 262 / 3002 loss=2.492, ppl=5.63, wps=5857.4, ups=0.09, wpb=64939, bsz=128, num_updates=9210, lr=9.99343e-05, gnorm=2.335, loss_scale=1, train_wall=11, gb_free=2.8, wall=106239
2021-06-20 00:09:36 | INFO | train_inner | epoch 004: 263 / 3002 loss=2.521, ppl=5.74, wps=5843.2, ups=0.09, wpb=64833, bsz=128, num_updates=9211, lr=9.99343e-05, gnorm=2.17, loss_scale=1, train_wall=11, gb_free=2.8, wall=106250
2021-06-20 00:09:47 | INFO | train_inner | epoch 004: 264 / 3002 loss=2.646, ppl=6.26, wps=5716.9, ups=0.09, wpb=64837, bsz=128, num_updates=9212, lr=9.99343e-05, gnorm=20.779, loss_scale=1, train_wall=11, gb_free=2.8, wall=106262
2021-06-20 00:09:58 | INFO | train_inner | epoch 004: 265 / 3002 loss=2.627, ppl=6.18, wps=5919.3, ups=0.09, wpb=64901, bsz=128, num_updates=9213, lr=9.99343e-05, gnorm=2.156, loss_scale=1, train_wall=11, gb_free=2.8, wall=106273
2021-06-20 00:10:09 | INFO | train_inner | epoch 004: 266 / 3002 loss=2.551, ppl=5.86, wps=5868.8, ups=0.09, wpb=64849, bsz=128, num_updates=9214, lr=9.99343e-05, gnorm=2.213, loss_scale=1, train_wall=11, gb_free=2.8, wall=106284
2021-06-20 00:10:20 | INFO | train_inner | epoch 004: 267 / 3002 loss=2.655, ppl=6.3, wps=5946.8, ups=0.09, wpb=64873, bsz=128, num_updates=9215, lr=9.99343e-05, gnorm=2.71, loss_scale=1, train_wall=10, gb_free=2.8, wall=106295
2021-06-20 00:10:31 | INFO | train_inner | epoch 004: 268 / 3002 loss=2.589, ppl=6.02, wps=5942.5, ups=0.09, wpb=64811, bsz=128, num_updates=9216, lr=9.99343e-05, gnorm=2.077, loss_scale=1, train_wall=10, gb_free=2.8, wall=106306
2021-06-20 00:10:43 | INFO | train_inner | epoch 004: 269 / 3002 loss=2.666, ppl=6.35, wps=5764.2, ups=0.09, wpb=64834, bsz=128, num_updates=9217, lr=9.99343e-05, gnorm=2.629, loss_scale=1, train_wall=11, gb_free=2.8, wall=106317
2021-06-20 00:10:54 | INFO | train_inner | epoch 004: 270 / 3002 loss=2.61, ppl=6.11, wps=5787.6, ups=0.09, wpb=64826, bsz=128, num_updates=9218, lr=9.99343e-05, gnorm=2.9, loss_scale=1, train_wall=11, gb_free=2.8, wall=106328
2021-06-20 00:11:05 | INFO | train_inner | epoch 004: 271 / 3002 loss=2.502, ppl=5.66, wps=5780.6, ups=0.09, wpb=64810, bsz=128, num_updates=9219, lr=9.99342e-05, gnorm=2.14, loss_scale=1, train_wall=11, gb_free=2.8, wall=106339
2021-06-20 00:11:16 | INFO | train_inner | epoch 004: 272 / 3002 loss=2.555, ppl=5.88, wps=5881.2, ups=0.09, wpb=64822, bsz=128, num_updates=9220, lr=9.99342e-05, gnorm=2.238, loss_scale=1, train_wall=11, gb_free=2.8, wall=106350
2021-06-20 00:11:27 | INFO | train_inner | epoch 004: 273 / 3002 loss=2.687, ppl=6.44, wps=5997.5, ups=0.09, wpb=64854, bsz=128, num_updates=9221, lr=9.99342e-05, gnorm=2.767, loss_scale=1, train_wall=10, gb_free=2.8, wall=106361
2021-06-20 00:11:38 | INFO | train_inner | epoch 004: 274 / 3002 loss=2.481, ppl=5.58, wps=5913.6, ups=0.09, wpb=64860, bsz=128, num_updates=9222, lr=9.99342e-05, gnorm=2.259, loss_scale=1, train_wall=11, gb_free=2.8, wall=106372
2021-06-20 00:11:49 | INFO | train_inner | epoch 004: 275 / 3002 loss=2.718, ppl=6.58, wps=5939.1, ups=0.09, wpb=64849, bsz=128, num_updates=9223, lr=9.99342e-05, gnorm=2.287, loss_scale=1, train_wall=10, gb_free=2.8, wall=106383
2021-06-20 00:12:00 | INFO | train_inner | epoch 004: 276 / 3002 loss=2.504, ppl=5.67, wps=5783.2, ups=0.09, wpb=64808, bsz=128, num_updates=9224, lr=9.99342e-05, gnorm=2.177, loss_scale=1, train_wall=11, gb_free=2.8, wall=106394
2021-06-20 00:12:11 | INFO | train_inner | epoch 004: 277 / 3002 loss=2.662, ppl=6.33, wps=5905.8, ups=0.09, wpb=64814, bsz=128, num_updates=9225, lr=9.99342e-05, gnorm=5.048, loss_scale=1, train_wall=11, gb_free=2.8, wall=106405
2021-06-20 00:12:22 | INFO | train_inner | epoch 004: 278 / 3002 loss=2.63, ppl=6.19, wps=5879.4, ups=0.09, wpb=64889, bsz=128, num_updates=9226, lr=9.99342e-05, gnorm=2.088, loss_scale=1, train_wall=11, gb_free=2.8, wall=106416
2021-06-20 00:12:33 | INFO | train_inner | epoch 004: 279 / 3002 loss=2.489, ppl=5.61, wps=5839.4, ups=0.09, wpb=64875, bsz=128, num_updates=9227, lr=9.99342e-05, gnorm=2.123, loss_scale=1, train_wall=11, gb_free=2.8, wall=106427
2021-06-20 00:12:44 | INFO | train_inner | epoch 004: 280 / 3002 loss=2.533, ppl=5.79, wps=5813.8, ups=0.09, wpb=64821, bsz=128, num_updates=9228, lr=9.99342e-05, gnorm=2.236, loss_scale=1, train_wall=11, gb_free=2.8, wall=106439
2021-06-20 00:12:55 | INFO | train_inner | epoch 004: 281 / 3002 loss=2.713, ppl=6.56, wps=5769.3, ups=0.09, wpb=64823, bsz=128, num_updates=9229, lr=9.99342e-05, gnorm=2.226, loss_scale=1, train_wall=11, gb_free=2.8, wall=106450
2021-06-20 00:13:06 | INFO | train_inner | epoch 004: 282 / 3002 loss=2.691, ppl=6.46, wps=5891.9, ups=0.09, wpb=64861, bsz=128, num_updates=9230, lr=9.99342e-05, gnorm=2.17, loss_scale=1, train_wall=11, gb_free=2.8, wall=106461
2021-06-20 00:13:18 | INFO | train_inner | epoch 004: 283 / 3002 loss=2.637, ppl=6.22, wps=5771.2, ups=0.09, wpb=64895, bsz=128, num_updates=9231, lr=9.99341e-05, gnorm=2.309, loss_scale=1, train_wall=11, gb_free=2.8, wall=106472
2021-06-20 00:13:29 | INFO | train_inner | epoch 004: 284 / 3002 loss=2.497, ppl=5.64, wps=5976, ups=0.09, wpb=64936, bsz=128, num_updates=9232, lr=9.99341e-05, gnorm=2.154, loss_scale=1, train_wall=10, gb_free=2.8, wall=106483
2021-06-20 00:13:40 | INFO | train_inner | epoch 004: 285 / 3002 loss=2.727, ppl=6.62, wps=5872.2, ups=0.09, wpb=64827, bsz=128, num_updates=9233, lr=9.99341e-05, gnorm=2.156, loss_scale=1, train_wall=11, gb_free=2.8, wall=106494
2021-06-20 00:13:51 | INFO | train_inner | epoch 004: 286 / 3002 loss=2.512, ppl=5.7, wps=5763, ups=0.09, wpb=64817, bsz=128, num_updates=9234, lr=9.99341e-05, gnorm=2.266, loss_scale=1, train_wall=11, gb_free=2.8, wall=106505
2021-06-20 00:14:02 | INFO | train_inner | epoch 004: 287 / 3002 loss=2.508, ppl=5.69, wps=5867.8, ups=0.09, wpb=64824, bsz=128, num_updates=9235, lr=9.99341e-05, gnorm=3.325, loss_scale=1, train_wall=11, gb_free=2.8, wall=106516
2021-06-20 00:14:13 | INFO | train_inner | epoch 004: 288 / 3002 loss=2.57, ppl=5.94, wps=5797.9, ups=0.09, wpb=64840, bsz=128, num_updates=9236, lr=9.99341e-05, gnorm=2.412, loss_scale=1, train_wall=11, gb_free=2.8, wall=106527
2021-06-20 00:14:24 | INFO | train_inner | epoch 004: 289 / 3002 loss=2.574, ppl=5.95, wps=5911.9, ups=0.09, wpb=64810, bsz=128, num_updates=9237, lr=9.99341e-05, gnorm=2.231, loss_scale=1, train_wall=11, gb_free=2.8, wall=106538
2021-06-20 00:14:35 | INFO | train_inner | epoch 004: 290 / 3002 loss=2.751, ppl=6.73, wps=5894.2, ups=0.09, wpb=64847, bsz=128, num_updates=9238, lr=9.99341e-05, gnorm=2.229, loss_scale=1, train_wall=11, gb_free=2.8, wall=106549
2021-06-20 00:14:46 | INFO | train_inner | epoch 004: 291 / 3002 loss=2.647, ppl=6.26, wps=5860.9, ups=0.09, wpb=64823, bsz=128, num_updates=9239, lr=9.99341e-05, gnorm=2.318, loss_scale=1, train_wall=11, gb_free=2.8, wall=106560
2021-06-20 00:14:57 | INFO | train_inner | epoch 004: 292 / 3002 loss=2.548, ppl=5.85, wps=5774.7, ups=0.09, wpb=64723, bsz=128, num_updates=9240, lr=9.99341e-05, gnorm=2.073, loss_scale=1, train_wall=11, gb_free=2.8, wall=106572
2021-06-20 00:15:08 | INFO | train_inner | epoch 004: 293 / 3002 loss=2.507, ppl=5.68, wps=5886.4, ups=0.09, wpb=64795, bsz=128, num_updates=9241, lr=9.99341e-05, gnorm=3.227, loss_scale=1, train_wall=11, gb_free=2.8, wall=106583
2021-06-20 00:15:19 | INFO | train_inner | epoch 004: 294 / 3002 loss=2.623, ppl=6.16, wps=5873.8, ups=0.09, wpb=64892, bsz=128, num_updates=9242, lr=9.99341e-05, gnorm=2.615, loss_scale=1, train_wall=11, gb_free=2.8, wall=106594
2021-06-20 00:15:30 | INFO | train_inner | epoch 004: 295 / 3002 loss=2.721, ppl=6.59, wps=5838, ups=0.09, wpb=64911, bsz=128, num_updates=9243, lr=9.99341e-05, gnorm=2.33, loss_scale=1, train_wall=11, gb_free=2.8, wall=106605
2021-06-20 00:15:42 | INFO | train_inner | epoch 004: 296 / 3002 loss=2.611, ppl=6.11, wps=5845.6, ups=0.09, wpb=64843, bsz=128, num_updates=9244, lr=9.9934e-05, gnorm=2.386, loss_scale=1, train_wall=11, gb_free=2.8, wall=106616
2021-06-20 00:15:53 | INFO | train_inner | epoch 004: 297 / 3002 loss=2.581, ppl=5.98, wps=5851.2, ups=0.09, wpb=64800, bsz=128, num_updates=9245, lr=9.9934e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=106627
2021-06-20 00:16:04 | INFO | train_inner | epoch 004: 298 / 3002 loss=2.606, ppl=6.09, wps=5884.7, ups=0.09, wpb=64817, bsz=128, num_updates=9246, lr=9.9934e-05, gnorm=2.19, loss_scale=1, train_wall=11, gb_free=2.8, wall=106638
2021-06-20 00:16:15 | INFO | train_inner | epoch 004: 299 / 3002 loss=2.737, ppl=6.67, wps=5814.6, ups=0.09, wpb=64812, bsz=128, num_updates=9247, lr=9.9934e-05, gnorm=2.096, loss_scale=1, train_wall=11, gb_free=2.8, wall=106649
2021-06-20 00:16:26 | INFO | train_inner | epoch 004: 300 / 3002 loss=2.782, ppl=6.88, wps=5983.1, ups=0.09, wpb=64857, bsz=128, num_updates=9248, lr=9.9934e-05, gnorm=2.222, loss_scale=1, train_wall=10, gb_free=2.8, wall=106660
2021-06-20 00:16:37 | INFO | train_inner | epoch 004: 301 / 3002 loss=2.599, ppl=6.06, wps=5842.6, ups=0.09, wpb=64831, bsz=128, num_updates=9249, lr=9.9934e-05, gnorm=2.13, loss_scale=1, train_wall=11, gb_free=2.8, wall=106671
2021-06-20 00:16:48 | INFO | train_inner | epoch 004: 302 / 3002 loss=2.601, ppl=6.07, wps=5802.6, ups=0.09, wpb=64789, bsz=128, num_updates=9250, lr=9.9934e-05, gnorm=2.195, loss_scale=1, train_wall=11, gb_free=2.8, wall=106682
2021-06-20 00:16:59 | INFO | train_inner | epoch 004: 303 / 3002 loss=2.703, ppl=6.51, wps=5937.5, ups=0.09, wpb=64758, bsz=128, num_updates=9251, lr=9.9934e-05, gnorm=2.878, loss_scale=1, train_wall=10, gb_free=2.8, wall=106693
2021-06-20 00:17:10 | INFO | train_inner | epoch 004: 304 / 3002 loss=2.566, ppl=5.92, wps=5866.9, ups=0.09, wpb=64889, bsz=128, num_updates=9252, lr=9.9934e-05, gnorm=2.275, loss_scale=1, train_wall=11, gb_free=2.8, wall=106704
2021-06-20 00:17:21 | INFO | train_inner | epoch 004: 305 / 3002 loss=2.626, ppl=6.17, wps=5957.6, ups=0.09, wpb=64819, bsz=128, num_updates=9253, lr=9.9934e-05, gnorm=2.153, loss_scale=1, train_wall=10, gb_free=2.8, wall=106715
2021-06-20 00:17:32 | INFO | train_inner | epoch 004: 306 / 3002 loss=2.642, ppl=6.24, wps=5949.1, ups=0.09, wpb=64818, bsz=128, num_updates=9254, lr=9.9934e-05, gnorm=2.195, loss_scale=1, train_wall=10, gb_free=2.8, wall=106726
2021-06-20 00:17:43 | INFO | train_inner | epoch 004: 307 / 3002 loss=2.438, ppl=5.42, wps=5854.7, ups=0.09, wpb=64808, bsz=128, num_updates=9255, lr=9.9934e-05, gnorm=2.094, loss_scale=1, train_wall=11, gb_free=2.8, wall=106737
2021-06-20 00:17:54 | INFO | train_inner | epoch 004: 308 / 3002 loss=2.673, ppl=6.38, wps=5931.7, ups=0.09, wpb=64860, bsz=128, num_updates=9256, lr=9.99339e-05, gnorm=2.033, loss_scale=1, train_wall=10, gb_free=2.8, wall=106748
2021-06-20 00:18:05 | INFO | train_inner | epoch 004: 309 / 3002 loss=2.583, ppl=5.99, wps=5875.8, ups=0.09, wpb=64804, bsz=128, num_updates=9257, lr=9.99339e-05, gnorm=2.152, loss_scale=1, train_wall=11, gb_free=2.8, wall=106759
2021-06-20 00:18:16 | INFO | train_inner | epoch 004: 310 / 3002 loss=2.573, ppl=5.95, wps=5961.6, ups=0.09, wpb=64763, bsz=128, num_updates=9258, lr=9.99339e-05, gnorm=2.224, loss_scale=1, train_wall=10, gb_free=2.8, wall=106770
2021-06-20 00:18:26 | INFO | train_inner | epoch 004: 311 / 3002 loss=2.657, ppl=6.31, wps=5906.2, ups=0.09, wpb=64787, bsz=128, num_updates=9259, lr=9.99339e-05, gnorm=2.082, loss_scale=1, train_wall=10, gb_free=2.8, wall=106781
2021-06-20 00:18:38 | INFO | train_inner | epoch 004: 312 / 3002 loss=2.649, ppl=6.27, wps=5794, ups=0.09, wpb=64861, bsz=128, num_updates=9260, lr=9.99339e-05, gnorm=3.414, loss_scale=1, train_wall=11, gb_free=2.8, wall=106792
2021-06-20 00:18:49 | INFO | train_inner | epoch 004: 313 / 3002 loss=2.756, ppl=6.76, wps=5751.8, ups=0.09, wpb=64853, bsz=128, num_updates=9261, lr=9.99339e-05, gnorm=2.28, loss_scale=1, train_wall=11, gb_free=2.8, wall=106803
2021-06-20 00:19:00 | INFO | train_inner | epoch 004: 314 / 3002 loss=2.802, ppl=6.97, wps=5800.6, ups=0.09, wpb=64796, bsz=128, num_updates=9262, lr=9.99339e-05, gnorm=2.51, loss_scale=1, train_wall=11, gb_free=2.8, wall=106814
2021-06-20 00:19:11 | INFO | train_inner | epoch 004: 315 / 3002 loss=2.686, ppl=6.43, wps=5782.1, ups=0.09, wpb=64896, bsz=128, num_updates=9263, lr=9.99339e-05, gnorm=2.225, loss_scale=1, train_wall=11, gb_free=2.8, wall=106826
2021-06-20 00:19:22 | INFO | train_inner | epoch 004: 316 / 3002 loss=2.601, ppl=6.07, wps=5823.6, ups=0.09, wpb=64843, bsz=128, num_updates=9264, lr=9.99339e-05, gnorm=2.935, loss_scale=1, train_wall=11, gb_free=2.8, wall=106837
2021-06-20 00:19:33 | INFO | train_inner | epoch 004: 317 / 3002 loss=2.414, ppl=5.33, wps=5899.3, ups=0.09, wpb=64813, bsz=128, num_updates=9265, lr=9.99339e-05, gnorm=2.124, loss_scale=1, train_wall=11, gb_free=2.8, wall=106848
2021-06-20 00:19:45 | INFO | train_inner | epoch 004: 318 / 3002 loss=2.496, ppl=5.64, wps=5846.7, ups=0.09, wpb=64796, bsz=128, num_updates=9266, lr=9.99339e-05, gnorm=15.59, loss_scale=1, train_wall=11, gb_free=2.8, wall=106859
2021-06-20 00:19:56 | INFO | train_inner | epoch 004: 319 / 3002 loss=2.582, ppl=5.99, wps=5844.5, ups=0.09, wpb=64833, bsz=128, num_updates=9267, lr=9.99339e-05, gnorm=2.537, loss_scale=1, train_wall=11, gb_free=2.8, wall=106870
2021-06-20 00:20:07 | INFO | train_inner | epoch 004: 320 / 3002 loss=2.781, ppl=6.87, wps=5857.1, ups=0.09, wpb=64792, bsz=128, num_updates=9268, lr=9.99339e-05, gnorm=5.251, loss_scale=1, train_wall=11, gb_free=2.8, wall=106881
2021-06-20 00:20:18 | INFO | train_inner | epoch 004: 321 / 3002 loss=2.766, ppl=6.8, wps=5917.4, ups=0.09, wpb=64777, bsz=128, num_updates=9269, lr=9.99338e-05, gnorm=2.287, loss_scale=1, train_wall=10, gb_free=2.8, wall=106892
2021-06-20 00:20:29 | INFO | train_inner | epoch 004: 322 / 3002 loss=2.646, ppl=6.26, wps=5814.4, ups=0.09, wpb=64860, bsz=128, num_updates=9270, lr=9.99338e-05, gnorm=2.685, loss_scale=1, train_wall=11, gb_free=2.8, wall=106903
2021-06-20 00:20:40 | INFO | train_inner | epoch 004: 323 / 3002 loss=2.581, ppl=5.99, wps=5862.4, ups=0.09, wpb=64809, bsz=128, num_updates=9271, lr=9.99338e-05, gnorm=2.499, loss_scale=1, train_wall=11, gb_free=2.8, wall=106914
2021-06-20 00:20:51 | INFO | train_inner | epoch 004: 324 / 3002 loss=2.537, ppl=5.8, wps=5926.2, ups=0.09, wpb=64937, bsz=128, num_updates=9272, lr=9.99338e-05, gnorm=2.517, loss_scale=1, train_wall=10, gb_free=2.8, wall=106925
2021-06-20 00:21:02 | INFO | train_inner | epoch 004: 325 / 3002 loss=2.71, ppl=6.55, wps=5845.4, ups=0.09, wpb=64833, bsz=128, num_updates=9273, lr=9.99338e-05, gnorm=21.364, loss_scale=1, train_wall=11, gb_free=2.8, wall=106936
2021-06-20 00:21:13 | INFO | train_inner | epoch 004: 326 / 3002 loss=2.727, ppl=6.62, wps=5846.4, ups=0.09, wpb=64849, bsz=128, num_updates=9274, lr=9.99338e-05, gnorm=2.289, loss_scale=1, train_wall=11, gb_free=2.8, wall=106947
2021-06-20 00:21:24 | INFO | train_inner | epoch 004: 327 / 3002 loss=2.655, ppl=6.3, wps=5838.5, ups=0.09, wpb=64854, bsz=128, num_updates=9275, lr=9.99338e-05, gnorm=4.001, loss_scale=1, train_wall=11, gb_free=2.8, wall=106958
2021-06-20 00:21:35 | INFO | train_inner | epoch 004: 328 / 3002 loss=2.563, ppl=5.91, wps=5892.5, ups=0.09, wpb=64855, bsz=128, num_updates=9276, lr=9.99338e-05, gnorm=2.384, loss_scale=1, train_wall=11, gb_free=2.8, wall=106969
2021-06-20 00:21:46 | INFO | train_inner | epoch 004: 329 / 3002 loss=2.822, ppl=7.07, wps=5871.6, ups=0.09, wpb=64815, bsz=128, num_updates=9277, lr=9.99338e-05, gnorm=3.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=106981
2021-06-20 00:21:57 | INFO | train_inner | epoch 004: 330 / 3002 loss=2.505, ppl=5.68, wps=5881.6, ups=0.09, wpb=64882, bsz=128, num_updates=9278, lr=9.99338e-05, gnorm=2.454, loss_scale=1, train_wall=11, gb_free=2.8, wall=106992
2021-06-20 00:22:08 | INFO | train_inner | epoch 004: 331 / 3002 loss=2.556, ppl=5.88, wps=5892.8, ups=0.09, wpb=64895, bsz=128, num_updates=9279, lr=9.99338e-05, gnorm=7.276, loss_scale=1, train_wall=11, gb_free=2.8, wall=107003
2021-06-20 00:22:19 | INFO | train_inner | epoch 004: 332 / 3002 loss=2.674, ppl=6.38, wps=5881, ups=0.09, wpb=64760, bsz=128, num_updates=9280, lr=9.99338e-05, gnorm=2.383, loss_scale=1, train_wall=11, gb_free=2.8, wall=107014
2021-06-20 00:22:30 | INFO | train_inner | epoch 004: 333 / 3002 loss=2.772, ppl=6.83, wps=5910.3, ups=0.09, wpb=64662, bsz=128, num_updates=9281, lr=9.99337e-05, gnorm=2.817, loss_scale=1, train_wall=10, gb_free=2.8, wall=107025
2021-06-20 00:22:41 | INFO | train_inner | epoch 004: 334 / 3002 loss=2.514, ppl=5.71, wps=5811.7, ups=0.09, wpb=64817, bsz=128, num_updates=9282, lr=9.99337e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=107036
2021-06-20 00:22:52 | INFO | train_inner | epoch 004: 335 / 3002 loss=2.629, ppl=6.18, wps=5853.6, ups=0.09, wpb=64889, bsz=128, num_updates=9283, lr=9.99337e-05, gnorm=2.326, loss_scale=1, train_wall=11, gb_free=2.8, wall=107047
2021-06-20 00:23:04 | INFO | train_inner | epoch 004: 336 / 3002 loss=2.67, ppl=6.36, wps=5810.6, ups=0.09, wpb=64805, bsz=128, num_updates=9284, lr=9.99337e-05, gnorm=2.533, loss_scale=1, train_wall=11, gb_free=2.8, wall=107058
2021-06-20 00:23:15 | INFO | train_inner | epoch 004: 337 / 3002 loss=2.684, ppl=6.43, wps=5917.5, ups=0.09, wpb=64818, bsz=128, num_updates=9285, lr=9.99337e-05, gnorm=2.359, loss_scale=1, train_wall=10, gb_free=2.8, wall=107069
2021-06-20 00:23:26 | INFO | train_inner | epoch 004: 338 / 3002 loss=2.512, ppl=5.7, wps=5848.6, ups=0.09, wpb=64841, bsz=128, num_updates=9286, lr=9.99337e-05, gnorm=2.196, loss_scale=1, train_wall=11, gb_free=2.8, wall=107080
2021-06-20 00:23:37 | INFO | train_inner | epoch 004: 339 / 3002 loss=2.568, ppl=5.93, wps=5906.9, ups=0.09, wpb=64837, bsz=128, num_updates=9287, lr=9.99337e-05, gnorm=2.309, loss_scale=1, train_wall=11, gb_free=2.8, wall=107091
2021-06-20 00:23:48 | INFO | train_inner | epoch 004: 340 / 3002 loss=2.721, ppl=6.59, wps=5847.7, ups=0.09, wpb=64871, bsz=128, num_updates=9288, lr=9.99337e-05, gnorm=2.432, loss_scale=1, train_wall=11, gb_free=2.8, wall=107102
2021-06-20 00:23:59 | INFO | train_inner | epoch 004: 341 / 3002 loss=2.764, ppl=6.79, wps=5802.9, ups=0.09, wpb=64777, bsz=128, num_updates=9289, lr=9.99337e-05, gnorm=2.156, loss_scale=1, train_wall=11, gb_free=2.8, wall=107113
2021-06-20 00:24:10 | INFO | train_inner | epoch 004: 342 / 3002 loss=2.529, ppl=5.77, wps=5786.4, ups=0.09, wpb=64905, bsz=128, num_updates=9290, lr=9.99337e-05, gnorm=25.173, loss_scale=1, train_wall=11, gb_free=2.8, wall=107124
2021-06-20 00:24:21 | INFO | train_inner | epoch 004: 343 / 3002 loss=2.655, ppl=6.3, wps=5892.1, ups=0.09, wpb=64910, bsz=128, num_updates=9291, lr=9.99337e-05, gnorm=2.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=107135
2021-06-20 00:24:32 | INFO | train_inner | epoch 004: 344 / 3002 loss=2.686, ppl=6.43, wps=5845.7, ups=0.09, wpb=64852, bsz=128, num_updates=9292, lr=9.99337e-05, gnorm=4.135, loss_scale=1, train_wall=11, gb_free=2.8, wall=107147
2021-06-20 00:24:43 | INFO | train_inner | epoch 004: 345 / 3002 loss=2.425, ppl=5.37, wps=5947.3, ups=0.09, wpb=64865, bsz=128, num_updates=9293, lr=9.99337e-05, gnorm=2.792, loss_scale=1, train_wall=10, gb_free=2.8, wall=107157
2021-06-20 00:24:54 | INFO | train_inner | epoch 004: 346 / 3002 loss=2.696, ppl=6.48, wps=5832, ups=0.09, wpb=64831, bsz=128, num_updates=9294, lr=9.99336e-05, gnorm=2.638, loss_scale=1, train_wall=11, gb_free=2.8, wall=107169
2021-06-20 00:25:05 | INFO | train_inner | epoch 004: 347 / 3002 loss=2.562, ppl=5.9, wps=5893.1, ups=0.09, wpb=64902, bsz=128, num_updates=9295, lr=9.99336e-05, gnorm=3.358, loss_scale=1, train_wall=11, gb_free=2.8, wall=107180
2021-06-20 00:25:16 | INFO | train_inner | epoch 004: 348 / 3002 loss=2.643, ppl=6.25, wps=5867, ups=0.09, wpb=64768, bsz=128, num_updates=9296, lr=9.99336e-05, gnorm=4.069, loss_scale=1, train_wall=11, gb_free=2.8, wall=107191
2021-06-20 00:25:27 | INFO | train_inner | epoch 004: 349 / 3002 loss=2.813, ppl=7.03, wps=5893.6, ups=0.09, wpb=64849, bsz=128, num_updates=9297, lr=9.99336e-05, gnorm=2.208, loss_scale=1, train_wall=11, gb_free=2.8, wall=107202
2021-06-20 00:25:38 | INFO | train_inner | epoch 004: 350 / 3002 loss=2.674, ppl=6.38, wps=5892.2, ups=0.09, wpb=64910, bsz=128, num_updates=9298, lr=9.99336e-05, gnorm=5.482, loss_scale=1, train_wall=11, gb_free=2.8, wall=107213
2021-06-20 00:25:49 | INFO | train_inner | epoch 004: 351 / 3002 loss=2.564, ppl=5.92, wps=5801.4, ups=0.09, wpb=64867, bsz=128, num_updates=9299, lr=9.99336e-05, gnorm=2.459, loss_scale=1, train_wall=11, gb_free=2.8, wall=107224
2021-06-20 00:26:00 | INFO | train_inner | epoch 004: 352 / 3002 loss=2.606, ppl=6.09, wps=5870.9, ups=0.09, wpb=64824, bsz=128, num_updates=9300, lr=9.99336e-05, gnorm=2.322, loss_scale=1, train_wall=11, gb_free=2.8, wall=107235
2021-06-20 00:26:11 | INFO | train_inner | epoch 004: 353 / 3002 loss=2.62, ppl=6.15, wps=5966.7, ups=0.09, wpb=64836, bsz=128, num_updates=9301, lr=9.99336e-05, gnorm=2.191, loss_scale=1, train_wall=10, gb_free=2.8, wall=107246
2021-06-20 00:26:22 | INFO | train_inner | epoch 004: 354 / 3002 loss=2.871, ppl=7.32, wps=5808.5, ups=0.09, wpb=64715, bsz=128, num_updates=9302, lr=9.99336e-05, gnorm=2.916, loss_scale=2, train_wall=11, gb_free=2.8, wall=107257
2021-06-20 00:26:34 | INFO | train_inner | epoch 004: 355 / 3002 loss=2.696, ppl=6.48, wps=5830.8, ups=0.09, wpb=64916, bsz=128, num_updates=9303, lr=9.99336e-05, gnorm=2.359, loss_scale=2, train_wall=11, gb_free=2.8, wall=107268
2021-06-20 00:26:45 | INFO | train_inner | epoch 004: 356 / 3002 loss=2.626, ppl=6.17, wps=5906.1, ups=0.09, wpb=64891, bsz=128, num_updates=9304, lr=9.99336e-05, gnorm=2.282, loss_scale=2, train_wall=11, gb_free=2.8, wall=107279
2021-06-20 00:26:56 | INFO | train_inner | epoch 004: 357 / 3002 loss=2.704, ppl=6.51, wps=5823.7, ups=0.09, wpb=64894, bsz=128, num_updates=9305, lr=9.99336e-05, gnorm=2.428, loss_scale=2, train_wall=11, gb_free=2.8, wall=107290
2021-06-20 00:27:07 | INFO | train_inner | epoch 004: 358 / 3002 loss=2.682, ppl=6.42, wps=5765.2, ups=0.09, wpb=64845, bsz=128, num_updates=9306, lr=9.99335e-05, gnorm=5.246, loss_scale=2, train_wall=11, gb_free=2.8, wall=107301
2021-06-20 00:27:18 | INFO | train_inner | epoch 004: 359 / 3002 loss=2.491, ppl=5.62, wps=5848, ups=0.09, wpb=64836, bsz=128, num_updates=9307, lr=9.99335e-05, gnorm=2.689, loss_scale=2, train_wall=11, gb_free=2.8, wall=107312
2021-06-20 00:27:29 | INFO | train_inner | epoch 004: 360 / 3002 loss=2.553, ppl=5.87, wps=5795.7, ups=0.09, wpb=64818, bsz=128, num_updates=9308, lr=9.99335e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=107324
2021-06-20 00:27:40 | INFO | train_inner | epoch 004: 361 / 3002 loss=2.65, ppl=6.28, wps=5802.2, ups=0.09, wpb=64831, bsz=128, num_updates=9309, lr=9.99335e-05, gnorm=3.325, loss_scale=2, train_wall=11, gb_free=2.8, wall=107335
2021-06-20 00:27:51 | INFO | train_inner | epoch 004: 362 / 3002 loss=2.657, ppl=6.31, wps=5940.6, ups=0.09, wpb=64802, bsz=128, num_updates=9310, lr=9.99335e-05, gnorm=2.641, loss_scale=2, train_wall=10, gb_free=2.8, wall=107346
2021-06-20 00:28:02 | INFO | train_inner | epoch 004: 363 / 3002 loss=2.698, ppl=6.49, wps=5890.8, ups=0.09, wpb=64851, bsz=128, num_updates=9311, lr=9.99335e-05, gnorm=2.84, loss_scale=2, train_wall=11, gb_free=2.8, wall=107357
2021-06-20 00:28:13 | INFO | train_inner | epoch 004: 364 / 3002 loss=2.768, ppl=6.81, wps=5880.2, ups=0.09, wpb=64812, bsz=128, num_updates=9312, lr=9.99335e-05, gnorm=3.446, loss_scale=2, train_wall=11, gb_free=2.8, wall=107368
2021-06-20 00:28:24 | INFO | train_inner | epoch 004: 365 / 3002 loss=2.666, ppl=6.35, wps=5942.6, ups=0.09, wpb=64775, bsz=128, num_updates=9313, lr=9.99335e-05, gnorm=20.947, loss_scale=2, train_wall=10, gb_free=2.8, wall=107379
2021-06-20 00:28:35 | INFO | train_inner | epoch 004: 366 / 3002 loss=2.624, ppl=6.17, wps=5954.5, ups=0.09, wpb=64891, bsz=128, num_updates=9314, lr=9.99335e-05, gnorm=3.057, loss_scale=2, train_wall=10, gb_free=2.8, wall=107390
2021-06-20 00:28:46 | INFO | train_inner | epoch 004: 367 / 3002 loss=2.679, ppl=6.4, wps=5867.6, ups=0.09, wpb=64832, bsz=128, num_updates=9315, lr=9.99335e-05, gnorm=2.472, loss_scale=2, train_wall=11, gb_free=2.8, wall=107401
2021-06-20 00:28:57 | INFO | train_inner | epoch 004: 368 / 3002 loss=2.609, ppl=6.1, wps=5846.5, ups=0.09, wpb=64773, bsz=128, num_updates=9316, lr=9.99335e-05, gnorm=2.887, loss_scale=2, train_wall=11, gb_free=2.8, wall=107412
2021-06-20 00:29:08 | INFO | train_inner | epoch 004: 369 / 3002 loss=2.513, ppl=5.71, wps=5975.2, ups=0.09, wpb=64882, bsz=128, num_updates=9317, lr=9.99335e-05, gnorm=2.295, loss_scale=2, train_wall=10, gb_free=2.8, wall=107423
2021-06-20 00:29:19 | INFO | train_inner | epoch 004: 370 / 3002 loss=2.725, ppl=6.61, wps=5820.6, ups=0.09, wpb=64755, bsz=128, num_updates=9318, lr=9.99335e-05, gnorm=2.659, loss_scale=2, train_wall=11, gb_free=2.8, wall=107434
2021-06-20 00:29:31 | INFO | train_inner | epoch 004: 371 / 3002 loss=2.763, ppl=6.79, wps=5780.2, ups=0.09, wpb=64846, bsz=128, num_updates=9319, lr=9.99334e-05, gnorm=5.275, loss_scale=2, train_wall=11, gb_free=2.8, wall=107445
2021-06-20 00:29:42 | INFO | train_inner | epoch 004: 372 / 3002 loss=2.594, ppl=6.04, wps=5734.8, ups=0.09, wpb=64750, bsz=128, num_updates=9320, lr=9.99334e-05, gnorm=13.491, loss_scale=2, train_wall=11, gb_free=2.8, wall=107456
2021-06-20 00:29:53 | INFO | train_inner | epoch 004: 373 / 3002 loss=2.78, ppl=6.87, wps=5857.3, ups=0.09, wpb=64851, bsz=128, num_updates=9321, lr=9.99334e-05, gnorm=2.396, loss_scale=2, train_wall=11, gb_free=2.8, wall=107467
2021-06-20 00:30:04 | INFO | train_inner | epoch 004: 374 / 3002 loss=2.732, ppl=6.64, wps=5918.3, ups=0.09, wpb=64815, bsz=128, num_updates=9322, lr=9.99334e-05, gnorm=3.121, loss_scale=2, train_wall=10, gb_free=2.8, wall=107478
2021-06-20 00:30:15 | INFO | train_inner | epoch 004: 375 / 3002 loss=2.679, ppl=6.4, wps=5930.8, ups=0.09, wpb=64831, bsz=128, num_updates=9323, lr=9.99334e-05, gnorm=2.283, loss_scale=2, train_wall=10, gb_free=2.8, wall=107489
2021-06-20 00:30:26 | INFO | train_inner | epoch 004: 376 / 3002 loss=2.607, ppl=6.09, wps=5785.4, ups=0.09, wpb=64838, bsz=128, num_updates=9324, lr=9.99334e-05, gnorm=2.979, loss_scale=2, train_wall=11, gb_free=2.8, wall=107500
2021-06-20 00:30:37 | INFO | train_inner | epoch 004: 377 / 3002 loss=2.781, ppl=6.88, wps=5758, ups=0.09, wpb=64804, bsz=128, num_updates=9325, lr=9.99334e-05, gnorm=2.377, loss_scale=2, train_wall=11, gb_free=2.8, wall=107512
2021-06-20 00:30:48 | INFO | train_inner | epoch 004: 378 / 3002 loss=2.734, ppl=6.65, wps=5805.4, ups=0.09, wpb=64732, bsz=128, num_updates=9326, lr=9.99334e-05, gnorm=2.322, loss_scale=2, train_wall=11, gb_free=2.8, wall=107523
2021-06-20 00:30:59 | INFO | train_inner | epoch 004: 379 / 3002 loss=2.553, ppl=5.87, wps=5849.7, ups=0.09, wpb=64810, bsz=128, num_updates=9327, lr=9.99334e-05, gnorm=2.589, loss_scale=2, train_wall=11, gb_free=2.8, wall=107534
2021-06-20 00:31:11 | INFO | train_inner | epoch 004: 380 / 3002 loss=2.606, ppl=6.09, wps=5807.3, ups=0.09, wpb=64868, bsz=128, num_updates=9328, lr=9.99334e-05, gnorm=2.242, loss_scale=2, train_wall=11, gb_free=2.8, wall=107545
2021-06-20 00:31:22 | INFO | train_inner | epoch 004: 381 / 3002 loss=2.737, ppl=6.67, wps=5839.6, ups=0.09, wpb=64764, bsz=128, num_updates=9329, lr=9.99334e-05, gnorm=2.244, loss_scale=2, train_wall=11, gb_free=2.8, wall=107556
2021-06-20 00:31:33 | INFO | train_inner | epoch 004: 382 / 3002 loss=2.554, ppl=5.87, wps=5841.2, ups=0.09, wpb=64780, bsz=128, num_updates=9330, lr=9.99334e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=107567
2021-06-20 00:31:44 | INFO | train_inner | epoch 004: 383 / 3002 loss=2.58, ppl=5.98, wps=5936.2, ups=0.09, wpb=64861, bsz=128, num_updates=9331, lr=9.99333e-05, gnorm=2.184, loss_scale=2, train_wall=10, gb_free=2.8, wall=107578
2021-06-20 00:31:55 | INFO | train_inner | epoch 004: 384 / 3002 loss=2.568, ppl=5.93, wps=5753.3, ups=0.09, wpb=64828, bsz=128, num_updates=9332, lr=9.99333e-05, gnorm=2.153, loss_scale=2, train_wall=11, gb_free=2.8, wall=107589
2021-06-20 00:32:06 | INFO | train_inner | epoch 004: 385 / 3002 loss=2.541, ppl=5.82, wps=5802, ups=0.09, wpb=64783, bsz=128, num_updates=9333, lr=9.99333e-05, gnorm=2.303, loss_scale=2, train_wall=11, gb_free=2.8, wall=107601
2021-06-20 00:32:17 | INFO | train_inner | epoch 004: 386 / 3002 loss=2.678, ppl=6.4, wps=5911, ups=0.09, wpb=64889, bsz=128, num_updates=9334, lr=9.99333e-05, gnorm=2.325, loss_scale=2, train_wall=11, gb_free=2.8, wall=107611
2021-06-20 00:32:28 | INFO | train_inner | epoch 004: 387 / 3002 loss=2.755, ppl=6.75, wps=5855.9, ups=0.09, wpb=64795, bsz=128, num_updates=9335, lr=9.99333e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=107623
2021-06-20 00:32:39 | INFO | train_inner | epoch 004: 388 / 3002 loss=2.551, ppl=5.86, wps=5816.3, ups=0.09, wpb=64813, bsz=128, num_updates=9336, lr=9.99333e-05, gnorm=3.02, loss_scale=2, train_wall=11, gb_free=2.8, wall=107634
2021-06-20 00:32:50 | INFO | train_inner | epoch 004: 389 / 3002 loss=2.436, ppl=5.41, wps=5844.6, ups=0.09, wpb=64766, bsz=128, num_updates=9337, lr=9.99333e-05, gnorm=2.139, loss_scale=2, train_wall=11, gb_free=2.8, wall=107645
2021-06-20 00:33:02 | INFO | train_inner | epoch 004: 390 / 3002 loss=2.482, ppl=5.59, wps=5820.7, ups=0.09, wpb=64867, bsz=128, num_updates=9338, lr=9.99333e-05, gnorm=2.143, loss_scale=2, train_wall=11, gb_free=2.8, wall=107656
2021-06-20 00:33:13 | INFO | train_inner | epoch 004: 391 / 3002 loss=2.581, ppl=5.98, wps=5916.7, ups=0.09, wpb=64862, bsz=128, num_updates=9339, lr=9.99333e-05, gnorm=2.453, loss_scale=2, train_wall=10, gb_free=2.8, wall=107667
2021-06-20 00:33:24 | INFO | train_inner | epoch 004: 392 / 3002 loss=2.475, ppl=5.56, wps=5902.5, ups=0.09, wpb=64813, bsz=128, num_updates=9340, lr=9.99333e-05, gnorm=2.32, loss_scale=2, train_wall=11, gb_free=2.8, wall=107678
2021-06-20 00:33:35 | INFO | train_inner | epoch 004: 393 / 3002 loss=2.53, ppl=5.77, wps=5745.3, ups=0.09, wpb=64864, bsz=128, num_updates=9341, lr=9.99333e-05, gnorm=2.185, loss_scale=2, train_wall=11, gb_free=2.8, wall=107689
2021-06-20 00:33:46 | INFO | train_inner | epoch 004: 394 / 3002 loss=2.655, ppl=6.3, wps=5958.9, ups=0.09, wpb=64814, bsz=128, num_updates=9342, lr=9.99333e-05, gnorm=2.125, loss_scale=2, train_wall=10, gb_free=2.8, wall=107700
2021-06-20 00:33:57 | INFO | train_inner | epoch 004: 395 / 3002 loss=2.717, ppl=6.58, wps=5985.2, ups=0.09, wpb=64749, bsz=128, num_updates=9343, lr=9.99333e-05, gnorm=2.165, loss_scale=2, train_wall=10, gb_free=2.8, wall=107711
2021-06-20 00:34:08 | INFO | train_inner | epoch 004: 396 / 3002 loss=2.617, ppl=6.13, wps=5836.9, ups=0.09, wpb=64932, bsz=128, num_updates=9344, lr=9.99332e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=107722
2021-06-20 00:34:19 | INFO | train_inner | epoch 004: 397 / 3002 loss=2.828, ppl=7.1, wps=5849.8, ups=0.09, wpb=64849, bsz=128, num_updates=9345, lr=9.99332e-05, gnorm=2.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=107733
2021-06-20 00:34:30 | INFO | train_inner | epoch 004: 398 / 3002 loss=2.556, ppl=5.88, wps=5799.9, ups=0.09, wpb=64875, bsz=128, num_updates=9346, lr=9.99332e-05, gnorm=2.047, loss_scale=2, train_wall=11, gb_free=2.8, wall=107744
2021-06-20 00:34:41 | INFO | train_inner | epoch 004: 399 / 3002 loss=2.768, ppl=6.81, wps=5842, ups=0.09, wpb=64734, bsz=128, num_updates=9347, lr=9.99332e-05, gnorm=2.449, loss_scale=2, train_wall=11, gb_free=2.8, wall=107755
2021-06-20 00:34:52 | INFO | train_inner | epoch 004: 400 / 3002 loss=2.607, ppl=6.09, wps=5851.4, ups=0.09, wpb=64834, bsz=128, num_updates=9348, lr=9.99332e-05, gnorm=2.22, loss_scale=2, train_wall=11, gb_free=2.8, wall=107766
2021-06-20 00:35:03 | INFO | train_inner | epoch 004: 401 / 3002 loss=2.424, ppl=5.37, wps=5796.2, ups=0.09, wpb=64770, bsz=128, num_updates=9349, lr=9.99332e-05, gnorm=2.299, loss_scale=2, train_wall=11, gb_free=2.8, wall=107778
2021-06-20 00:35:14 | INFO | train_inner | epoch 004: 402 / 3002 loss=2.615, ppl=6.13, wps=5847.4, ups=0.09, wpb=64710, bsz=128, num_updates=9350, lr=9.99332e-05, gnorm=2.303, loss_scale=2, train_wall=11, gb_free=2.8, wall=107789
2021-06-20 00:35:25 | INFO | train_inner | epoch 004: 403 / 3002 loss=2.548, ppl=5.85, wps=5903, ups=0.09, wpb=64834, bsz=128, num_updates=9351, lr=9.99332e-05, gnorm=2.05, loss_scale=2, train_wall=11, gb_free=2.8, wall=107800
2021-06-20 00:35:36 | INFO | train_inner | epoch 004: 404 / 3002 loss=2.546, ppl=5.84, wps=5932.3, ups=0.09, wpb=64842, bsz=128, num_updates=9352, lr=9.99332e-05, gnorm=2.168, loss_scale=2, train_wall=10, gb_free=2.8, wall=107811
2021-06-20 00:35:47 | INFO | train_inner | epoch 004: 405 / 3002 loss=2.712, ppl=6.55, wps=5785.2, ups=0.09, wpb=64820, bsz=128, num_updates=9353, lr=9.99332e-05, gnorm=2.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=107822
2021-06-20 00:35:58 | INFO | train_inner | epoch 004: 406 / 3002 loss=2.583, ppl=5.99, wps=5903.1, ups=0.09, wpb=64868, bsz=128, num_updates=9354, lr=9.99332e-05, gnorm=2.206, loss_scale=2, train_wall=11, gb_free=2.8, wall=107833
2021-06-20 00:36:09 | INFO | train_inner | epoch 004: 407 / 3002 loss=2.532, ppl=5.78, wps=5969.4, ups=0.09, wpb=64887, bsz=128, num_updates=9355, lr=9.99332e-05, gnorm=2.12, loss_scale=2, train_wall=10, gb_free=2.8, wall=107844
2021-06-20 00:36:20 | INFO | train_inner | epoch 004: 408 / 3002 loss=2.549, ppl=5.85, wps=5861.1, ups=0.09, wpb=64852, bsz=128, num_updates=9356, lr=9.99331e-05, gnorm=2.281, loss_scale=2, train_wall=11, gb_free=2.8, wall=107855
2021-06-20 00:36:32 | INFO | train_inner | epoch 004: 409 / 3002 loss=2.643, ppl=6.25, wps=5784.2, ups=0.09, wpb=64830, bsz=128, num_updates=9357, lr=9.99331e-05, gnorm=2.081, loss_scale=2, train_wall=11, gb_free=2.8, wall=107866
2021-06-20 00:36:43 | INFO | train_inner | epoch 004: 410 / 3002 loss=2.6, ppl=6.06, wps=5802, ups=0.09, wpb=64760, bsz=128, num_updates=9358, lr=9.99331e-05, gnorm=2.386, loss_scale=2, train_wall=11, gb_free=2.8, wall=107877
2021-06-20 00:36:54 | INFO | train_inner | epoch 004: 411 / 3002 loss=2.669, ppl=6.36, wps=5886.4, ups=0.09, wpb=64808, bsz=128, num_updates=9359, lr=9.99331e-05, gnorm=2.582, loss_scale=2, train_wall=11, gb_free=2.8, wall=107888
2021-06-20 00:37:05 | INFO | train_inner | epoch 004: 412 / 3002 loss=2.575, ppl=5.96, wps=5936.7, ups=0.09, wpb=64836, bsz=128, num_updates=9360, lr=9.99331e-05, gnorm=2.317, loss_scale=2, train_wall=10, gb_free=2.8, wall=107899
2021-06-20 00:37:16 | INFO | train_inner | epoch 004: 413 / 3002 loss=2.545, ppl=5.84, wps=5765.5, ups=0.09, wpb=64856, bsz=128, num_updates=9361, lr=9.99331e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=107910
2021-06-20 00:37:27 | INFO | train_inner | epoch 004: 414 / 3002 loss=2.574, ppl=5.96, wps=5969.7, ups=0.09, wpb=64817, bsz=128, num_updates=9362, lr=9.99331e-05, gnorm=5.134, loss_scale=2, train_wall=10, gb_free=2.8, wall=107921
2021-06-20 00:37:38 | INFO | train_inner | epoch 004: 415 / 3002 loss=2.566, ppl=5.92, wps=5823.1, ups=0.09, wpb=64859, bsz=128, num_updates=9363, lr=9.99331e-05, gnorm=2.103, loss_scale=2, train_wall=11, gb_free=2.8, wall=107932
2021-06-20 00:37:49 | INFO | train_inner | epoch 004: 416 / 3002 loss=2.58, ppl=5.98, wps=5851.6, ups=0.09, wpb=64872, bsz=128, num_updates=9364, lr=9.99331e-05, gnorm=2.404, loss_scale=2, train_wall=11, gb_free=2.8, wall=107943
2021-06-20 00:38:00 | INFO | train_inner | epoch 004: 417 / 3002 loss=2.48, ppl=5.58, wps=5932.9, ups=0.09, wpb=64907, bsz=128, num_updates=9365, lr=9.99331e-05, gnorm=2.599, loss_scale=2, train_wall=11, gb_free=2.8, wall=107954
2021-06-20 00:38:11 | INFO | train_inner | epoch 004: 418 / 3002 loss=2.57, ppl=5.94, wps=5821.9, ups=0.09, wpb=64835, bsz=128, num_updates=9366, lr=9.99331e-05, gnorm=2.474, loss_scale=2, train_wall=11, gb_free=2.8, wall=107965
2021-06-20 00:38:22 | INFO | train_inner | epoch 004: 419 / 3002 loss=2.568, ppl=5.93, wps=5851.9, ups=0.09, wpb=64803, bsz=128, num_updates=9367, lr=9.99331e-05, gnorm=2.468, loss_scale=2, train_wall=11, gb_free=2.8, wall=107976
2021-06-20 00:38:33 | INFO | train_inner | epoch 004: 420 / 3002 loss=2.65, ppl=6.28, wps=5859.2, ups=0.09, wpb=64892, bsz=128, num_updates=9368, lr=9.99331e-05, gnorm=2.246, loss_scale=2, train_wall=11, gb_free=2.8, wall=107988
2021-06-20 00:38:44 | INFO | train_inner | epoch 004: 421 / 3002 loss=2.669, ppl=6.36, wps=5807.9, ups=0.09, wpb=64916, bsz=128, num_updates=9369, lr=9.9933e-05, gnorm=9.313, loss_scale=2, train_wall=11, gb_free=2.8, wall=107999
2021-06-20 00:38:55 | INFO | train_inner | epoch 004: 422 / 3002 loss=2.524, ppl=5.75, wps=5981, ups=0.09, wpb=64777, bsz=128, num_updates=9370, lr=9.9933e-05, gnorm=2.195, loss_scale=2, train_wall=10, gb_free=2.8, wall=108010
2021-06-20 00:39:06 | INFO | train_inner | epoch 004: 423 / 3002 loss=2.499, ppl=5.65, wps=5792.4, ups=0.09, wpb=64754, bsz=128, num_updates=9371, lr=9.9933e-05, gnorm=2.384, loss_scale=2, train_wall=11, gb_free=2.8, wall=108021
2021-06-20 00:39:17 | INFO | train_inner | epoch 004: 424 / 3002 loss=2.648, ppl=6.27, wps=5981.6, ups=0.09, wpb=64867, bsz=128, num_updates=9372, lr=9.9933e-05, gnorm=2.388, loss_scale=2, train_wall=10, gb_free=2.8, wall=108032
2021-06-20 00:39:28 | INFO | train_inner | epoch 004: 425 / 3002 loss=2.585, ppl=6, wps=5762.3, ups=0.09, wpb=64784, bsz=128, num_updates=9373, lr=9.9933e-05, gnorm=2.591, loss_scale=2, train_wall=11, gb_free=2.8, wall=108043
2021-06-20 00:39:39 | INFO | train_inner | epoch 004: 426 / 3002 loss=2.585, ppl=6, wps=5925.3, ups=0.09, wpb=64860, bsz=128, num_updates=9374, lr=9.9933e-05, gnorm=2.444, loss_scale=2, train_wall=10, gb_free=2.8, wall=108054
2021-06-20 00:39:50 | INFO | train_inner | epoch 004: 427 / 3002 loss=2.395, ppl=5.26, wps=5922.1, ups=0.09, wpb=64877, bsz=128, num_updates=9375, lr=9.9933e-05, gnorm=3.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=108065
2021-06-20 00:40:01 | INFO | train_inner | epoch 004: 428 / 3002 loss=2.66, ppl=6.32, wps=5870.3, ups=0.09, wpb=64830, bsz=128, num_updates=9376, lr=9.9933e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=108076
2021-06-20 00:40:13 | INFO | train_inner | epoch 004: 429 / 3002 loss=2.678, ppl=6.4, wps=5811.5, ups=0.09, wpb=64812, bsz=128, num_updates=9377, lr=9.9933e-05, gnorm=2.061, loss_scale=2, train_wall=11, gb_free=2.8, wall=108087
2021-06-20 00:40:24 | INFO | train_inner | epoch 004: 430 / 3002 loss=2.65, ppl=6.28, wps=5832, ups=0.09, wpb=64856, bsz=128, num_updates=9378, lr=9.9933e-05, gnorm=2.148, loss_scale=2, train_wall=11, gb_free=2.8, wall=108098
2021-06-20 00:40:35 | INFO | train_inner | epoch 004: 431 / 3002 loss=2.737, ppl=6.67, wps=5788.4, ups=0.09, wpb=64766, bsz=128, num_updates=9379, lr=9.9933e-05, gnorm=6.462, loss_scale=2, train_wall=11, gb_free=2.8, wall=108109
2021-06-20 00:40:46 | INFO | train_inner | epoch 004: 432 / 3002 loss=2.688, ppl=6.45, wps=5860.1, ups=0.09, wpb=64783, bsz=128, num_updates=9380, lr=9.9933e-05, gnorm=2.185, loss_scale=2, train_wall=11, gb_free=2.8, wall=108120
2021-06-20 00:40:57 | INFO | train_inner | epoch 004: 433 / 3002 loss=2.748, ppl=6.72, wps=5934.6, ups=0.09, wpb=64836, bsz=128, num_updates=9381, lr=9.99329e-05, gnorm=3.639, loss_scale=2, train_wall=10, gb_free=2.8, wall=108131
2021-06-20 00:41:08 | INFO | train_inner | epoch 004: 434 / 3002 loss=2.613, ppl=6.12, wps=5868.9, ups=0.09, wpb=64903, bsz=128, num_updates=9382, lr=9.99329e-05, gnorm=3.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=108142
2021-06-20 00:41:19 | INFO | train_inner | epoch 004: 435 / 3002 loss=2.591, ppl=6.02, wps=5916.5, ups=0.09, wpb=64831, bsz=128, num_updates=9383, lr=9.99329e-05, gnorm=2.123, loss_scale=2, train_wall=10, gb_free=2.8, wall=108153
2021-06-20 00:41:30 | INFO | train_inner | epoch 004: 436 / 3002 loss=2.71, ppl=6.55, wps=5738.5, ups=0.09, wpb=64789, bsz=128, num_updates=9384, lr=9.99329e-05, gnorm=2.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=108165
2021-06-20 00:41:41 | INFO | train_inner | epoch 004: 437 / 3002 loss=2.611, ppl=6.11, wps=5866.7, ups=0.09, wpb=64779, bsz=128, num_updates=9385, lr=9.99329e-05, gnorm=2.858, loss_scale=2, train_wall=11, gb_free=2.8, wall=108176
2021-06-20 00:41:52 | INFO | train_inner | epoch 004: 438 / 3002 loss=2.736, ppl=6.66, wps=5856.1, ups=0.09, wpb=64869, bsz=128, num_updates=9386, lr=9.99329e-05, gnorm=2.357, loss_scale=2, train_wall=11, gb_free=2.8, wall=108187
2021-06-20 00:42:03 | INFO | train_inner | epoch 004: 439 / 3002 loss=2.698, ppl=6.49, wps=5982.2, ups=0.09, wpb=64829, bsz=128, num_updates=9387, lr=9.99329e-05, gnorm=6.58, loss_scale=2, train_wall=10, gb_free=2.8, wall=108197
2021-06-20 00:42:14 | INFO | train_inner | epoch 004: 440 / 3002 loss=2.632, ppl=6.2, wps=5965.1, ups=0.09, wpb=64857, bsz=128, num_updates=9388, lr=9.99329e-05, gnorm=2.228, loss_scale=2, train_wall=10, gb_free=2.8, wall=108208
2021-06-20 00:42:25 | INFO | train_inner | epoch 004: 441 / 3002 loss=2.547, ppl=5.84, wps=5771, ups=0.09, wpb=64724, bsz=128, num_updates=9389, lr=9.99329e-05, gnorm=2.184, loss_scale=2, train_wall=11, gb_free=2.8, wall=108220
2021-06-20 00:42:36 | INFO | train_inner | epoch 004: 442 / 3002 loss=2.648, ppl=6.27, wps=5872.4, ups=0.09, wpb=64729, bsz=128, num_updates=9390, lr=9.99329e-05, gnorm=2.202, loss_scale=2, train_wall=11, gb_free=2.8, wall=108231
2021-06-20 00:42:47 | INFO | train_inner | epoch 004: 443 / 3002 loss=2.62, ppl=6.15, wps=5914.7, ups=0.09, wpb=64846, bsz=128, num_updates=9391, lr=9.99329e-05, gnorm=2.105, loss_scale=2, train_wall=11, gb_free=2.8, wall=108242
2021-06-20 00:42:58 | INFO | train_inner | epoch 004: 444 / 3002 loss=2.709, ppl=6.54, wps=5883.8, ups=0.09, wpb=64715, bsz=128, num_updates=9392, lr=9.99329e-05, gnorm=2.952, loss_scale=2, train_wall=11, gb_free=2.8, wall=108253
2021-06-20 00:43:09 | INFO | train_inner | epoch 004: 445 / 3002 loss=2.51, ppl=5.7, wps=5851, ups=0.09, wpb=64812, bsz=128, num_updates=9393, lr=9.99329e-05, gnorm=2.148, loss_scale=2, train_wall=11, gb_free=2.8, wall=108264
2021-06-20 00:43:21 | INFO | train_inner | epoch 004: 446 / 3002 loss=2.544, ppl=5.83, wps=5771, ups=0.09, wpb=64839, bsz=128, num_updates=9394, lr=9.99328e-05, gnorm=2.166, loss_scale=2, train_wall=11, gb_free=2.8, wall=108275
2021-06-20 00:43:31 | INFO | train_inner | epoch 004: 447 / 3002 loss=2.444, ppl=5.44, wps=5968.1, ups=0.09, wpb=64875, bsz=128, num_updates=9395, lr=9.99328e-05, gnorm=2.279, loss_scale=2, train_wall=10, gb_free=2.8, wall=108286
2021-06-20 00:43:42 | INFO | train_inner | epoch 004: 448 / 3002 loss=2.611, ppl=6.11, wps=5893.2, ups=0.09, wpb=64816, bsz=128, num_updates=9396, lr=9.99328e-05, gnorm=2.589, loss_scale=2, train_wall=11, gb_free=2.8, wall=108297
2021-06-20 00:43:54 | INFO | train_inner | epoch 004: 449 / 3002 loss=2.524, ppl=5.75, wps=5748.1, ups=0.09, wpb=64793, bsz=128, num_updates=9397, lr=9.99328e-05, gnorm=2.135, loss_scale=2, train_wall=11, gb_free=2.8, wall=108308
2021-06-20 00:44:05 | INFO | train_inner | epoch 004: 450 / 3002 loss=2.45, ppl=5.46, wps=5862.7, ups=0.09, wpb=64851, bsz=128, num_updates=9398, lr=9.99328e-05, gnorm=3.454, loss_scale=2, train_wall=11, gb_free=2.8, wall=108319
2021-06-20 00:44:16 | INFO | train_inner | epoch 004: 451 / 3002 loss=2.631, ppl=6.2, wps=5818, ups=0.09, wpb=64846, bsz=128, num_updates=9399, lr=9.99328e-05, gnorm=2.468, loss_scale=2, train_wall=11, gb_free=2.8, wall=108330
2021-06-20 00:44:27 | INFO | train_inner | epoch 004: 452 / 3002 loss=2.519, ppl=5.73, wps=5807.6, ups=0.09, wpb=64814, bsz=128, num_updates=9400, lr=9.99328e-05, gnorm=2.271, loss_scale=2, train_wall=11, gb_free=2.8, wall=108341
2021-06-20 00:44:38 | INFO | train_inner | epoch 004: 453 / 3002 loss=2.61, ppl=6.1, wps=5761.7, ups=0.09, wpb=64836, bsz=128, num_updates=9401, lr=9.99328e-05, gnorm=4.066, loss_scale=2, train_wall=11, gb_free=2.8, wall=108353
2021-06-20 00:44:49 | INFO | train_inner | epoch 004: 454 / 3002 loss=2.45, ppl=5.47, wps=5845.3, ups=0.09, wpb=64775, bsz=128, num_updates=9402, lr=9.99328e-05, gnorm=2.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=108364
2021-06-20 00:45:01 | INFO | train_inner | epoch 004: 455 / 3002 loss=2.581, ppl=5.98, wps=5815.7, ups=0.09, wpb=64903, bsz=128, num_updates=9403, lr=9.99328e-05, gnorm=2.731, loss_scale=2, train_wall=11, gb_free=2.8, wall=108375
2021-06-20 00:45:11 | INFO | train_inner | epoch 004: 456 / 3002 loss=2.681, ppl=6.41, wps=5942.4, ups=0.09, wpb=64798, bsz=128, num_updates=9404, lr=9.99328e-05, gnorm=2.104, loss_scale=2, train_wall=10, gb_free=2.8, wall=108386
2021-06-20 00:45:23 | INFO | train_inner | epoch 004: 457 / 3002 loss=2.647, ppl=6.26, wps=5811, ups=0.09, wpb=64764, bsz=128, num_updates=9405, lr=9.99328e-05, gnorm=2.165, loss_scale=2, train_wall=11, gb_free=2.8, wall=108397
2021-06-20 00:45:33 | INFO | train_inner | epoch 004: 458 / 3002 loss=2.551, ppl=5.86, wps=5951.2, ups=0.09, wpb=64852, bsz=128, num_updates=9406, lr=9.99327e-05, gnorm=2.111, loss_scale=2, train_wall=10, gb_free=2.8, wall=108408
2021-06-20 00:45:45 | INFO | train_inner | epoch 004: 459 / 3002 loss=2.645, ppl=6.26, wps=5840.1, ups=0.09, wpb=64777, bsz=128, num_updates=9407, lr=9.99327e-05, gnorm=2.175, loss_scale=2, train_wall=11, gb_free=2.8, wall=108419
2021-06-20 00:45:56 | INFO | train_inner | epoch 004: 460 / 3002 loss=2.614, ppl=6.12, wps=5928, ups=0.09, wpb=64733, bsz=128, num_updates=9408, lr=9.99327e-05, gnorm=2.419, loss_scale=2, train_wall=10, gb_free=2.8, wall=108430
2021-06-20 00:46:07 | INFO | train_inner | epoch 004: 461 / 3002 loss=2.586, ppl=6.01, wps=5869.3, ups=0.09, wpb=64859, bsz=128, num_updates=9409, lr=9.99327e-05, gnorm=2.094, loss_scale=2, train_wall=11, gb_free=2.8, wall=108441
2021-06-20 00:46:18 | INFO | train_inner | epoch 004: 462 / 3002 loss=2.422, ppl=5.36, wps=5881, ups=0.09, wpb=64792, bsz=128, num_updates=9410, lr=9.99327e-05, gnorm=2.321, loss_scale=2, train_wall=11, gb_free=2.8, wall=108452
2021-06-20 00:46:29 | INFO | train_inner | epoch 004: 463 / 3002 loss=2.672, ppl=6.37, wps=5868.4, ups=0.09, wpb=64779, bsz=128, num_updates=9411, lr=9.99327e-05, gnorm=2.778, loss_scale=2, train_wall=11, gb_free=2.8, wall=108463
2021-06-20 00:46:40 | INFO | train_inner | epoch 004: 464 / 3002 loss=2.576, ppl=5.96, wps=5906.3, ups=0.09, wpb=64747, bsz=128, num_updates=9412, lr=9.99327e-05, gnorm=17.074, loss_scale=2, train_wall=10, gb_free=2.8, wall=108474
2021-06-20 00:46:51 | INFO | train_inner | epoch 004: 465 / 3002 loss=2.735, ppl=6.66, wps=5813.8, ups=0.09, wpb=64809, bsz=128, num_updates=9413, lr=9.99327e-05, gnorm=2.185, loss_scale=2, train_wall=11, gb_free=2.8, wall=108485
2021-06-20 00:47:02 | INFO | train_inner | epoch 004: 466 / 3002 loss=2.605, ppl=6.09, wps=5964.9, ups=0.09, wpb=64804, bsz=128, num_updates=9414, lr=9.99327e-05, gnorm=2.099, loss_scale=2, train_wall=10, gb_free=2.8, wall=108496
2021-06-20 00:47:13 | INFO | train_inner | epoch 004: 467 / 3002 loss=2.465, ppl=5.52, wps=5852.3, ups=0.09, wpb=64881, bsz=128, num_updates=9415, lr=9.99327e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=108507
2021-06-20 00:47:24 | INFO | train_inner | epoch 004: 468 / 3002 loss=2.512, ppl=5.71, wps=5929.9, ups=0.09, wpb=64814, bsz=128, num_updates=9416, lr=9.99327e-05, gnorm=2.241, loss_scale=2, train_wall=10, gb_free=2.8, wall=108518
2021-06-20 00:47:35 | INFO | train_inner | epoch 004: 469 / 3002 loss=2.731, ppl=6.64, wps=5805.7, ups=0.09, wpb=64814, bsz=128, num_updates=9417, lr=9.99327e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=108529
2021-06-20 00:47:46 | INFO | train_inner | epoch 004: 470 / 3002 loss=2.599, ppl=6.06, wps=5704.1, ups=0.09, wpb=64774, bsz=128, num_updates=9418, lr=9.99327e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=108540
2021-06-20 00:47:57 | INFO | train_inner | epoch 004: 471 / 3002 loss=2.794, ppl=6.94, wps=5873.3, ups=0.09, wpb=64849, bsz=128, num_updates=9419, lr=9.99326e-05, gnorm=2.822, loss_scale=2, train_wall=11, gb_free=2.8, wall=108552
2021-06-20 00:48:08 | INFO | train_inner | epoch 004: 472 / 3002 loss=2.554, ppl=5.87, wps=5780, ups=0.09, wpb=64874, bsz=128, num_updates=9420, lr=9.99326e-05, gnorm=2.333, loss_scale=2, train_wall=11, gb_free=2.8, wall=108563
2021-06-20 00:48:19 | INFO | train_inner | epoch 004: 473 / 3002 loss=2.52, ppl=5.73, wps=5856.4, ups=0.09, wpb=64894, bsz=128, num_updates=9421, lr=9.99326e-05, gnorm=3.897, loss_scale=2, train_wall=11, gb_free=2.8, wall=108574
2021-06-20 00:48:31 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-20 00:48:42 | INFO | train_inner | epoch 004: 475 / 3002 loss=2.563, ppl=5.91, wps=2906.8, ups=0.04, wpb=64802, bsz=128, num_updates=9422, lr=9.99326e-05, gnorm=2.925, loss_scale=1, train_wall=21, gb_free=2.8, wall=108596
2021-06-20 00:48:53 | INFO | train_inner | epoch 004: 476 / 3002 loss=2.609, ppl=6.1, wps=5844.3, ups=0.09, wpb=64838, bsz=128, num_updates=9423, lr=9.99326e-05, gnorm=2.412, loss_scale=1, train_wall=11, gb_free=2.8, wall=108607
2021-06-20 00:49:04 | INFO | train_inner | epoch 004: 477 / 3002 loss=2.596, ppl=6.04, wps=5906.6, ups=0.09, wpb=64897, bsz=128, num_updates=9424, lr=9.99326e-05, gnorm=2.413, loss_scale=1, train_wall=11, gb_free=2.8, wall=108618
2021-06-20 00:49:15 | INFO | train_inner | epoch 004: 478 / 3002 loss=2.552, ppl=5.87, wps=5925.5, ups=0.09, wpb=64868, bsz=128, num_updates=9425, lr=9.99326e-05, gnorm=2.152, loss_scale=1, train_wall=11, gb_free=2.8, wall=108629
2021-06-20 00:49:26 | INFO | train_inner | epoch 004: 479 / 3002 loss=2.864, ppl=7.28, wps=5720.3, ups=0.09, wpb=64705, bsz=128, num_updates=9426, lr=9.99326e-05, gnorm=2.305, loss_scale=1, train_wall=11, gb_free=2.8, wall=108640
2021-06-20 00:49:37 | INFO | train_inner | epoch 004: 480 / 3002 loss=2.588, ppl=6.01, wps=5708.6, ups=0.09, wpb=64851, bsz=128, num_updates=9427, lr=9.99326e-05, gnorm=3.71, loss_scale=1, train_wall=11, gb_free=2.8, wall=108652
2021-06-20 00:49:49 | INFO | train_inner | epoch 004: 481 / 3002 loss=2.762, ppl=6.78, wps=5832.4, ups=0.09, wpb=64773, bsz=128, num_updates=9428, lr=9.99326e-05, gnorm=2.371, loss_scale=1, train_wall=11, gb_free=2.8, wall=108663
2021-06-20 00:50:00 | INFO | train_inner | epoch 004: 482 / 3002 loss=2.726, ppl=6.62, wps=5906, ups=0.09, wpb=64835, bsz=128, num_updates=9429, lr=9.99326e-05, gnorm=2.681, loss_scale=1, train_wall=11, gb_free=2.8, wall=108674
2021-06-20 00:50:11 | INFO | train_inner | epoch 004: 483 / 3002 loss=2.673, ppl=6.38, wps=5810.1, ups=0.09, wpb=64737, bsz=128, num_updates=9430, lr=9.99326e-05, gnorm=2.098, loss_scale=1, train_wall=11, gb_free=2.8, wall=108685
2021-06-20 00:50:22 | INFO | train_inner | epoch 004: 484 / 3002 loss=2.723, ppl=6.6, wps=5734.2, ups=0.09, wpb=64842, bsz=128, num_updates=9431, lr=9.99325e-05, gnorm=2.158, loss_scale=1, train_wall=11, gb_free=2.8, wall=108696
2021-06-20 00:50:33 | INFO | train_inner | epoch 004: 485 / 3002 loss=2.644, ppl=6.25, wps=5757.7, ups=0.09, wpb=64868, bsz=128, num_updates=9432, lr=9.99325e-05, gnorm=2.072, loss_scale=1, train_wall=11, gb_free=2.8, wall=108708
2021-06-20 00:50:45 | INFO | train_inner | epoch 004: 486 / 3002 loss=2.683, ppl=6.42, wps=5759.8, ups=0.09, wpb=64775, bsz=128, num_updates=9433, lr=9.99325e-05, gnorm=2.416, loss_scale=1, train_wall=11, gb_free=2.8, wall=108719
2021-06-20 00:50:56 | INFO | train_inner | epoch 004: 487 / 3002 loss=2.572, ppl=5.95, wps=5811.7, ups=0.09, wpb=64811, bsz=128, num_updates=9434, lr=9.99325e-05, gnorm=2.278, loss_scale=1, train_wall=11, gb_free=2.8, wall=108730
2021-06-20 00:51:07 | INFO | train_inner | epoch 004: 488 / 3002 loss=2.74, ppl=6.68, wps=5777.6, ups=0.09, wpb=64739, bsz=128, num_updates=9435, lr=9.99325e-05, gnorm=2.353, loss_scale=1, train_wall=11, gb_free=2.8, wall=108741
2021-06-20 00:51:18 | INFO | train_inner | epoch 004: 489 / 3002 loss=2.465, ppl=5.52, wps=5911.1, ups=0.09, wpb=64802, bsz=128, num_updates=9436, lr=9.99325e-05, gnorm=2.628, loss_scale=1, train_wall=11, gb_free=2.8, wall=108752
2021-06-20 00:51:29 | INFO | train_inner | epoch 004: 490 / 3002 loss=2.597, ppl=6.05, wps=5843.9, ups=0.09, wpb=64847, bsz=128, num_updates=9437, lr=9.99325e-05, gnorm=2.042, loss_scale=1, train_wall=11, gb_free=2.8, wall=108763
2021-06-20 00:51:40 | INFO | train_inner | epoch 004: 491 / 3002 loss=2.581, ppl=5.98, wps=5914.9, ups=0.09, wpb=64837, bsz=128, num_updates=9438, lr=9.99325e-05, gnorm=2.652, loss_scale=1, train_wall=10, gb_free=2.8, wall=108774
2021-06-20 00:51:51 | INFO | train_inner | epoch 004: 492 / 3002 loss=2.628, ppl=6.18, wps=5792.4, ups=0.09, wpb=64842, bsz=128, num_updates=9439, lr=9.99325e-05, gnorm=2.129, loss_scale=1, train_wall=11, gb_free=2.8, wall=108785
2021-06-20 00:52:02 | INFO | train_inner | epoch 004: 493 / 3002 loss=2.531, ppl=5.78, wps=5740.1, ups=0.09, wpb=64809, bsz=128, num_updates=9440, lr=9.99325e-05, gnorm=2.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=108797
2021-06-20 00:52:13 | INFO | train_inner | epoch 004: 494 / 3002 loss=2.607, ppl=6.09, wps=5859.9, ups=0.09, wpb=64869, bsz=128, num_updates=9441, lr=9.99325e-05, gnorm=2.317, loss_scale=1, train_wall=11, gb_free=2.8, wall=108808
2021-06-20 00:52:24 | INFO | train_inner | epoch 004: 495 / 3002 loss=2.55, ppl=5.86, wps=5916, ups=0.09, wpb=64890, bsz=128, num_updates=9442, lr=9.99325e-05, gnorm=2.223, loss_scale=1, train_wall=11, gb_free=2.8, wall=108819
2021-06-20 00:52:36 | INFO | train_inner | epoch 004: 496 / 3002 loss=2.623, ppl=6.16, wps=5776.4, ups=0.09, wpb=64811, bsz=128, num_updates=9443, lr=9.99325e-05, gnorm=2.219, loss_scale=1, train_wall=11, gb_free=2.8, wall=108830
2021-06-20 00:52:47 | INFO | train_inner | epoch 004: 497 / 3002 loss=2.709, ppl=6.54, wps=5878.8, ups=0.09, wpb=64814, bsz=128, num_updates=9444, lr=9.99324e-05, gnorm=2.212, loss_scale=1, train_wall=11, gb_free=2.8, wall=108841
2021-06-20 00:52:58 | INFO | train_inner | epoch 004: 498 / 3002 loss=2.621, ppl=6.15, wps=5829.4, ups=0.09, wpb=64828, bsz=128, num_updates=9445, lr=9.99324e-05, gnorm=2.178, loss_scale=1, train_wall=11, gb_free=2.8, wall=108852
2021-06-20 00:53:09 | INFO | train_inner | epoch 004: 499 / 3002 loss=2.684, ppl=6.43, wps=5880.4, ups=0.09, wpb=64836, bsz=128, num_updates=9446, lr=9.99324e-05, gnorm=2.232, loss_scale=1, train_wall=11, gb_free=2.8, wall=108863
2021-06-20 00:53:20 | INFO | train_inner | epoch 004: 500 / 3002 loss=2.641, ppl=6.24, wps=5850, ups=0.09, wpb=64809, bsz=128, num_updates=9447, lr=9.99324e-05, gnorm=2.764, loss_scale=1, train_wall=11, gb_free=2.8, wall=108874
2021-06-20 00:53:31 | INFO | train_inner | epoch 004: 501 / 3002 loss=2.654, ppl=6.29, wps=5855.3, ups=0.09, wpb=64826, bsz=128, num_updates=9448, lr=9.99324e-05, gnorm=2.214, loss_scale=1, train_wall=11, gb_free=2.8, wall=108885
2021-06-20 00:53:42 | INFO | train_inner | epoch 004: 502 / 3002 loss=2.61, ppl=6.1, wps=5848.4, ups=0.09, wpb=64765, bsz=128, num_updates=9449, lr=9.99324e-05, gnorm=2.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=108896
2021-06-20 00:53:53 | INFO | train_inner | epoch 004: 503 / 3002 loss=2.731, ppl=6.64, wps=5871.6, ups=0.09, wpb=64799, bsz=128, num_updates=9450, lr=9.99324e-05, gnorm=2.19, loss_scale=1, train_wall=11, gb_free=2.8, wall=108907
2021-06-20 00:54:04 | INFO | train_inner | epoch 004: 504 / 3002 loss=2.69, ppl=6.45, wps=5860.5, ups=0.09, wpb=64825, bsz=128, num_updates=9451, lr=9.99324e-05, gnorm=3.985, loss_scale=1, train_wall=11, gb_free=2.8, wall=108918
2021-06-20 00:54:15 | INFO | train_inner | epoch 004: 505 / 3002 loss=2.637, ppl=6.22, wps=5820.1, ups=0.09, wpb=64867, bsz=128, num_updates=9452, lr=9.99324e-05, gnorm=2.226, loss_scale=1, train_wall=11, gb_free=2.8, wall=108930
2021-06-20 00:54:26 | INFO | train_inner | epoch 004: 506 / 3002 loss=2.753, ppl=6.74, wps=5867, ups=0.09, wpb=64789, bsz=128, num_updates=9453, lr=9.99324e-05, gnorm=2.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=108941
2021-06-20 00:54:37 | INFO | train_inner | epoch 004: 507 / 3002 loss=2.337, ppl=5.05, wps=5948.5, ups=0.09, wpb=64905, bsz=128, num_updates=9454, lr=9.99324e-05, gnorm=2.104, loss_scale=1, train_wall=10, gb_free=2.8, wall=108952
2021-06-20 00:54:48 | INFO | train_inner | epoch 004: 508 / 3002 loss=2.605, ppl=6.08, wps=5927.8, ups=0.09, wpb=64763, bsz=128, num_updates=9455, lr=9.99324e-05, gnorm=2.106, loss_scale=1, train_wall=10, gb_free=2.8, wall=108962
2021-06-20 00:54:59 | INFO | train_inner | epoch 004: 509 / 3002 loss=2.699, ppl=6.49, wps=5898.3, ups=0.09, wpb=64873, bsz=128, num_updates=9456, lr=9.99323e-05, gnorm=2.228, loss_scale=1, train_wall=11, gb_free=2.8, wall=108973
2021-06-20 00:55:10 | INFO | train_inner | epoch 004: 510 / 3002 loss=2.473, ppl=5.55, wps=5877.8, ups=0.09, wpb=64794, bsz=128, num_updates=9457, lr=9.99323e-05, gnorm=2.045, loss_scale=1, train_wall=11, gb_free=2.8, wall=108985
2021-06-20 00:55:21 | INFO | train_inner | epoch 004: 511 / 3002 loss=2.654, ppl=6.3, wps=5764.4, ups=0.09, wpb=64870, bsz=128, num_updates=9458, lr=9.99323e-05, gnorm=2.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=108996
2021-06-20 00:55:33 | INFO | train_inner | epoch 004: 512 / 3002 loss=2.558, ppl=5.89, wps=5833.2, ups=0.09, wpb=64916, bsz=128, num_updates=9459, lr=9.99323e-05, gnorm=2.059, loss_scale=1, train_wall=11, gb_free=2.8, wall=109007
2021-06-20 00:55:44 | INFO | train_inner | epoch 004: 513 / 3002 loss=2.678, ppl=6.4, wps=5816.8, ups=0.09, wpb=64774, bsz=128, num_updates=9460, lr=9.99323e-05, gnorm=2.136, loss_scale=1, train_wall=11, gb_free=2.8, wall=109018
2021-06-20 00:55:55 | INFO | train_inner | epoch 004: 514 / 3002 loss=2.603, ppl=6.08, wps=5792.9, ups=0.09, wpb=64895, bsz=128, num_updates=9461, lr=9.99323e-05, gnorm=4.072, loss_scale=1, train_wall=11, gb_free=2.8, wall=109029
2021-06-20 00:56:06 | INFO | train_inner | epoch 004: 515 / 3002 loss=2.543, ppl=5.83, wps=5734.6, ups=0.09, wpb=64787, bsz=128, num_updates=9462, lr=9.99323e-05, gnorm=1.995, loss_scale=1, train_wall=11, gb_free=2.8, wall=109041
2021-06-20 00:56:17 | INFO | train_inner | epoch 004: 516 / 3002 loss=2.674, ppl=6.38, wps=5848.9, ups=0.09, wpb=64796, bsz=128, num_updates=9463, lr=9.99323e-05, gnorm=2.221, loss_scale=1, train_wall=11, gb_free=2.8, wall=109052
2021-06-20 00:56:28 | INFO | train_inner | epoch 004: 517 / 3002 loss=2.483, ppl=5.59, wps=5835.2, ups=0.09, wpb=64829, bsz=128, num_updates=9464, lr=9.99323e-05, gnorm=2.166, loss_scale=1, train_wall=11, gb_free=2.8, wall=109063
2021-06-20 00:56:40 | INFO | train_inner | epoch 004: 518 / 3002 loss=2.696, ppl=6.48, wps=5825.4, ups=0.09, wpb=64810, bsz=128, num_updates=9465, lr=9.99323e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=109074
2021-06-20 00:56:51 | INFO | train_inner | epoch 004: 519 / 3002 loss=2.496, ppl=5.64, wps=5722.3, ups=0.09, wpb=64876, bsz=128, num_updates=9466, lr=9.99323e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=109085
2021-06-20 00:57:02 | INFO | train_inner | epoch 004: 520 / 3002 loss=2.603, ppl=6.08, wps=5752.4, ups=0.09, wpb=64804, bsz=128, num_updates=9467, lr=9.99323e-05, gnorm=2.138, loss_scale=1, train_wall=11, gb_free=2.8, wall=109096
2021-06-20 00:57:13 | INFO | train_inner | epoch 004: 521 / 3002 loss=2.557, ppl=5.89, wps=5964.1, ups=0.09, wpb=64860, bsz=128, num_updates=9468, lr=9.99323e-05, gnorm=2.081, loss_scale=1, train_wall=10, gb_free=2.8, wall=109107
2021-06-20 00:57:24 | INFO | train_inner | epoch 004: 522 / 3002 loss=2.401, ppl=5.28, wps=5941.9, ups=0.09, wpb=64808, bsz=128, num_updates=9469, lr=9.99322e-05, gnorm=2.144, loss_scale=1, train_wall=10, gb_free=2.8, wall=109118
2021-06-20 00:57:35 | INFO | train_inner | epoch 004: 523 / 3002 loss=2.448, ppl=5.46, wps=5862.6, ups=0.09, wpb=64927, bsz=128, num_updates=9470, lr=9.99322e-05, gnorm=2.867, loss_scale=1, train_wall=11, gb_free=2.8, wall=109129
2021-06-20 00:57:46 | INFO | train_inner | epoch 004: 524 / 3002 loss=2.689, ppl=6.45, wps=5864.8, ups=0.09, wpb=64847, bsz=128, num_updates=9471, lr=9.99322e-05, gnorm=2.076, loss_scale=1, train_wall=11, gb_free=2.8, wall=109140
2021-06-20 00:57:57 | INFO | train_inner | epoch 004: 525 / 3002 loss=2.629, ppl=6.19, wps=5819.8, ups=0.09, wpb=64784, bsz=128, num_updates=9472, lr=9.99322e-05, gnorm=2.322, loss_scale=1, train_wall=11, gb_free=2.8, wall=109152
2021-06-20 00:58:08 | INFO | train_inner | epoch 004: 526 / 3002 loss=2.519, ppl=5.73, wps=5770.6, ups=0.09, wpb=64835, bsz=128, num_updates=9473, lr=9.99322e-05, gnorm=25.692, loss_scale=1, train_wall=11, gb_free=2.8, wall=109163
2021-06-20 00:58:19 | INFO | train_inner | epoch 004: 527 / 3002 loss=2.746, ppl=6.71, wps=5960.7, ups=0.09, wpb=64764, bsz=128, num_updates=9474, lr=9.99322e-05, gnorm=4.611, loss_scale=1, train_wall=10, gb_free=2.8, wall=109174
2021-06-20 00:58:30 | INFO | train_inner | epoch 004: 528 / 3002 loss=2.572, ppl=5.94, wps=5821.3, ups=0.09, wpb=64884, bsz=128, num_updates=9475, lr=9.99322e-05, gnorm=3.616, loss_scale=1, train_wall=11, gb_free=2.8, wall=109185
2021-06-20 00:58:42 | INFO | train_inner | epoch 004: 529 / 3002 loss=2.598, ppl=6.05, wps=5801.6, ups=0.09, wpb=64872, bsz=128, num_updates=9476, lr=9.99322e-05, gnorm=2.261, loss_scale=1, train_wall=11, gb_free=2.8, wall=109196
2021-06-20 00:58:53 | INFO | train_inner | epoch 004: 530 / 3002 loss=2.679, ppl=6.41, wps=5855.3, ups=0.09, wpb=64767, bsz=128, num_updates=9477, lr=9.99322e-05, gnorm=2.277, loss_scale=1, train_wall=11, gb_free=2.8, wall=109207
2021-06-20 00:59:04 | INFO | train_inner | epoch 004: 531 / 3002 loss=2.613, ppl=6.12, wps=5771.1, ups=0.09, wpb=64802, bsz=128, num_updates=9478, lr=9.99322e-05, gnorm=2.278, loss_scale=1, train_wall=11, gb_free=2.8, wall=109218
2021-06-20 00:59:15 | INFO | train_inner | epoch 004: 532 / 3002 loss=2.664, ppl=6.34, wps=5824.4, ups=0.09, wpb=64888, bsz=128, num_updates=9479, lr=9.99322e-05, gnorm=2.038, loss_scale=1, train_wall=11, gb_free=2.8, wall=109229
2021-06-20 00:59:26 | INFO | train_inner | epoch 004: 533 / 3002 loss=2.559, ppl=5.89, wps=5871, ups=0.09, wpb=64806, bsz=128, num_updates=9480, lr=9.99322e-05, gnorm=2.041, loss_scale=1, train_wall=11, gb_free=2.8, wall=109240
2021-06-20 00:59:37 | INFO | train_inner | epoch 004: 534 / 3002 loss=2.672, ppl=6.37, wps=5929.1, ups=0.09, wpb=64792, bsz=128, num_updates=9481, lr=9.99321e-05, gnorm=2.248, loss_scale=1, train_wall=10, gb_free=2.8, wall=109251
2021-06-20 00:59:48 | INFO | train_inner | epoch 004: 535 / 3002 loss=2.646, ppl=6.26, wps=5869.8, ups=0.09, wpb=64855, bsz=128, num_updates=9482, lr=9.99321e-05, gnorm=2.104, loss_scale=1, train_wall=11, gb_free=2.8, wall=109262
2021-06-20 00:59:59 | INFO | train_inner | epoch 004: 536 / 3002 loss=2.542, ppl=5.82, wps=5921.9, ups=0.09, wpb=64800, bsz=128, num_updates=9483, lr=9.99321e-05, gnorm=2.177, loss_scale=1, train_wall=10, gb_free=2.8, wall=109273
2021-06-20 01:00:10 | INFO | train_inner | epoch 004: 537 / 3002 loss=2.575, ppl=5.96, wps=5891.8, ups=0.09, wpb=64805, bsz=128, num_updates=9484, lr=9.99321e-05, gnorm=2.744, loss_scale=1, train_wall=11, gb_free=2.8, wall=109284
2021-06-20 01:00:21 | INFO | train_inner | epoch 004: 538 / 3002 loss=2.611, ppl=6.11, wps=5842.7, ups=0.09, wpb=64834, bsz=128, num_updates=9485, lr=9.99321e-05, gnorm=2.564, loss_scale=1, train_wall=11, gb_free=2.8, wall=109295
2021-06-20 01:00:32 | INFO | train_inner | epoch 004: 539 / 3002 loss=2.554, ppl=5.87, wps=5865, ups=0.09, wpb=64746, bsz=128, num_updates=9486, lr=9.99321e-05, gnorm=2.951, loss_scale=1, train_wall=11, gb_free=2.8, wall=109306
2021-06-20 01:00:43 | INFO | train_inner | epoch 004: 540 / 3002 loss=2.51, ppl=5.69, wps=5864.8, ups=0.09, wpb=64884, bsz=128, num_updates=9487, lr=9.99321e-05, gnorm=2.489, loss_scale=1, train_wall=11, gb_free=2.8, wall=109318
2021-06-20 01:00:54 | INFO | train_inner | epoch 004: 541 / 3002 loss=2.668, ppl=6.36, wps=5951.5, ups=0.09, wpb=64888, bsz=128, num_updates=9488, lr=9.99321e-05, gnorm=2.395, loss_scale=1, train_wall=10, gb_free=2.8, wall=109328
2021-06-20 01:01:05 | INFO | train_inner | epoch 004: 542 / 3002 loss=2.621, ppl=6.15, wps=5915.1, ups=0.09, wpb=64865, bsz=128, num_updates=9489, lr=9.99321e-05, gnorm=2.049, loss_scale=1, train_wall=11, gb_free=2.8, wall=109339
2021-06-20 01:01:16 | INFO | train_inner | epoch 004: 543 / 3002 loss=2.549, ppl=5.85, wps=5850.2, ups=0.09, wpb=64805, bsz=128, num_updates=9490, lr=9.99321e-05, gnorm=2.11, loss_scale=1, train_wall=11, gb_free=2.8, wall=109350
2021-06-20 01:01:27 | INFO | train_inner | epoch 004: 544 / 3002 loss=2.632, ppl=6.2, wps=5814.5, ups=0.09, wpb=64783, bsz=128, num_updates=9491, lr=9.99321e-05, gnorm=2.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=109362
2021-06-20 01:01:38 | INFO | train_inner | epoch 004: 545 / 3002 loss=2.697, ppl=6.49, wps=5881.4, ups=0.09, wpb=64831, bsz=128, num_updates=9492, lr=9.99321e-05, gnorm=2.625, loss_scale=1, train_wall=11, gb_free=2.8, wall=109373
2021-06-20 01:01:49 | INFO | train_inner | epoch 004: 546 / 3002 loss=2.533, ppl=5.79, wps=5915.6, ups=0.09, wpb=64899, bsz=128, num_updates=9493, lr=9.99321e-05, gnorm=2.371, loss_scale=1, train_wall=11, gb_free=2.8, wall=109384
2021-06-20 01:02:00 | INFO | train_inner | epoch 004: 547 / 3002 loss=2.475, ppl=5.56, wps=5833.2, ups=0.09, wpb=64886, bsz=128, num_updates=9494, lr=9.9932e-05, gnorm=2.185, loss_scale=1, train_wall=11, gb_free=2.8, wall=109395
2021-06-20 01:02:11 | INFO | train_inner | epoch 004: 548 / 3002 loss=2.591, ppl=6.03, wps=5848.7, ups=0.09, wpb=64861, bsz=128, num_updates=9495, lr=9.9932e-05, gnorm=2.741, loss_scale=1, train_wall=11, gb_free=2.8, wall=109406
2021-06-20 01:02:23 | INFO | train_inner | epoch 004: 549 / 3002 loss=2.666, ppl=6.35, wps=5846.4, ups=0.09, wpb=64863, bsz=128, num_updates=9496, lr=9.9932e-05, gnorm=2.097, loss_scale=1, train_wall=11, gb_free=2.8, wall=109417
2021-06-20 01:02:34 | INFO | train_inner | epoch 004: 550 / 3002 loss=2.586, ppl=6, wps=5868.5, ups=0.09, wpb=64891, bsz=128, num_updates=9497, lr=9.9932e-05, gnorm=2.447, loss_scale=1, train_wall=11, gb_free=2.8, wall=109428
2021-06-20 01:02:45 | INFO | train_inner | epoch 004: 551 / 3002 loss=2.529, ppl=5.77, wps=5786.6, ups=0.09, wpb=64758, bsz=128, num_updates=9498, lr=9.9932e-05, gnorm=2.175, loss_scale=1, train_wall=11, gb_free=2.8, wall=109439
2021-06-20 01:02:56 | INFO | train_inner | epoch 004: 552 / 3002 loss=2.525, ppl=5.75, wps=5931.1, ups=0.09, wpb=64766, bsz=128, num_updates=9499, lr=9.9932e-05, gnorm=2.228, loss_scale=1, train_wall=10, gb_free=2.8, wall=109450
2021-06-20 01:03:07 | INFO | train_inner | epoch 004: 553 / 3002 loss=2.479, ppl=5.57, wps=5815.1, ups=0.09, wpb=64828, bsz=128, num_updates=9500, lr=9.9932e-05, gnorm=2.169, loss_scale=1, train_wall=11, gb_free=2.8, wall=109461
2021-06-20 01:03:18 | INFO | train_inner | epoch 004: 554 / 3002 loss=2.63, ppl=6.19, wps=5874.3, ups=0.09, wpb=64848, bsz=128, num_updates=9501, lr=9.9932e-05, gnorm=2.349, loss_scale=1, train_wall=11, gb_free=2.8, wall=109472
2021-06-20 01:03:29 | INFO | train_inner | epoch 004: 555 / 3002 loss=2.452, ppl=5.47, wps=5754, ups=0.09, wpb=64805, bsz=128, num_updates=9502, lr=9.9932e-05, gnorm=3.079, loss_scale=1, train_wall=11, gb_free=2.8, wall=109484
2021-06-20 01:03:40 | INFO | train_inner | epoch 004: 556 / 3002 loss=2.559, ppl=5.89, wps=5853, ups=0.09, wpb=64772, bsz=128, num_updates=9503, lr=9.9932e-05, gnorm=2.155, loss_scale=1, train_wall=11, gb_free=2.8, wall=109495
2021-06-20 01:03:52 | INFO | train_inner | epoch 004: 557 / 3002 loss=2.582, ppl=5.99, wps=5748.7, ups=0.09, wpb=64803, bsz=128, num_updates=9504, lr=9.9932e-05, gnorm=2.987, loss_scale=1, train_wall=11, gb_free=2.8, wall=109506
2021-06-20 01:04:03 | INFO | train_inner | epoch 004: 558 / 3002 loss=2.642, ppl=6.24, wps=5901.7, ups=0.09, wpb=64849, bsz=128, num_updates=9505, lr=9.9932e-05, gnorm=6.128, loss_scale=1, train_wall=11, gb_free=2.8, wall=109517
2021-06-20 01:04:14 | INFO | train_inner | epoch 004: 559 / 3002 loss=2.656, ppl=6.3, wps=5792.9, ups=0.09, wpb=64857, bsz=128, num_updates=9506, lr=9.99319e-05, gnorm=2.51, loss_scale=1, train_wall=11, gb_free=2.8, wall=109528
2021-06-20 01:04:25 | INFO | train_inner | epoch 004: 560 / 3002 loss=2.597, ppl=6.05, wps=5715.5, ups=0.09, wpb=64800, bsz=128, num_updates=9507, lr=9.99319e-05, gnorm=2.158, loss_scale=1, train_wall=11, gb_free=2.8, wall=109539
2021-06-20 01:04:36 | INFO | train_inner | epoch 004: 561 / 3002 loss=2.473, ppl=5.55, wps=5913.1, ups=0.09, wpb=64913, bsz=128, num_updates=9508, lr=9.99319e-05, gnorm=2.16, loss_scale=1, train_wall=11, gb_free=2.8, wall=109550
2021-06-20 01:04:47 | INFO | train_inner | epoch 004: 562 / 3002 loss=2.599, ppl=6.06, wps=5945.3, ups=0.09, wpb=64891, bsz=128, num_updates=9509, lr=9.99319e-05, gnorm=2.155, loss_scale=1, train_wall=10, gb_free=2.8, wall=109561
2021-06-20 01:04:58 | INFO | train_inner | epoch 004: 563 / 3002 loss=2.671, ppl=6.37, wps=5797.3, ups=0.09, wpb=64884, bsz=128, num_updates=9510, lr=9.99319e-05, gnorm=2.105, loss_scale=1, train_wall=11, gb_free=2.8, wall=109572
2021-06-20 01:05:09 | INFO | train_inner | epoch 004: 564 / 3002 loss=2.613, ppl=6.12, wps=5847.3, ups=0.09, wpb=64772, bsz=128, num_updates=9511, lr=9.99319e-05, gnorm=2.314, loss_scale=1, train_wall=11, gb_free=2.8, wall=109584
2021-06-20 01:05:20 | INFO | train_inner | epoch 004: 565 / 3002 loss=2.698, ppl=6.49, wps=5878.7, ups=0.09, wpb=64824, bsz=128, num_updates=9512, lr=9.99319e-05, gnorm=2.211, loss_scale=1, train_wall=11, gb_free=2.8, wall=109595
2021-06-20 01:05:31 | INFO | train_inner | epoch 004: 566 / 3002 loss=2.461, ppl=5.51, wps=5810, ups=0.09, wpb=64868, bsz=128, num_updates=9513, lr=9.99319e-05, gnorm=3.937, loss_scale=1, train_wall=11, gb_free=2.8, wall=109606
2021-06-20 01:05:43 | INFO | train_inner | epoch 004: 567 / 3002 loss=2.607, ppl=6.09, wps=5796.7, ups=0.09, wpb=64802, bsz=128, num_updates=9514, lr=9.99319e-05, gnorm=2.937, loss_scale=1, train_wall=11, gb_free=2.8, wall=109617
2021-06-20 01:05:53 | INFO | train_inner | epoch 004: 568 / 3002 loss=2.581, ppl=5.98, wps=5947.4, ups=0.09, wpb=64839, bsz=128, num_updates=9515, lr=9.99319e-05, gnorm=2.191, loss_scale=1, train_wall=10, gb_free=2.8, wall=109628
2021-06-20 01:06:05 | INFO | train_inner | epoch 004: 569 / 3002 loss=2.612, ppl=6.11, wps=5773.6, ups=0.09, wpb=64766, bsz=128, num_updates=9516, lr=9.99319e-05, gnorm=2.248, loss_scale=1, train_wall=11, gb_free=2.8, wall=109639
2021-06-20 01:06:16 | INFO | train_inner | epoch 004: 570 / 3002 loss=2.708, ppl=6.54, wps=5881, ups=0.09, wpb=64950, bsz=128, num_updates=9517, lr=9.99319e-05, gnorm=2.136, loss_scale=1, train_wall=11, gb_free=2.8, wall=109650
2021-06-20 01:06:27 | INFO | train_inner | epoch 004: 571 / 3002 loss=2.538, ppl=5.81, wps=5922.4, ups=0.09, wpb=64902, bsz=128, num_updates=9518, lr=9.99319e-05, gnorm=2.244, loss_scale=1, train_wall=11, gb_free=2.8, wall=109661
2021-06-20 01:06:38 | INFO | train_inner | epoch 004: 572 / 3002 loss=2.518, ppl=5.73, wps=5862.2, ups=0.09, wpb=64932, bsz=128, num_updates=9519, lr=9.99318e-05, gnorm=2.486, loss_scale=1, train_wall=11, gb_free=2.8, wall=109672
2021-06-20 01:06:49 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-20 01:07:00 | INFO | train_inner | epoch 004: 574 / 3002 loss=2.528, ppl=5.77, wps=2901.3, ups=0.04, wpb=64786, bsz=128, num_updates=9520, lr=9.99318e-05, gnorm=2.404, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=109694
2021-06-20 01:07:11 | INFO | train_inner | epoch 004: 575 / 3002 loss=2.584, ppl=6, wps=5923.9, ups=0.09, wpb=64902, bsz=128, num_updates=9521, lr=9.99318e-05, gnorm=2.253, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109705
2021-06-20 01:07:22 | INFO | train_inner | epoch 004: 576 / 3002 loss=2.693, ppl=6.47, wps=6041.5, ups=0.09, wpb=64812, bsz=128, num_updates=9522, lr=9.99318e-05, gnorm=2.223, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109716
2021-06-20 01:07:33 | INFO | train_inner | epoch 004: 577 / 3002 loss=2.61, ppl=6.11, wps=5842.1, ups=0.09, wpb=64818, bsz=128, num_updates=9523, lr=9.99318e-05, gnorm=2.299, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109727
2021-06-20 01:07:44 | INFO | train_inner | epoch 004: 578 / 3002 loss=2.471, ppl=5.55, wps=5806.1, ups=0.09, wpb=64842, bsz=128, num_updates=9524, lr=9.99318e-05, gnorm=18.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109738
2021-06-20 01:07:55 | INFO | train_inner | epoch 004: 579 / 3002 loss=2.508, ppl=5.69, wps=5963.8, ups=0.09, wpb=64832, bsz=128, num_updates=9525, lr=9.99318e-05, gnorm=2.172, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109749
2021-06-20 01:08:06 | INFO | train_inner | epoch 004: 580 / 3002 loss=2.622, ppl=6.16, wps=5847.1, ups=0.09, wpb=64884, bsz=128, num_updates=9526, lr=9.99318e-05, gnorm=2.046, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109760
2021-06-20 01:08:17 | INFO | train_inner | epoch 004: 581 / 3002 loss=2.487, ppl=5.6, wps=5797.9, ups=0.09, wpb=64848, bsz=128, num_updates=9527, lr=9.99318e-05, gnorm=2.317, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109772
2021-06-20 01:08:28 | INFO | train_inner | epoch 004: 582 / 3002 loss=2.526, ppl=5.76, wps=5848.9, ups=0.09, wpb=64783, bsz=128, num_updates=9528, lr=9.99318e-05, gnorm=2.064, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109783
2021-06-20 01:08:39 | INFO | train_inner | epoch 004: 583 / 3002 loss=2.596, ppl=6.04, wps=5887.7, ups=0.09, wpb=64784, bsz=128, num_updates=9529, lr=9.99318e-05, gnorm=2.175, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109794
2021-06-20 01:08:50 | INFO | train_inner | epoch 004: 584 / 3002 loss=2.566, ppl=5.92, wps=5853.1, ups=0.09, wpb=64817, bsz=128, num_updates=9530, lr=9.99318e-05, gnorm=5.772, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109805
2021-06-20 01:09:01 | INFO | train_inner | epoch 004: 585 / 3002 loss=2.597, ppl=6.05, wps=5841.1, ups=0.09, wpb=64829, bsz=128, num_updates=9531, lr=9.99317e-05, gnorm=2.134, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109816
2021-06-20 01:09:13 | INFO | train_inner | epoch 004: 586 / 3002 loss=2.501, ppl=5.66, wps=5847.1, ups=0.09, wpb=64793, bsz=128, num_updates=9532, lr=9.99317e-05, gnorm=2.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109827
2021-06-20 01:09:24 | INFO | train_inner | epoch 004: 587 / 3002 loss=2.539, ppl=5.81, wps=5855.5, ups=0.09, wpb=64870, bsz=128, num_updates=9533, lr=9.99317e-05, gnorm=2.098, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109838
2021-06-20 01:09:35 | INFO | train_inner | epoch 004: 588 / 3002 loss=2.549, ppl=5.85, wps=5779.9, ups=0.09, wpb=64838, bsz=128, num_updates=9534, lr=9.99317e-05, gnorm=2.146, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109849
2021-06-20 01:09:46 | INFO | train_inner | epoch 004: 589 / 3002 loss=2.638, ppl=6.23, wps=5856.2, ups=0.09, wpb=64866, bsz=128, num_updates=9535, lr=9.99317e-05, gnorm=2.124, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109860
2021-06-20 01:09:57 | INFO | train_inner | epoch 004: 590 / 3002 loss=2.739, ppl=6.68, wps=5940, ups=0.09, wpb=64813, bsz=128, num_updates=9536, lr=9.99317e-05, gnorm=2.199, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109871
2021-06-20 01:10:08 | INFO | train_inner | epoch 004: 591 / 3002 loss=2.594, ppl=6.04, wps=5958.9, ups=0.09, wpb=64772, bsz=128, num_updates=9537, lr=9.99317e-05, gnorm=3.385, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109882
2021-06-20 01:10:19 | INFO | train_inner | epoch 004: 592 / 3002 loss=2.424, ppl=5.37, wps=5904.7, ups=0.09, wpb=64911, bsz=128, num_updates=9538, lr=9.99317e-05, gnorm=2.98, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109893
2021-06-20 01:10:30 | INFO | train_inner | epoch 004: 593 / 3002 loss=2.622, ppl=6.16, wps=5808.2, ups=0.09, wpb=64852, bsz=128, num_updates=9539, lr=9.99317e-05, gnorm=2.562, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109904
2021-06-20 01:10:41 | INFO | train_inner | epoch 004: 594 / 3002 loss=2.622, ppl=6.16, wps=5929.5, ups=0.09, wpb=64816, bsz=128, num_updates=9540, lr=9.99317e-05, gnorm=2.122, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109915
2021-06-20 01:10:52 | INFO | train_inner | epoch 004: 595 / 3002 loss=2.613, ppl=6.12, wps=5779.7, ups=0.09, wpb=64860, bsz=128, num_updates=9541, lr=9.99317e-05, gnorm=1.988, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109926
2021-06-20 01:11:03 | INFO | train_inner | epoch 004: 596 / 3002 loss=2.535, ppl=5.8, wps=5853, ups=0.09, wpb=64844, bsz=128, num_updates=9542, lr=9.99317e-05, gnorm=2.109, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109937
2021-06-20 01:11:14 | INFO | train_inner | epoch 004: 597 / 3002 loss=2.731, ppl=6.64, wps=5870.4, ups=0.09, wpb=64756, bsz=128, num_updates=9543, lr=9.99317e-05, gnorm=2.137, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109948
2021-06-20 01:11:25 | INFO | train_inner | epoch 004: 598 / 3002 loss=2.571, ppl=5.94, wps=5859, ups=0.09, wpb=64816, bsz=128, num_updates=9544, lr=9.99316e-05, gnorm=2.162, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109960
2021-06-20 01:11:36 | INFO | train_inner | epoch 004: 599 / 3002 loss=2.669, ppl=6.36, wps=5789.7, ups=0.09, wpb=64833, bsz=128, num_updates=9545, lr=9.99316e-05, gnorm=3.397, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=109971
2021-06-20 01:11:47 | INFO | train_inner | epoch 004: 600 / 3002 loss=2.521, ppl=5.74, wps=5923, ups=0.09, wpb=64868, bsz=128, num_updates=9546, lr=9.99316e-05, gnorm=2.169, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109982
2021-06-20 01:11:58 | INFO | train_inner | epoch 004: 601 / 3002 loss=2.754, ppl=6.75, wps=5955.5, ups=0.09, wpb=64871, bsz=128, num_updates=9547, lr=9.99316e-05, gnorm=2.444, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=109993
2021-06-20 01:12:09 | INFO | train_inner | epoch 004: 602 / 3002 loss=2.483, ppl=5.59, wps=5931.4, ups=0.09, wpb=64882, bsz=128, num_updates=9548, lr=9.99316e-05, gnorm=2.037, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110004
2021-06-20 01:12:20 | INFO | train_inner | epoch 004: 603 / 3002 loss=2.542, ppl=5.82, wps=5786.9, ups=0.09, wpb=64872, bsz=128, num_updates=9549, lr=9.99316e-05, gnorm=2.102, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110015
2021-06-20 01:12:31 | INFO | train_inner | epoch 004: 604 / 3002 loss=2.469, ppl=5.53, wps=5917.8, ups=0.09, wpb=64786, bsz=128, num_updates=9550, lr=9.99316e-05, gnorm=2.115, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110026
2021-06-20 01:12:42 | INFO | train_inner | epoch 004: 605 / 3002 loss=2.703, ppl=6.51, wps=5912.9, ups=0.09, wpb=64756, bsz=128, num_updates=9551, lr=9.99316e-05, gnorm=2.169, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110037
2021-06-20 01:12:53 | INFO | train_inner | epoch 004: 606 / 3002 loss=2.495, ppl=5.64, wps=5893.4, ups=0.09, wpb=64855, bsz=128, num_updates=9552, lr=9.99316e-05, gnorm=2.088, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110048
2021-06-20 01:13:04 | INFO | train_inner | epoch 004: 607 / 3002 loss=2.679, ppl=6.4, wps=6003.4, ups=0.09, wpb=64830, bsz=128, num_updates=9553, lr=9.99316e-05, gnorm=2.101, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110058
2021-06-20 01:13:15 | INFO | train_inner | epoch 004: 608 / 3002 loss=2.542, ppl=5.82, wps=5911.7, ups=0.09, wpb=64872, bsz=128, num_updates=9554, lr=9.99316e-05, gnorm=2.535, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110069
2021-06-20 01:13:26 | INFO | train_inner | epoch 004: 609 / 3002 loss=2.586, ppl=6, wps=5927.5, ups=0.09, wpb=64836, bsz=128, num_updates=9555, lr=9.99316e-05, gnorm=2.646, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110080
2021-06-20 01:13:37 | INFO | train_inner | epoch 004: 610 / 3002 loss=2.566, ppl=5.92, wps=5766.8, ups=0.09, wpb=64859, bsz=128, num_updates=9556, lr=9.99315e-05, gnorm=2.171, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110092
2021-06-20 01:13:49 | INFO | train_inner | epoch 004: 611 / 3002 loss=2.562, ppl=5.91, wps=5727.8, ups=0.09, wpb=64848, bsz=128, num_updates=9557, lr=9.99315e-05, gnorm=4.397, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110103
2021-06-20 01:14:00 | INFO | train_inner | epoch 004: 612 / 3002 loss=2.695, ppl=6.47, wps=5800.3, ups=0.09, wpb=64752, bsz=128, num_updates=9558, lr=9.99315e-05, gnorm=2.121, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110114
2021-06-20 01:14:11 | INFO | train_inner | epoch 004: 613 / 3002 loss=2.72, ppl=6.59, wps=5892.7, ups=0.09, wpb=64829, bsz=128, num_updates=9559, lr=9.99315e-05, gnorm=2.205, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110125
2021-06-20 01:14:22 | INFO | train_inner | epoch 004: 614 / 3002 loss=2.659, ppl=6.32, wps=5813.2, ups=0.09, wpb=64678, bsz=128, num_updates=9560, lr=9.99315e-05, gnorm=2.408, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110136
2021-06-20 01:14:33 | INFO | train_inner | epoch 004: 615 / 3002 loss=2.679, ppl=6.41, wps=5934, ups=0.09, wpb=64913, bsz=128, num_updates=9561, lr=9.99315e-05, gnorm=2.098, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110147
2021-06-20 01:14:44 | INFO | train_inner | epoch 004: 616 / 3002 loss=2.542, ppl=5.82, wps=5852.1, ups=0.09, wpb=64930, bsz=128, num_updates=9562, lr=9.99315e-05, gnorm=2.323, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110158
2021-06-20 01:14:55 | INFO | train_inner | epoch 004: 617 / 3002 loss=2.529, ppl=5.77, wps=5825.1, ups=0.09, wpb=64810, bsz=128, num_updates=9563, lr=9.99315e-05, gnorm=2.188, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110169
2021-06-20 01:15:06 | INFO | train_inner | epoch 004: 618 / 3002 loss=2.602, ppl=6.07, wps=5958.6, ups=0.09, wpb=64908, bsz=128, num_updates=9564, lr=9.99315e-05, gnorm=2.055, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110180
2021-06-20 01:15:17 | INFO | train_inner | epoch 004: 619 / 3002 loss=2.632, ppl=6.2, wps=5852.4, ups=0.09, wpb=64858, bsz=128, num_updates=9565, lr=9.99315e-05, gnorm=2.244, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110191
2021-06-20 01:15:28 | INFO | train_inner | epoch 004: 620 / 3002 loss=2.582, ppl=5.99, wps=5966.5, ups=0.09, wpb=64810, bsz=128, num_updates=9566, lr=9.99315e-05, gnorm=2.112, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110202
2021-06-20 01:15:39 | INFO | train_inner | epoch 004: 621 / 3002 loss=2.692, ppl=6.46, wps=6001.8, ups=0.09, wpb=64864, bsz=128, num_updates=9567, lr=9.99315e-05, gnorm=2.818, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110213
2021-06-20 01:15:50 | INFO | train_inner | epoch 004: 622 / 3002 loss=2.435, ppl=5.41, wps=5886.3, ups=0.09, wpb=64766, bsz=128, num_updates=9568, lr=9.99315e-05, gnorm=2.068, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110224
2021-06-20 01:16:01 | INFO | train_inner | epoch 004: 623 / 3002 loss=2.556, ppl=5.88, wps=5814.8, ups=0.09, wpb=64819, bsz=128, num_updates=9569, lr=9.99314e-05, gnorm=2.047, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110235
2021-06-20 01:16:12 | INFO | train_inner | epoch 004: 624 / 3002 loss=2.601, ppl=6.07, wps=5862.3, ups=0.09, wpb=64815, bsz=128, num_updates=9570, lr=9.99314e-05, gnorm=2.151, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110246
2021-06-20 01:16:23 | INFO | train_inner | epoch 004: 625 / 3002 loss=2.52, ppl=5.73, wps=5832.5, ups=0.09, wpb=64910, bsz=128, num_updates=9571, lr=9.99314e-05, gnorm=2.333, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110257
2021-06-20 01:16:34 | INFO | train_inner | epoch 004: 626 / 3002 loss=2.666, ppl=6.35, wps=6008.6, ups=0.09, wpb=64796, bsz=128, num_updates=9572, lr=9.99314e-05, gnorm=2.249, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110268
2021-06-20 01:16:45 | INFO | train_inner | epoch 004: 627 / 3002 loss=2.605, ppl=6.08, wps=5786.4, ups=0.09, wpb=64840, bsz=128, num_updates=9573, lr=9.99314e-05, gnorm=2.084, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110279
2021-06-20 01:16:56 | INFO | train_inner | epoch 004: 628 / 3002 loss=2.575, ppl=5.96, wps=5917.8, ups=0.09, wpb=64909, bsz=128, num_updates=9574, lr=9.99314e-05, gnorm=2.266, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110290
2021-06-20 01:17:07 | INFO | train_inner | epoch 004: 629 / 3002 loss=2.724, ppl=6.61, wps=5881.9, ups=0.09, wpb=64811, bsz=128, num_updates=9575, lr=9.99314e-05, gnorm=2.3, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110301
2021-06-20 01:17:18 | INFO | train_inner | epoch 004: 630 / 3002 loss=2.591, ppl=6.02, wps=5836.7, ups=0.09, wpb=64822, bsz=128, num_updates=9576, lr=9.99314e-05, gnorm=2.139, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110312
2021-06-20 01:17:29 | INFO | train_inner | epoch 004: 631 / 3002 loss=2.679, ppl=6.41, wps=5825.4, ups=0.09, wpb=64791, bsz=128, num_updates=9577, lr=9.99314e-05, gnorm=2.134, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110324
2021-06-20 01:17:40 | INFO | train_inner | epoch 004: 632 / 3002 loss=2.578, ppl=5.97, wps=5847.8, ups=0.09, wpb=64840, bsz=128, num_updates=9578, lr=9.99314e-05, gnorm=2.121, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110335
2021-06-20 01:17:51 | INFO | train_inner | epoch 004: 633 / 3002 loss=2.62, ppl=6.15, wps=5862.4, ups=0.09, wpb=64822, bsz=128, num_updates=9579, lr=9.99314e-05, gnorm=2.467, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110346
2021-06-20 01:18:02 | INFO | train_inner | epoch 004: 634 / 3002 loss=2.499, ppl=5.65, wps=5903.8, ups=0.09, wpb=64826, bsz=128, num_updates=9580, lr=9.99314e-05, gnorm=2.174, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110357
2021-06-20 01:18:13 | INFO | train_inner | epoch 004: 635 / 3002 loss=2.656, ppl=6.3, wps=5876.4, ups=0.09, wpb=64847, bsz=128, num_updates=9581, lr=9.99313e-05, gnorm=2.178, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110368
2021-06-20 01:18:24 | INFO | train_inner | epoch 004: 636 / 3002 loss=2.521, ppl=5.74, wps=5906.6, ups=0.09, wpb=64854, bsz=128, num_updates=9582, lr=9.99313e-05, gnorm=2.176, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110379
2021-06-20 01:18:35 | INFO | train_inner | epoch 004: 637 / 3002 loss=2.55, ppl=5.86, wps=6029.4, ups=0.09, wpb=64851, bsz=128, num_updates=9583, lr=9.99313e-05, gnorm=2.337, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110389
2021-06-20 01:18:46 | INFO | train_inner | epoch 004: 638 / 3002 loss=2.767, ppl=6.81, wps=5820.7, ups=0.09, wpb=64791, bsz=128, num_updates=9584, lr=9.99313e-05, gnorm=2.477, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110401
2021-06-20 01:18:57 | INFO | train_inner | epoch 004: 639 / 3002 loss=2.625, ppl=6.17, wps=5942.6, ups=0.09, wpb=64872, bsz=128, num_updates=9585, lr=9.99313e-05, gnorm=2.108, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110412
2021-06-20 01:19:08 | INFO | train_inner | epoch 004: 640 / 3002 loss=2.613, ppl=6.12, wps=5972.1, ups=0.09, wpb=64840, bsz=128, num_updates=9586, lr=9.99313e-05, gnorm=2.15, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110422
2021-06-20 01:19:19 | INFO | train_inner | epoch 004: 641 / 3002 loss=2.528, ppl=5.77, wps=5958.4, ups=0.09, wpb=64781, bsz=128, num_updates=9587, lr=9.99313e-05, gnorm=2.773, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110433
2021-06-20 01:19:30 | INFO | train_inner | epoch 004: 642 / 3002 loss=2.68, ppl=6.41, wps=5792.3, ups=0.09, wpb=64735, bsz=128, num_updates=9588, lr=9.99313e-05, gnorm=2.065, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110444
2021-06-20 01:19:41 | INFO | train_inner | epoch 004: 643 / 3002 loss=2.355, ppl=5.11, wps=5749, ups=0.09, wpb=64846, bsz=128, num_updates=9589, lr=9.99313e-05, gnorm=2.204, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110456
2021-06-20 01:19:52 | INFO | train_inner | epoch 004: 644 / 3002 loss=2.567, ppl=5.92, wps=5864.7, ups=0.09, wpb=64844, bsz=128, num_updates=9590, lr=9.99313e-05, gnorm=4.867, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110467
2021-06-20 01:20:04 | INFO | train_inner | epoch 004: 645 / 3002 loss=2.47, ppl=5.54, wps=5750.9, ups=0.09, wpb=64779, bsz=128, num_updates=9591, lr=9.99313e-05, gnorm=2.108, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110478
2021-06-20 01:20:15 | INFO | train_inner | epoch 004: 646 / 3002 loss=2.595, ppl=6.04, wps=5845.1, ups=0.09, wpb=64860, bsz=128, num_updates=9592, lr=9.99313e-05, gnorm=2.19, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110489
2021-06-20 01:20:26 | INFO | train_inner | epoch 004: 647 / 3002 loss=2.567, ppl=5.93, wps=5925.5, ups=0.09, wpb=64917, bsz=128, num_updates=9593, lr=9.99313e-05, gnorm=2.133, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110500
2021-06-20 01:20:37 | INFO | train_inner | epoch 004: 648 / 3002 loss=2.551, ppl=5.86, wps=5890.3, ups=0.09, wpb=64822, bsz=128, num_updates=9594, lr=9.99312e-05, gnorm=2.105, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110511
2021-06-20 01:20:48 | INFO | train_inner | epoch 004: 649 / 3002 loss=2.646, ppl=6.26, wps=5974.8, ups=0.09, wpb=64865, bsz=128, num_updates=9595, lr=9.99312e-05, gnorm=2.161, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110522
2021-06-20 01:20:59 | INFO | train_inner | epoch 004: 650 / 3002 loss=2.696, ppl=6.48, wps=5896.7, ups=0.09, wpb=64828, bsz=128, num_updates=9596, lr=9.99312e-05, gnorm=2.09, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110533
2021-06-20 01:21:10 | INFO | train_inner | epoch 004: 651 / 3002 loss=2.526, ppl=5.76, wps=5811.7, ups=0.09, wpb=64842, bsz=128, num_updates=9597, lr=9.99312e-05, gnorm=2.02, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110544
2021-06-20 01:21:21 | INFO | train_inner | epoch 004: 652 / 3002 loss=2.485, ppl=5.6, wps=5734.2, ups=0.09, wpb=64834, bsz=128, num_updates=9598, lr=9.99312e-05, gnorm=2.307, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110555
2021-06-20 01:21:32 | INFO | train_inner | epoch 004: 653 / 3002 loss=2.55, ppl=5.86, wps=5833.1, ups=0.09, wpb=64848, bsz=128, num_updates=9599, lr=9.99312e-05, gnorm=2.556, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110567
2021-06-20 01:21:43 | INFO | train_inner | epoch 004: 654 / 3002 loss=2.511, ppl=5.7, wps=5826.2, ups=0.09, wpb=64873, bsz=128, num_updates=9600, lr=9.99312e-05, gnorm=4.799, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110578
2021-06-20 01:21:54 | INFO | train_inner | epoch 004: 655 / 3002 loss=2.668, ppl=6.36, wps=5844.4, ups=0.09, wpb=64796, bsz=128, num_updates=9601, lr=9.99312e-05, gnorm=2.185, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110589
2021-06-20 01:22:06 | INFO | train_inner | epoch 004: 656 / 3002 loss=2.604, ppl=6.08, wps=5745.8, ups=0.09, wpb=64748, bsz=128, num_updates=9602, lr=9.99312e-05, gnorm=2.52, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110600
2021-06-20 01:22:17 | INFO | train_inner | epoch 004: 657 / 3002 loss=2.579, ppl=5.98, wps=5890.5, ups=0.09, wpb=64846, bsz=128, num_updates=9603, lr=9.99312e-05, gnorm=2.325, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110611
2021-06-20 01:22:28 | INFO | train_inner | epoch 004: 658 / 3002 loss=2.571, ppl=5.94, wps=5858.2, ups=0.09, wpb=64859, bsz=128, num_updates=9604, lr=9.99312e-05, gnorm=2.068, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110622
2021-06-20 01:22:39 | INFO | train_inner | epoch 004: 659 / 3002 loss=2.647, ppl=6.26, wps=5795.4, ups=0.09, wpb=64771, bsz=128, num_updates=9605, lr=9.99312e-05, gnorm=2.039, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110633
2021-06-20 01:22:50 | INFO | train_inner | epoch 004: 660 / 3002 loss=2.483, ppl=5.59, wps=5832.9, ups=0.09, wpb=64730, bsz=128, num_updates=9606, lr=9.99311e-05, gnorm=2.67, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110644
2021-06-20 01:23:01 | INFO | train_inner | epoch 004: 661 / 3002 loss=2.687, ppl=6.44, wps=6034.4, ups=0.09, wpb=64834, bsz=128, num_updates=9607, lr=9.99311e-05, gnorm=3.469, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110655
2021-06-20 01:23:12 | INFO | train_inner | epoch 004: 662 / 3002 loss=2.566, ppl=5.92, wps=5851.6, ups=0.09, wpb=64785, bsz=128, num_updates=9608, lr=9.99311e-05, gnorm=2.219, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110666
2021-06-20 01:23:23 | INFO | train_inner | epoch 004: 663 / 3002 loss=2.428, ppl=5.38, wps=5896.7, ups=0.09, wpb=64835, bsz=128, num_updates=9609, lr=9.99311e-05, gnorm=2.333, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110677
2021-06-20 01:23:34 | INFO | train_inner | epoch 004: 664 / 3002 loss=2.597, ppl=6.05, wps=5800.6, ups=0.09, wpb=64845, bsz=128, num_updates=9610, lr=9.99311e-05, gnorm=4.445, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110688
2021-06-20 01:23:45 | INFO | train_inner | epoch 004: 665 / 3002 loss=2.684, ppl=6.43, wps=5871, ups=0.09, wpb=64867, bsz=128, num_updates=9611, lr=9.99311e-05, gnorm=4.749, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110699
2021-06-20 01:23:56 | INFO | train_inner | epoch 004: 666 / 3002 loss=2.644, ppl=6.25, wps=5845.4, ups=0.09, wpb=64872, bsz=128, num_updates=9612, lr=9.99311e-05, gnorm=2.05, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110710
2021-06-20 01:24:07 | INFO | train_inner | epoch 004: 667 / 3002 loss=2.493, ppl=5.63, wps=5809.8, ups=0.09, wpb=64817, bsz=128, num_updates=9613, lr=9.99311e-05, gnorm=2.561, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110722
2021-06-20 01:24:18 | INFO | train_inner | epoch 004: 668 / 3002 loss=2.632, ppl=6.2, wps=6025.7, ups=0.09, wpb=64777, bsz=128, num_updates=9614, lr=9.99311e-05, gnorm=2.173, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110732
2021-06-20 01:24:29 | INFO | train_inner | epoch 004: 669 / 3002 loss=2.591, ppl=6.03, wps=5871.3, ups=0.09, wpb=64844, bsz=128, num_updates=9615, lr=9.99311e-05, gnorm=2.102, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110743
2021-06-20 01:24:40 | INFO | train_inner | epoch 004: 670 / 3002 loss=2.688, ppl=6.45, wps=5698.4, ups=0.09, wpb=64771, bsz=128, num_updates=9616, lr=9.99311e-05, gnorm=2.184, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110755
2021-06-20 01:24:52 | INFO | train_inner | epoch 004: 671 / 3002 loss=2.587, ppl=6.01, wps=5811.4, ups=0.09, wpb=64813, bsz=128, num_updates=9617, lr=9.99311e-05, gnorm=2.725, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110766
2021-06-20 01:25:03 | INFO | train_inner | epoch 004: 672 / 3002 loss=2.518, ppl=5.73, wps=5838.5, ups=0.09, wpb=64870, bsz=128, num_updates=9618, lr=9.99311e-05, gnorm=2.14, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110777
2021-06-20 01:25:14 | INFO | train_inner | epoch 004: 673 / 3002 loss=2.5, ppl=5.66, wps=5856, ups=0.09, wpb=64879, bsz=128, num_updates=9619, lr=9.9931e-05, gnorm=2.461, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110788
2021-06-20 01:25:25 | INFO | train_inner | epoch 004: 674 / 3002 loss=2.563, ppl=5.91, wps=5895, ups=0.09, wpb=64846, bsz=128, num_updates=9620, lr=9.9931e-05, gnorm=2.099, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110799
2021-06-20 01:25:36 | INFO | train_inner | epoch 004: 675 / 3002 loss=2.521, ppl=5.74, wps=5849.9, ups=0.09, wpb=64837, bsz=128, num_updates=9621, lr=9.9931e-05, gnorm=3.071, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110810
2021-06-20 01:25:47 | INFO | train_inner | epoch 004: 676 / 3002 loss=2.654, ppl=6.29, wps=5878.8, ups=0.09, wpb=64774, bsz=128, num_updates=9622, lr=9.9931e-05, gnorm=2.312, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110821
2021-06-20 01:25:58 | INFO | train_inner | epoch 004: 677 / 3002 loss=2.839, ppl=7.15, wps=5823.4, ups=0.09, wpb=64706, bsz=128, num_updates=9623, lr=9.9931e-05, gnorm=2.494, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110832
2021-06-20 01:26:09 | INFO | train_inner | epoch 004: 678 / 3002 loss=2.549, ppl=5.85, wps=5836.2, ups=0.09, wpb=64800, bsz=128, num_updates=9624, lr=9.9931e-05, gnorm=2.897, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110843
2021-06-20 01:26:20 | INFO | train_inner | epoch 004: 679 / 3002 loss=2.648, ppl=6.27, wps=5725.7, ups=0.09, wpb=64841, bsz=128, num_updates=9625, lr=9.9931e-05, gnorm=2.232, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110855
2021-06-20 01:26:32 | INFO | train_inner | epoch 004: 680 / 3002 loss=2.57, ppl=5.94, wps=5804.8, ups=0.09, wpb=64838, bsz=128, num_updates=9626, lr=9.9931e-05, gnorm=2.3, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110866
2021-06-20 01:26:43 | INFO | train_inner | epoch 004: 681 / 3002 loss=2.593, ppl=6.03, wps=5854, ups=0.09, wpb=64832, bsz=128, num_updates=9627, lr=9.9931e-05, gnorm=2.19, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110877
2021-06-20 01:26:54 | INFO | train_inner | epoch 004: 682 / 3002 loss=2.495, ppl=5.64, wps=5700.6, ups=0.09, wpb=64805, bsz=128, num_updates=9628, lr=9.9931e-05, gnorm=3.7, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110888
2021-06-20 01:27:05 | INFO | train_inner | epoch 004: 683 / 3002 loss=2.76, ppl=6.77, wps=5996.5, ups=0.09, wpb=64767, bsz=128, num_updates=9629, lr=9.9931e-05, gnorm=2.272, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110899
2021-06-20 01:27:16 | INFO | train_inner | epoch 004: 684 / 3002 loss=2.531, ppl=5.78, wps=5851.8, ups=0.09, wpb=64780, bsz=128, num_updates=9630, lr=9.9931e-05, gnorm=2.249, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110910
2021-06-20 01:27:27 | INFO | train_inner | epoch 004: 685 / 3002 loss=2.647, ppl=6.26, wps=5853.1, ups=0.09, wpb=64838, bsz=128, num_updates=9631, lr=9.99309e-05, gnorm=2.02, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110921
2021-06-20 01:27:38 | INFO | train_inner | epoch 004: 686 / 3002 loss=2.621, ppl=6.15, wps=5806.1, ups=0.09, wpb=64789, bsz=128, num_updates=9632, lr=9.99309e-05, gnorm=2.027, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110933
2021-06-20 01:27:49 | INFO | train_inner | epoch 004: 687 / 3002 loss=2.517, ppl=5.72, wps=5943.4, ups=0.09, wpb=64793, bsz=128, num_updates=9633, lr=9.99309e-05, gnorm=3.404, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=110943
2021-06-20 01:28:00 | INFO | train_inner | epoch 004: 688 / 3002 loss=2.54, ppl=5.81, wps=5811.4, ups=0.09, wpb=64868, bsz=128, num_updates=9634, lr=9.99309e-05, gnorm=2.158, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110955
2021-06-20 01:28:12 | INFO | train_inner | epoch 004: 689 / 3002 loss=2.498, ppl=5.65, wps=5740, ups=0.09, wpb=64922, bsz=128, num_updates=9635, lr=9.99309e-05, gnorm=2.09, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110966
2021-06-20 01:28:23 | INFO | train_inner | epoch 004: 690 / 3002 loss=2.431, ppl=5.39, wps=5797.5, ups=0.09, wpb=64912, bsz=128, num_updates=9636, lr=9.99309e-05, gnorm=2.013, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110977
2021-06-20 01:28:34 | INFO | train_inner | epoch 004: 691 / 3002 loss=2.652, ppl=6.29, wps=5924.3, ups=0.09, wpb=64861, bsz=128, num_updates=9637, lr=9.99309e-05, gnorm=23.92, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110988
2021-06-20 01:28:45 | INFO | train_inner | epoch 004: 692 / 3002 loss=2.484, ppl=5.59, wps=5833.6, ups=0.09, wpb=64883, bsz=128, num_updates=9638, lr=9.99309e-05, gnorm=2.229, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=110999
2021-06-20 01:28:56 | INFO | train_inner | epoch 004: 693 / 3002 loss=2.755, ppl=6.75, wps=5892.3, ups=0.09, wpb=64818, bsz=128, num_updates=9639, lr=9.99309e-05, gnorm=2.42, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=111010
2021-06-20 01:29:07 | INFO | train_inner | epoch 004: 694 / 3002 loss=2.486, ppl=5.6, wps=5854.8, ups=0.09, wpb=64846, bsz=128, num_updates=9640, lr=9.99309e-05, gnorm=2.106, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=111021
2021-06-20 01:29:18 | INFO | train_inner | epoch 004: 695 / 3002 loss=2.496, ppl=5.64, wps=5914.5, ups=0.09, wpb=64763, bsz=128, num_updates=9641, lr=9.99309e-05, gnorm=1.965, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=111032
2021-06-20 01:29:29 | INFO | train_inner | epoch 004: 696 / 3002 loss=2.658, ppl=6.31, wps=5681.5, ups=0.09, wpb=64771, bsz=128, num_updates=9642, lr=9.99309e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=111044
2021-06-20 01:29:40 | INFO | train_inner | epoch 004: 697 / 3002 loss=2.583, ppl=5.99, wps=5989.8, ups=0.09, wpb=64916, bsz=128, num_updates=9643, lr=9.99309e-05, gnorm=2.18, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=111054
2021-06-20 01:29:51 | INFO | train_inner | epoch 004: 698 / 3002 loss=2.738, ppl=6.67, wps=5804.8, ups=0.09, wpb=64837, bsz=128, num_updates=9644, lr=9.99308e-05, gnorm=2.205, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=111066
2021-06-20 01:30:02 | INFO | train_inner | epoch 004: 699 / 3002 loss=2.615, ppl=6.13, wps=5961.8, ups=0.09, wpb=64869, bsz=128, num_updates=9645, lr=9.99308e-05, gnorm=2.431, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=111076
2021-06-20 01:30:13 | INFO | train_inner | epoch 004: 700 / 3002 loss=2.696, ppl=6.48, wps=5865.2, ups=0.09, wpb=64741, bsz=128, num_updates=9646, lr=9.99308e-05, gnorm=2.094, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=111088
2021-06-20 01:30:24 | INFO | train_inner | epoch 004: 701 / 3002 loss=2.618, ppl=6.14, wps=5840.5, ups=0.09, wpb=64915, bsz=128, num_updates=9647, lr=9.99308e-05, gnorm=14.524, loss_scale=1, train_wall=11, gb_free=2.8, wall=111099
2021-06-20 01:30:35 | INFO | train_inner | epoch 004: 702 / 3002 loss=2.619, ppl=6.14, wps=5854.6, ups=0.09, wpb=64841, bsz=128, num_updates=9648, lr=9.99308e-05, gnorm=2.288, loss_scale=1, train_wall=11, gb_free=2.8, wall=111110
2021-06-20 01:30:47 | INFO | train_inner | epoch 004: 703 / 3002 loss=2.528, ppl=5.77, wps=5820.3, ups=0.09, wpb=64802, bsz=128, num_updates=9649, lr=9.99308e-05, gnorm=2.152, loss_scale=1, train_wall=11, gb_free=2.8, wall=111121
2021-06-20 01:30:58 | INFO | train_inner | epoch 004: 704 / 3002 loss=2.738, ppl=6.67, wps=5834.6, ups=0.09, wpb=64824, bsz=128, num_updates=9650, lr=9.99308e-05, gnorm=2.139, loss_scale=1, train_wall=11, gb_free=2.8, wall=111132
2021-06-20 01:31:09 | INFO | train_inner | epoch 004: 705 / 3002 loss=2.588, ppl=6.01, wps=5888.5, ups=0.09, wpb=64921, bsz=128, num_updates=9651, lr=9.99308e-05, gnorm=2.096, loss_scale=1, train_wall=11, gb_free=2.8, wall=111143
2021-06-20 01:31:20 | INFO | train_inner | epoch 004: 706 / 3002 loss=2.767, ppl=6.81, wps=5839.2, ups=0.09, wpb=64779, bsz=128, num_updates=9652, lr=9.99308e-05, gnorm=2.164, loss_scale=1, train_wall=11, gb_free=2.8, wall=111154
2021-06-20 01:31:31 | INFO | train_inner | epoch 004: 707 / 3002 loss=2.623, ppl=6.16, wps=5798.9, ups=0.09, wpb=64858, bsz=128, num_updates=9653, lr=9.99308e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=111165
2021-06-20 01:31:42 | INFO | train_inner | epoch 004: 708 / 3002 loss=2.541, ppl=5.82, wps=5894.8, ups=0.09, wpb=64807, bsz=128, num_updates=9654, lr=9.99308e-05, gnorm=2.378, loss_scale=1, train_wall=11, gb_free=2.8, wall=111176
2021-06-20 01:31:53 | INFO | train_inner | epoch 004: 709 / 3002 loss=2.529, ppl=5.77, wps=5928.3, ups=0.09, wpb=64806, bsz=128, num_updates=9655, lr=9.99308e-05, gnorm=2.14, loss_scale=1, train_wall=10, gb_free=2.8, wall=111187
2021-06-20 01:32:04 | INFO | train_inner | epoch 004: 710 / 3002 loss=2.724, ppl=6.61, wps=5847.5, ups=0.09, wpb=64783, bsz=128, num_updates=9656, lr=9.99307e-05, gnorm=6.462, loss_scale=1, train_wall=11, gb_free=2.8, wall=111198
2021-06-20 01:32:15 | INFO | train_inner | epoch 004: 711 / 3002 loss=2.636, ppl=6.22, wps=5946.4, ups=0.09, wpb=64854, bsz=128, num_updates=9657, lr=9.99307e-05, gnorm=4.893, loss_scale=1, train_wall=10, gb_free=2.8, wall=111209
2021-06-20 01:32:26 | INFO | train_inner | epoch 004: 712 / 3002 loss=2.576, ppl=5.96, wps=5699.7, ups=0.09, wpb=64878, bsz=128, num_updates=9658, lr=9.99307e-05, gnorm=2.571, loss_scale=1, train_wall=11, gb_free=2.8, wall=111221
2021-06-20 01:32:37 | INFO | train_inner | epoch 004: 713 / 3002 loss=2.556, ppl=5.88, wps=5827.8, ups=0.09, wpb=64842, bsz=128, num_updates=9659, lr=9.99307e-05, gnorm=2.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=111232
2021-06-20 01:32:49 | INFO | train_inner | epoch 004: 714 / 3002 loss=2.504, ppl=5.67, wps=5772.5, ups=0.09, wpb=64831, bsz=128, num_updates=9660, lr=9.99307e-05, gnorm=2.536, loss_scale=1, train_wall=11, gb_free=2.8, wall=111243
2021-06-20 01:33:00 | INFO | train_inner | epoch 004: 715 / 3002 loss=2.623, ppl=6.16, wps=5842.7, ups=0.09, wpb=64811, bsz=128, num_updates=9661, lr=9.99307e-05, gnorm=2.274, loss_scale=1, train_wall=11, gb_free=2.8, wall=111254
2021-06-20 01:33:11 | INFO | train_inner | epoch 004: 716 / 3002 loss=2.651, ppl=6.28, wps=5884.1, ups=0.09, wpb=64851, bsz=128, num_updates=9662, lr=9.99307e-05, gnorm=2.208, loss_scale=1, train_wall=11, gb_free=2.8, wall=111265
2021-06-20 01:33:22 | INFO | train_inner | epoch 004: 717 / 3002 loss=2.598, ppl=6.06, wps=5822, ups=0.09, wpb=64833, bsz=128, num_updates=9663, lr=9.99307e-05, gnorm=2.107, loss_scale=1, train_wall=11, gb_free=2.8, wall=111276
2021-06-20 01:33:33 | INFO | train_inner | epoch 004: 718 / 3002 loss=2.597, ppl=6.05, wps=5921.7, ups=0.09, wpb=64754, bsz=128, num_updates=9664, lr=9.99307e-05, gnorm=3.353, loss_scale=1, train_wall=11, gb_free=2.8, wall=111287
2021-06-20 01:33:44 | INFO | train_inner | epoch 004: 719 / 3002 loss=2.669, ppl=6.36, wps=5788.7, ups=0.09, wpb=64717, bsz=128, num_updates=9665, lr=9.99307e-05, gnorm=2.172, loss_scale=1, train_wall=11, gb_free=2.8, wall=111298
2021-06-20 01:33:55 | INFO | train_inner | epoch 004: 720 / 3002 loss=2.633, ppl=6.2, wps=5929.8, ups=0.09, wpb=64864, bsz=128, num_updates=9666, lr=9.99307e-05, gnorm=3.841, loss_scale=1, train_wall=10, gb_free=2.8, wall=111309
2021-06-20 01:34:06 | INFO | train_inner | epoch 004: 721 / 3002 loss=2.565, ppl=5.92, wps=5886.7, ups=0.09, wpb=64821, bsz=128, num_updates=9667, lr=9.99307e-05, gnorm=2.012, loss_scale=1, train_wall=11, gb_free=2.8, wall=111320
2021-06-20 01:34:17 | INFO | train_inner | epoch 004: 722 / 3002 loss=2.544, ppl=5.83, wps=5818.8, ups=0.09, wpb=64881, bsz=128, num_updates=9668, lr=9.99307e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=111331
2021-06-20 01:34:28 | INFO | train_inner | epoch 004: 723 / 3002 loss=2.633, ppl=6.2, wps=5912.7, ups=0.09, wpb=64800, bsz=128, num_updates=9669, lr=9.99306e-05, gnorm=2.13, loss_scale=1, train_wall=10, gb_free=2.8, wall=111342
2021-06-20 01:34:39 | INFO | train_inner | epoch 004: 724 / 3002 loss=2.567, ppl=5.93, wps=6006.6, ups=0.09, wpb=64847, bsz=128, num_updates=9670, lr=9.99306e-05, gnorm=2.51, loss_scale=1, train_wall=10, gb_free=2.8, wall=111353
2021-06-20 01:34:50 | INFO | train_inner | epoch 004: 725 / 3002 loss=2.635, ppl=6.21, wps=5782.3, ups=0.09, wpb=64844, bsz=128, num_updates=9671, lr=9.99306e-05, gnorm=16.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=111364
2021-06-20 01:35:01 | INFO | train_inner | epoch 004: 726 / 3002 loss=2.497, ppl=5.64, wps=5809.4, ups=0.09, wpb=64837, bsz=128, num_updates=9672, lr=9.99306e-05, gnorm=2.121, loss_scale=1, train_wall=11, gb_free=2.8, wall=111376
2021-06-20 01:35:12 | INFO | train_inner | epoch 004: 727 / 3002 loss=2.584, ppl=6, wps=5858.3, ups=0.09, wpb=64856, bsz=128, num_updates=9673, lr=9.99306e-05, gnorm=2.269, loss_scale=1, train_wall=11, gb_free=2.8, wall=111387
2021-06-20 01:35:24 | INFO | train_inner | epoch 004: 728 / 3002 loss=2.702, ppl=6.51, wps=5726, ups=0.09, wpb=64850, bsz=128, num_updates=9674, lr=9.99306e-05, gnorm=3.369, loss_scale=1, train_wall=11, gb_free=2.8, wall=111398
2021-06-20 01:35:34 | INFO | train_inner | epoch 004: 729 / 3002 loss=2.595, ppl=6.04, wps=5973.2, ups=0.09, wpb=64838, bsz=128, num_updates=9675, lr=9.99306e-05, gnorm=2.153, loss_scale=1, train_wall=10, gb_free=2.8, wall=111409
2021-06-20 01:35:46 | INFO | train_inner | epoch 004: 730 / 3002 loss=2.661, ppl=6.32, wps=5793.6, ups=0.09, wpb=64838, bsz=128, num_updates=9676, lr=9.99306e-05, gnorm=2.493, loss_scale=1, train_wall=11, gb_free=2.8, wall=111420
2021-06-20 01:35:57 | INFO | train_inner | epoch 004: 731 / 3002 loss=2.572, ppl=5.95, wps=5796.5, ups=0.09, wpb=64854, bsz=128, num_updates=9677, lr=9.99306e-05, gnorm=2.087, loss_scale=1, train_wall=11, gb_free=2.8, wall=111431
2021-06-20 01:36:08 | INFO | train_inner | epoch 004: 732 / 3002 loss=2.543, ppl=5.83, wps=5897.6, ups=0.09, wpb=64881, bsz=128, num_updates=9678, lr=9.99306e-05, gnorm=2.127, loss_scale=1, train_wall=11, gb_free=2.8, wall=111442
2021-06-20 01:36:19 | INFO | train_inner | epoch 004: 733 / 3002 loss=2.578, ppl=5.97, wps=5866.5, ups=0.09, wpb=64824, bsz=128, num_updates=9679, lr=9.99306e-05, gnorm=2.249, loss_scale=1, train_wall=11, gb_free=2.8, wall=111453
2021-06-20 01:36:30 | INFO | train_inner | epoch 004: 734 / 3002 loss=2.525, ppl=5.76, wps=5895.3, ups=0.09, wpb=64816, bsz=128, num_updates=9680, lr=9.99306e-05, gnorm=2.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=111464
2021-06-20 01:36:41 | INFO | train_inner | epoch 004: 735 / 3002 loss=2.537, ppl=5.81, wps=5842.6, ups=0.09, wpb=64816, bsz=128, num_updates=9681, lr=9.99305e-05, gnorm=2.249, loss_scale=1, train_wall=11, gb_free=2.8, wall=111475
2021-06-20 01:36:52 | INFO | train_inner | epoch 004: 736 / 3002 loss=2.739, ppl=6.68, wps=5838.5, ups=0.09, wpb=64852, bsz=128, num_updates=9682, lr=9.99305e-05, gnorm=3.457, loss_scale=1, train_wall=11, gb_free=2.8, wall=111486
2021-06-20 01:37:03 | INFO | train_inner | epoch 004: 737 / 3002 loss=2.569, ppl=5.93, wps=5965.7, ups=0.09, wpb=64843, bsz=128, num_updates=9683, lr=9.99305e-05, gnorm=20.225, loss_scale=1, train_wall=10, gb_free=2.8, wall=111497
2021-06-20 01:37:14 | INFO | train_inner | epoch 004: 738 / 3002 loss=2.582, ppl=5.99, wps=5841.8, ups=0.09, wpb=64732, bsz=128, num_updates=9684, lr=9.99305e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=111508
2021-06-20 01:37:25 | INFO | train_inner | epoch 004: 739 / 3002 loss=2.785, ppl=6.89, wps=5859.7, ups=0.09, wpb=64801, bsz=128, num_updates=9685, lr=9.99305e-05, gnorm=2.577, loss_scale=1, train_wall=11, gb_free=2.8, wall=111519
2021-06-20 01:37:36 | INFO | train_inner | epoch 004: 740 / 3002 loss=2.572, ppl=5.95, wps=5870.3, ups=0.09, wpb=64749, bsz=128, num_updates=9686, lr=9.99305e-05, gnorm=2.239, loss_scale=1, train_wall=11, gb_free=2.8, wall=111530
2021-06-20 01:37:47 | INFO | train_inner | epoch 004: 741 / 3002 loss=2.591, ppl=6.02, wps=5806, ups=0.09, wpb=64793, bsz=128, num_updates=9687, lr=9.99305e-05, gnorm=2.276, loss_scale=1, train_wall=11, gb_free=2.8, wall=111542
2021-06-20 01:37:58 | INFO | train_inner | epoch 004: 742 / 3002 loss=2.674, ppl=6.38, wps=5815.9, ups=0.09, wpb=64872, bsz=128, num_updates=9688, lr=9.99305e-05, gnorm=2.534, loss_scale=1, train_wall=11, gb_free=2.8, wall=111553
2021-06-20 01:38:10 | INFO | train_inner | epoch 004: 743 / 3002 loss=2.6, ppl=6.06, wps=5802, ups=0.09, wpb=64856, bsz=128, num_updates=9689, lr=9.99305e-05, gnorm=3.52, loss_scale=1, train_wall=11, gb_free=2.8, wall=111564
2021-06-20 01:38:21 | INFO | train_inner | epoch 004: 744 / 3002 loss=2.531, ppl=5.78, wps=5795.2, ups=0.09, wpb=64894, bsz=128, num_updates=9690, lr=9.99305e-05, gnorm=2.38, loss_scale=1, train_wall=11, gb_free=2.8, wall=111575
2021-06-20 01:38:32 | INFO | train_inner | epoch 004: 745 / 3002 loss=2.666, ppl=6.34, wps=5890.2, ups=0.09, wpb=64873, bsz=128, num_updates=9691, lr=9.99305e-05, gnorm=4.02, loss_scale=1, train_wall=11, gb_free=2.8, wall=111586
2021-06-20 01:38:43 | INFO | train_inner | epoch 004: 746 / 3002 loss=2.575, ppl=5.96, wps=5783.5, ups=0.09, wpb=64839, bsz=128, num_updates=9692, lr=9.99305e-05, gnorm=2.902, loss_scale=1, train_wall=11, gb_free=2.8, wall=111597
2021-06-20 01:38:54 | INFO | train_inner | epoch 004: 747 / 3002 loss=2.499, ppl=5.65, wps=5891, ups=0.09, wpb=64908, bsz=128, num_updates=9693, lr=9.99305e-05, gnorm=3.601, loss_scale=1, train_wall=11, gb_free=2.8, wall=111608
2021-06-20 01:39:05 | INFO | train_inner | epoch 004: 748 / 3002 loss=2.569, ppl=5.93, wps=5951.1, ups=0.09, wpb=64895, bsz=128, num_updates=9694, lr=9.99304e-05, gnorm=2.31, loss_scale=1, train_wall=10, gb_free=2.8, wall=111619
2021-06-20 01:39:16 | INFO | train_inner | epoch 004: 749 / 3002 loss=2.641, ppl=6.24, wps=5891.3, ups=0.09, wpb=64894, bsz=128, num_updates=9695, lr=9.99304e-05, gnorm=2.329, loss_scale=1, train_wall=11, gb_free=2.8, wall=111630
2021-06-20 01:39:27 | INFO | train_inner | epoch 004: 750 / 3002 loss=2.512, ppl=5.71, wps=5816, ups=0.09, wpb=64833, bsz=128, num_updates=9696, lr=9.99304e-05, gnorm=2.878, loss_scale=1, train_wall=11, gb_free=2.8, wall=111641
2021-06-20 01:39:38 | INFO | train_inner | epoch 004: 751 / 3002 loss=2.647, ppl=6.26, wps=5732.1, ups=0.09, wpb=64761, bsz=128, num_updates=9697, lr=9.99304e-05, gnorm=2.854, loss_scale=1, train_wall=11, gb_free=2.8, wall=111653
2021-06-20 01:39:49 | INFO | train_inner | epoch 004: 752 / 3002 loss=2.496, ppl=5.64, wps=5848.4, ups=0.09, wpb=64700, bsz=128, num_updates=9698, lr=9.99304e-05, gnorm=8.333, loss_scale=1, train_wall=11, gb_free=2.8, wall=111664
2021-06-20 01:40:00 | INFO | train_inner | epoch 004: 753 / 3002 loss=2.567, ppl=5.93, wps=5961.2, ups=0.09, wpb=64899, bsz=128, num_updates=9699, lr=9.99304e-05, gnorm=2.099, loss_scale=1, train_wall=10, gb_free=2.8, wall=111675
2021-06-20 01:40:11 | INFO | train_inner | epoch 004: 754 / 3002 loss=2.467, ppl=5.53, wps=5853.7, ups=0.09, wpb=64744, bsz=128, num_updates=9700, lr=9.99304e-05, gnorm=2.486, loss_scale=1, train_wall=11, gb_free=2.8, wall=111686
2021-06-20 01:40:22 | INFO | train_inner | epoch 004: 755 / 3002 loss=2.721, ppl=6.59, wps=5908.7, ups=0.09, wpb=64828, bsz=128, num_updates=9701, lr=9.99304e-05, gnorm=2.148, loss_scale=1, train_wall=11, gb_free=2.8, wall=111697
2021-06-20 01:40:33 | INFO | train_inner | epoch 004: 756 / 3002 loss=2.578, ppl=5.97, wps=5915.2, ups=0.09, wpb=64881, bsz=128, num_updates=9702, lr=9.99304e-05, gnorm=2.27, loss_scale=1, train_wall=11, gb_free=2.8, wall=111708
2021-06-20 01:40:45 | INFO | train_inner | epoch 004: 757 / 3002 loss=2.58, ppl=5.98, wps=5758.9, ups=0.09, wpb=64831, bsz=128, num_updates=9703, lr=9.99304e-05, gnorm=2.238, loss_scale=1, train_wall=11, gb_free=2.8, wall=111719
2021-06-20 01:40:56 | INFO | train_inner | epoch 004: 758 / 3002 loss=2.626, ppl=6.17, wps=5817.6, ups=0.09, wpb=64843, bsz=128, num_updates=9704, lr=9.99304e-05, gnorm=2.163, loss_scale=1, train_wall=11, gb_free=2.8, wall=111730
2021-06-20 01:41:07 | INFO | train_inner | epoch 004: 759 / 3002 loss=2.642, ppl=6.24, wps=5863.4, ups=0.09, wpb=64872, bsz=128, num_updates=9705, lr=9.99304e-05, gnorm=2.104, loss_scale=1, train_wall=11, gb_free=2.8, wall=111741
2021-06-20 01:41:18 | INFO | train_inner | epoch 004: 760 / 3002 loss=2.399, ppl=5.27, wps=5779, ups=0.09, wpb=64814, bsz=128, num_updates=9706, lr=9.99303e-05, gnorm=2.113, loss_scale=1, train_wall=11, gb_free=2.8, wall=111752
2021-06-20 01:41:29 | INFO | train_inner | epoch 004: 761 / 3002 loss=2.57, ppl=5.94, wps=5920.3, ups=0.09, wpb=64838, bsz=128, num_updates=9707, lr=9.99303e-05, gnorm=2.39, loss_scale=1, train_wall=11, gb_free=2.8, wall=111763
2021-06-20 01:41:40 | INFO | train_inner | epoch 004: 762 / 3002 loss=2.564, ppl=5.91, wps=5764.8, ups=0.09, wpb=64784, bsz=128, num_updates=9708, lr=9.99303e-05, gnorm=2.641, loss_scale=1, train_wall=11, gb_free=2.8, wall=111775
2021-06-20 01:41:51 | INFO | train_inner | epoch 004: 763 / 3002 loss=2.451, ppl=5.47, wps=5917.4, ups=0.09, wpb=64894, bsz=128, num_updates=9709, lr=9.99303e-05, gnorm=2.158, loss_scale=1, train_wall=10, gb_free=2.8, wall=111786
2021-06-20 01:42:02 | INFO | train_inner | epoch 004: 764 / 3002 loss=2.541, ppl=5.82, wps=5780.7, ups=0.09, wpb=64794, bsz=128, num_updates=9710, lr=9.99303e-05, gnorm=2.842, loss_scale=1, train_wall=11, gb_free=2.8, wall=111797
2021-06-20 01:42:13 | INFO | train_inner | epoch 004: 765 / 3002 loss=2.464, ppl=5.52, wps=5872.7, ups=0.09, wpb=64859, bsz=128, num_updates=9711, lr=9.99303e-05, gnorm=2.344, loss_scale=1, train_wall=11, gb_free=2.8, wall=111808
2021-06-20 01:42:24 | INFO | train_inner | epoch 004: 766 / 3002 loss=2.521, ppl=5.74, wps=5933.1, ups=0.09, wpb=64865, bsz=128, num_updates=9712, lr=9.99303e-05, gnorm=4.592, loss_scale=1, train_wall=10, gb_free=2.8, wall=111819
2021-06-20 01:42:35 | INFO | train_inner | epoch 004: 767 / 3002 loss=2.384, ppl=5.22, wps=5841, ups=0.09, wpb=64804, bsz=128, num_updates=9713, lr=9.99303e-05, gnorm=2.384, loss_scale=1, train_wall=11, gb_free=2.8, wall=111830
2021-06-20 01:42:47 | INFO | train_inner | epoch 004: 768 / 3002 loss=2.586, ppl=6, wps=5795.4, ups=0.09, wpb=64825, bsz=128, num_updates=9714, lr=9.99303e-05, gnorm=2.306, loss_scale=1, train_wall=11, gb_free=2.8, wall=111841
2021-06-20 01:42:58 | INFO | train_inner | epoch 004: 769 / 3002 loss=2.583, ppl=5.99, wps=5928.6, ups=0.09, wpb=64784, bsz=128, num_updates=9715, lr=9.99303e-05, gnorm=2.155, loss_scale=1, train_wall=10, gb_free=2.8, wall=111852
2021-06-20 01:43:09 | INFO | train_inner | epoch 004: 770 / 3002 loss=2.492, ppl=5.63, wps=5829.9, ups=0.09, wpb=64769, bsz=128, num_updates=9716, lr=9.99303e-05, gnorm=2.03, loss_scale=1, train_wall=11, gb_free=2.8, wall=111863
2021-06-20 01:43:20 | INFO | train_inner | epoch 004: 771 / 3002 loss=2.752, ppl=6.74, wps=5821.8, ups=0.09, wpb=64837, bsz=128, num_updates=9717, lr=9.99303e-05, gnorm=2.037, loss_scale=1, train_wall=11, gb_free=2.8, wall=111874
2021-06-20 01:43:31 | INFO | train_inner | epoch 004: 772 / 3002 loss=2.727, ppl=6.62, wps=5752.5, ups=0.09, wpb=64814, bsz=128, num_updates=9718, lr=9.99303e-05, gnorm=2.104, loss_scale=1, train_wall=11, gb_free=2.8, wall=111885
2021-06-20 01:43:42 | INFO | train_inner | epoch 004: 773 / 3002 loss=2.615, ppl=6.13, wps=5813.1, ups=0.09, wpb=64729, bsz=128, num_updates=9719, lr=9.99302e-05, gnorm=2.198, loss_scale=1, train_wall=11, gb_free=2.8, wall=111897
2021-06-20 01:43:53 | INFO | train_inner | epoch 004: 774 / 3002 loss=2.544, ppl=5.83, wps=5759.1, ups=0.09, wpb=64733, bsz=128, num_updates=9720, lr=9.99302e-05, gnorm=2.141, loss_scale=1, train_wall=11, gb_free=2.8, wall=111908
2021-06-20 01:44:04 | INFO | train_inner | epoch 004: 775 / 3002 loss=2.623, ppl=6.16, wps=5907.4, ups=0.09, wpb=64845, bsz=128, num_updates=9721, lr=9.99302e-05, gnorm=2.387, loss_scale=1, train_wall=11, gb_free=2.8, wall=111919
2021-06-20 01:44:15 | INFO | train_inner | epoch 004: 776 / 3002 loss=2.754, ppl=6.75, wps=5866.7, ups=0.09, wpb=64800, bsz=128, num_updates=9722, lr=9.99302e-05, gnorm=2.103, loss_scale=1, train_wall=11, gb_free=2.8, wall=111930
2021-06-20 01:44:27 | INFO | train_inner | epoch 004: 777 / 3002 loss=2.518, ppl=5.73, wps=5756, ups=0.09, wpb=64810, bsz=128, num_updates=9723, lr=9.99302e-05, gnorm=2.209, loss_scale=1, train_wall=11, gb_free=2.8, wall=111941
2021-06-20 01:44:38 | INFO | train_inner | epoch 004: 778 / 3002 loss=2.785, ppl=6.89, wps=5911.7, ups=0.09, wpb=64807, bsz=128, num_updates=9724, lr=9.99302e-05, gnorm=2.135, loss_scale=1, train_wall=11, gb_free=2.8, wall=111952
2021-06-20 01:44:49 | INFO | train_inner | epoch 004: 779 / 3002 loss=2.466, ppl=5.52, wps=5923.9, ups=0.09, wpb=64870, bsz=128, num_updates=9725, lr=9.99302e-05, gnorm=2.168, loss_scale=1, train_wall=11, gb_free=2.8, wall=111963
2021-06-20 01:45:00 | INFO | train_inner | epoch 004: 780 / 3002 loss=2.498, ppl=5.65, wps=5675.4, ups=0.09, wpb=64856, bsz=128, num_updates=9726, lr=9.99302e-05, gnorm=2.139, loss_scale=1, train_wall=11, gb_free=2.8, wall=111974
2021-06-20 01:45:11 | INFO | train_inner | epoch 004: 781 / 3002 loss=2.719, ppl=6.59, wps=5827.4, ups=0.09, wpb=64807, bsz=128, num_updates=9727, lr=9.99302e-05, gnorm=2.246, loss_scale=1, train_wall=11, gb_free=2.8, wall=111986
2021-06-20 01:45:22 | INFO | train_inner | epoch 004: 782 / 3002 loss=2.671, ppl=6.37, wps=5925.8, ups=0.09, wpb=64833, bsz=128, num_updates=9728, lr=9.99302e-05, gnorm=4.814, loss_scale=1, train_wall=10, gb_free=2.8, wall=111996
2021-06-20 01:45:33 | INFO | train_inner | epoch 004: 783 / 3002 loss=2.502, ppl=5.66, wps=5970.7, ups=0.09, wpb=64881, bsz=128, num_updates=9729, lr=9.99302e-05, gnorm=22.29, loss_scale=1, train_wall=10, gb_free=2.8, wall=112007
2021-06-20 01:45:44 | INFO | train_inner | epoch 004: 784 / 3002 loss=2.658, ppl=6.31, wps=5755, ups=0.09, wpb=64840, bsz=128, num_updates=9730, lr=9.99302e-05, gnorm=2.092, loss_scale=1, train_wall=11, gb_free=2.8, wall=112019
2021-06-20 01:45:56 | INFO | train_inner | epoch 004: 785 / 3002 loss=2.573, ppl=5.95, wps=5766.9, ups=0.09, wpb=64838, bsz=128, num_updates=9731, lr=9.99301e-05, gnorm=2.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=112030
2021-06-20 01:46:07 | INFO | train_inner | epoch 004: 786 / 3002 loss=2.509, ppl=5.69, wps=5821.4, ups=0.09, wpb=64812, bsz=128, num_updates=9732, lr=9.99301e-05, gnorm=2.215, loss_scale=1, train_wall=11, gb_free=2.8, wall=112041
2021-06-20 01:46:18 | INFO | train_inner | epoch 004: 787 / 3002 loss=2.634, ppl=6.21, wps=5922, ups=0.09, wpb=64841, bsz=128, num_updates=9733, lr=9.99301e-05, gnorm=2.28, loss_scale=1, train_wall=10, gb_free=2.8, wall=112052
2021-06-20 01:46:29 | INFO | train_inner | epoch 004: 788 / 3002 loss=2.543, ppl=5.83, wps=5793.3, ups=0.09, wpb=64778, bsz=128, num_updates=9734, lr=9.99301e-05, gnorm=2.178, loss_scale=1, train_wall=11, gb_free=2.8, wall=112063
2021-06-20 01:46:40 | INFO | train_inner | epoch 004: 789 / 3002 loss=2.473, ppl=5.55, wps=5929.3, ups=0.09, wpb=64858, bsz=128, num_updates=9735, lr=9.99301e-05, gnorm=2.147, loss_scale=1, train_wall=10, gb_free=2.8, wall=112074
2021-06-20 01:46:51 | INFO | train_inner | epoch 004: 790 / 3002 loss=2.628, ppl=6.18, wps=5728.3, ups=0.09, wpb=64683, bsz=128, num_updates=9736, lr=9.99301e-05, gnorm=2.279, loss_scale=1, train_wall=11, gb_free=2.8, wall=112085
2021-06-20 01:47:02 | INFO | train_inner | epoch 004: 791 / 3002 loss=2.603, ppl=6.08, wps=5924.4, ups=0.09, wpb=64851, bsz=128, num_updates=9737, lr=9.99301e-05, gnorm=2.283, loss_scale=1, train_wall=10, gb_free=2.8, wall=112096
2021-06-20 01:47:13 | INFO | train_inner | epoch 004: 792 / 3002 loss=2.598, ppl=6.05, wps=5990.3, ups=0.09, wpb=64793, bsz=128, num_updates=9738, lr=9.99301e-05, gnorm=2.476, loss_scale=1, train_wall=10, gb_free=2.8, wall=112107
2021-06-20 01:47:24 | INFO | train_inner | epoch 004: 793 / 3002 loss=2.694, ppl=6.47, wps=5862.3, ups=0.09, wpb=64854, bsz=128, num_updates=9739, lr=9.99301e-05, gnorm=3.056, loss_scale=1, train_wall=11, gb_free=2.8, wall=112118
2021-06-20 01:47:35 | INFO | train_inner | epoch 004: 794 / 3002 loss=2.432, ppl=5.4, wps=5872.9, ups=0.09, wpb=64878, bsz=128, num_updates=9740, lr=9.99301e-05, gnorm=4.155, loss_scale=1, train_wall=11, gb_free=2.8, wall=112129
2021-06-20 01:47:46 | INFO | train_inner | epoch 004: 795 / 3002 loss=2.607, ppl=6.09, wps=5916.1, ups=0.09, wpb=64843, bsz=128, num_updates=9741, lr=9.99301e-05, gnorm=2.266, loss_scale=1, train_wall=11, gb_free=2.8, wall=112140
2021-06-20 01:47:57 | INFO | train_inner | epoch 004: 796 / 3002 loss=2.626, ppl=6.17, wps=5875.2, ups=0.09, wpb=64857, bsz=128, num_updates=9742, lr=9.99301e-05, gnorm=2.15, loss_scale=1, train_wall=11, gb_free=2.8, wall=112151
2021-06-20 01:48:08 | INFO | train_inner | epoch 004: 797 / 3002 loss=2.434, ppl=5.4, wps=5780.7, ups=0.09, wpb=64895, bsz=128, num_updates=9743, lr=9.99301e-05, gnorm=3.391, loss_scale=1, train_wall=11, gb_free=2.8, wall=112162
2021-06-20 01:48:19 | INFO | train_inner | epoch 004: 798 / 3002 loss=2.454, ppl=5.48, wps=5890.6, ups=0.09, wpb=64844, bsz=128, num_updates=9744, lr=9.993e-05, gnorm=2.184, loss_scale=1, train_wall=11, gb_free=2.8, wall=112173
2021-06-20 01:48:30 | INFO | train_inner | epoch 004: 799 / 3002 loss=2.565, ppl=5.92, wps=5962.3, ups=0.09, wpb=65001, bsz=128, num_updates=9745, lr=9.993e-05, gnorm=2.102, loss_scale=1, train_wall=10, gb_free=2.8, wall=112184
2021-06-20 01:48:41 | INFO | train_inner | epoch 004: 800 / 3002 loss=2.725, ppl=6.61, wps=5932.5, ups=0.09, wpb=64799, bsz=128, num_updates=9746, lr=9.993e-05, gnorm=2.094, loss_scale=1, train_wall=10, gb_free=2.8, wall=112195
2021-06-20 01:48:52 | INFO | train_inner | epoch 004: 801 / 3002 loss=2.741, ppl=6.68, wps=5918.6, ups=0.09, wpb=64725, bsz=128, num_updates=9747, lr=9.993e-05, gnorm=2.402, loss_scale=1, train_wall=10, gb_free=2.8, wall=112206
2021-06-20 01:49:03 | INFO | train_inner | epoch 004: 802 / 3002 loss=2.62, ppl=6.15, wps=5876.3, ups=0.09, wpb=64839, bsz=128, num_updates=9748, lr=9.993e-05, gnorm=2.135, loss_scale=1, train_wall=11, gb_free=2.8, wall=112217
2021-06-20 01:49:14 | INFO | train_inner | epoch 004: 803 / 3002 loss=2.657, ppl=6.31, wps=5893.3, ups=0.09, wpb=64801, bsz=128, num_updates=9749, lr=9.993e-05, gnorm=1.991, loss_scale=1, train_wall=11, gb_free=2.8, wall=112228
2021-06-20 01:49:25 | INFO | train_inner | epoch 004: 804 / 3002 loss=2.46, ppl=5.5, wps=5858.3, ups=0.09, wpb=64792, bsz=128, num_updates=9750, lr=9.993e-05, gnorm=2.119, loss_scale=1, train_wall=11, gb_free=2.8, wall=112239
2021-06-20 01:49:36 | INFO | train_inner | epoch 004: 805 / 3002 loss=2.656, ppl=6.3, wps=5872.6, ups=0.09, wpb=64844, bsz=128, num_updates=9751, lr=9.993e-05, gnorm=2.15, loss_scale=1, train_wall=11, gb_free=2.8, wall=112250
2021-06-20 01:49:47 | INFO | train_inner | epoch 004: 806 / 3002 loss=2.569, ppl=5.94, wps=5849, ups=0.09, wpb=64817, bsz=128, num_updates=9752, lr=9.993e-05, gnorm=2.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=112261
2021-06-20 01:49:58 | INFO | train_inner | epoch 004: 807 / 3002 loss=2.64, ppl=6.23, wps=5821.6, ups=0.09, wpb=64812, bsz=128, num_updates=9753, lr=9.993e-05, gnorm=2.578, loss_scale=1, train_wall=11, gb_free=2.8, wall=112273
2021-06-20 01:50:09 | INFO | train_inner | epoch 004: 808 / 3002 loss=2.628, ppl=6.18, wps=5842.9, ups=0.09, wpb=64822, bsz=128, num_updates=9754, lr=9.993e-05, gnorm=17.174, loss_scale=1, train_wall=11, gb_free=2.8, wall=112284
2021-06-20 01:50:21 | INFO | train_inner | epoch 004: 809 / 3002 loss=2.499, ppl=5.65, wps=5809.5, ups=0.09, wpb=64825, bsz=128, num_updates=9755, lr=9.993e-05, gnorm=2.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=112295
2021-06-20 01:50:32 | INFO | train_inner | epoch 004: 810 / 3002 loss=2.573, ppl=5.95, wps=5833.2, ups=0.09, wpb=64812, bsz=128, num_updates=9756, lr=9.99299e-05, gnorm=2.876, loss_scale=1, train_wall=11, gb_free=2.8, wall=112306
2021-06-20 01:50:43 | INFO | train_inner | epoch 004: 811 / 3002 loss=2.506, ppl=5.68, wps=5833.8, ups=0.09, wpb=64788, bsz=128, num_updates=9757, lr=9.99299e-05, gnorm=2.443, loss_scale=1, train_wall=11, gb_free=2.8, wall=112317
2021-06-20 01:50:54 | INFO | train_inner | epoch 004: 812 / 3002 loss=2.603, ppl=6.08, wps=5841.7, ups=0.09, wpb=64832, bsz=128, num_updates=9758, lr=9.99299e-05, gnorm=2.061, loss_scale=1, train_wall=11, gb_free=2.8, wall=112328
2021-06-20 01:51:05 | INFO | train_inner | epoch 004: 813 / 3002 loss=2.612, ppl=6.12, wps=5893.4, ups=0.09, wpb=64802, bsz=128, num_updates=9759, lr=9.99299e-05, gnorm=2.147, loss_scale=1, train_wall=11, gb_free=2.8, wall=112339
2021-06-20 01:51:16 | INFO | train_inner | epoch 004: 814 / 3002 loss=2.634, ppl=6.21, wps=5959.8, ups=0.09, wpb=64887, bsz=128, num_updates=9760, lr=9.99299e-05, gnorm=2.099, loss_scale=1, train_wall=10, gb_free=2.8, wall=112350
2021-06-20 01:51:27 | INFO | train_inner | epoch 004: 815 / 3002 loss=2.613, ppl=6.12, wps=5795.9, ups=0.09, wpb=64902, bsz=128, num_updates=9761, lr=9.99299e-05, gnorm=2.304, loss_scale=1, train_wall=11, gb_free=2.8, wall=112361
2021-06-20 01:51:38 | INFO | train_inner | epoch 004: 816 / 3002 loss=2.557, ppl=5.88, wps=5672.5, ups=0.09, wpb=64846, bsz=128, num_updates=9762, lr=9.99299e-05, gnorm=2.402, loss_scale=1, train_wall=11, gb_free=2.8, wall=112373
2021-06-20 01:51:50 | INFO | train_inner | epoch 004: 817 / 3002 loss=2.548, ppl=5.85, wps=5774.9, ups=0.09, wpb=64839, bsz=128, num_updates=9763, lr=9.99299e-05, gnorm=2.02, loss_scale=1, train_wall=11, gb_free=2.8, wall=112384
2021-06-20 01:52:00 | INFO | train_inner | epoch 004: 818 / 3002 loss=2.663, ppl=6.33, wps=5962.5, ups=0.09, wpb=64823, bsz=128, num_updates=9764, lr=9.99299e-05, gnorm=2.132, loss_scale=1, train_wall=10, gb_free=2.8, wall=112395
2021-06-20 01:52:11 | INFO | train_inner | epoch 004: 819 / 3002 loss=2.733, ppl=6.65, wps=5919.3, ups=0.09, wpb=64903, bsz=128, num_updates=9765, lr=9.99299e-05, gnorm=2.162, loss_scale=1, train_wall=10, gb_free=2.8, wall=112406
2021-06-20 01:52:23 | INFO | train_inner | epoch 004: 820 / 3002 loss=2.609, ppl=6.1, wps=5772.8, ups=0.09, wpb=64884, bsz=128, num_updates=9766, lr=9.99299e-05, gnorm=2.175, loss_scale=1, train_wall=11, gb_free=2.8, wall=112417
2021-06-20 01:52:34 | INFO | train_inner | epoch 004: 821 / 3002 loss=2.665, ppl=6.34, wps=5887.7, ups=0.09, wpb=64854, bsz=128, num_updates=9767, lr=9.99299e-05, gnorm=2.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=112428
2021-06-20 01:52:45 | INFO | train_inner | epoch 004: 822 / 3002 loss=2.538, ppl=5.81, wps=5971.4, ups=0.09, wpb=64800, bsz=128, num_updates=9768, lr=9.99299e-05, gnorm=2.017, loss_scale=1, train_wall=10, gb_free=2.8, wall=112439
2021-06-20 01:52:55 | INFO | train_inner | epoch 004: 823 / 3002 loss=2.468, ppl=5.53, wps=5908.3, ups=0.09, wpb=64801, bsz=128, num_updates=9769, lr=9.99298e-05, gnorm=2.246, loss_scale=1, train_wall=11, gb_free=2.8, wall=112450
2021-06-20 01:53:07 | INFO | train_inner | epoch 004: 824 / 3002 loss=2.585, ppl=6, wps=5856.8, ups=0.09, wpb=64791, bsz=128, num_updates=9770, lr=9.99298e-05, gnorm=2.172, loss_scale=1, train_wall=11, gb_free=2.8, wall=112461
2021-06-20 01:53:18 | INFO | train_inner | epoch 004: 825 / 3002 loss=2.738, ppl=6.67, wps=5823.5, ups=0.09, wpb=64793, bsz=128, num_updates=9771, lr=9.99298e-05, gnorm=2.154, loss_scale=1, train_wall=11, gb_free=2.8, wall=112472
2021-06-20 01:53:29 | INFO | train_inner | epoch 004: 826 / 3002 loss=2.509, ppl=5.69, wps=5896.5, ups=0.09, wpb=64813, bsz=128, num_updates=9772, lr=9.99298e-05, gnorm=2.169, loss_scale=1, train_wall=11, gb_free=2.8, wall=112483
2021-06-20 01:53:40 | INFO | train_inner | epoch 004: 827 / 3002 loss=2.559, ppl=5.89, wps=5845.4, ups=0.09, wpb=64723, bsz=128, num_updates=9773, lr=9.99298e-05, gnorm=2.078, loss_scale=1, train_wall=11, gb_free=2.8, wall=112494
2021-06-20 01:53:51 | INFO | train_inner | epoch 004: 828 / 3002 loss=2.43, ppl=5.39, wps=5923.7, ups=0.09, wpb=64801, bsz=128, num_updates=9774, lr=9.99298e-05, gnorm=2.098, loss_scale=1, train_wall=10, gb_free=2.8, wall=112505
2021-06-20 01:54:02 | INFO | train_inner | epoch 004: 829 / 3002 loss=2.553, ppl=5.87, wps=5862.8, ups=0.09, wpb=64853, bsz=128, num_updates=9775, lr=9.99298e-05, gnorm=2.736, loss_scale=2, train_wall=11, gb_free=2.8, wall=112516
2021-06-20 01:54:13 | INFO | train_inner | epoch 004: 830 / 3002 loss=2.631, ppl=6.19, wps=5882.5, ups=0.09, wpb=64844, bsz=128, num_updates=9776, lr=9.99298e-05, gnorm=2.33, loss_scale=2, train_wall=11, gb_free=2.8, wall=112527
2021-06-20 01:54:24 | INFO | train_inner | epoch 004: 831 / 3002 loss=2.708, ppl=6.53, wps=5837.8, ups=0.09, wpb=64901, bsz=128, num_updates=9777, lr=9.99298e-05, gnorm=3.067, loss_scale=2, train_wall=11, gb_free=2.8, wall=112538
2021-06-20 01:54:35 | INFO | train_inner | epoch 004: 832 / 3002 loss=2.579, ppl=5.98, wps=5842.9, ups=0.09, wpb=64832, bsz=128, num_updates=9778, lr=9.99298e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=112549
2021-06-20 01:54:46 | INFO | train_inner | epoch 004: 833 / 3002 loss=2.491, ppl=5.62, wps=6041.9, ups=0.09, wpb=64908, bsz=128, num_updates=9779, lr=9.99298e-05, gnorm=2.081, loss_scale=2, train_wall=10, gb_free=2.8, wall=112560
2021-06-20 01:54:57 | INFO | train_inner | epoch 004: 834 / 3002 loss=2.465, ppl=5.52, wps=5954.7, ups=0.09, wpb=64860, bsz=128, num_updates=9780, lr=9.99298e-05, gnorm=1.97, loss_scale=2, train_wall=10, gb_free=2.8, wall=112571
2021-06-20 01:55:08 | INFO | train_inner | epoch 004: 835 / 3002 loss=2.56, ppl=5.9, wps=5798.7, ups=0.09, wpb=64853, bsz=128, num_updates=9781, lr=9.99297e-05, gnorm=2.041, loss_scale=2, train_wall=11, gb_free=2.8, wall=112582
2021-06-20 01:55:19 | INFO | train_inner | epoch 004: 836 / 3002 loss=2.736, ppl=6.66, wps=5905.2, ups=0.09, wpb=64791, bsz=128, num_updates=9782, lr=9.99297e-05, gnorm=2.227, loss_scale=2, train_wall=11, gb_free=2.8, wall=112593
2021-06-20 01:55:29 | INFO | train_inner | epoch 004: 837 / 3002 loss=2.417, ppl=5.34, wps=6110.7, ups=0.09, wpb=64886, bsz=128, num_updates=9783, lr=9.99297e-05, gnorm=1.943, loss_scale=2, train_wall=10, gb_free=2.8, wall=112604
2021-06-20 01:55:40 | INFO | train_inner | epoch 004: 838 / 3002 loss=2.555, ppl=5.88, wps=5873.6, ups=0.09, wpb=64751, bsz=128, num_updates=9784, lr=9.99297e-05, gnorm=2.44, loss_scale=2, train_wall=11, gb_free=2.8, wall=112615
2021-06-20 01:55:52 | INFO | train_inner | epoch 004: 839 / 3002 loss=2.648, ppl=6.27, wps=5808.1, ups=0.09, wpb=64790, bsz=128, num_updates=9785, lr=9.99297e-05, gnorm=2.105, loss_scale=2, train_wall=11, gb_free=2.8, wall=112626
2021-06-20 01:56:03 | INFO | train_inner | epoch 004: 840 / 3002 loss=2.426, ppl=5.37, wps=5865.7, ups=0.09, wpb=64826, bsz=128, num_updates=9786, lr=9.99297e-05, gnorm=2.076, loss_scale=2, train_wall=11, gb_free=2.8, wall=112637
2021-06-20 01:56:14 | INFO | train_inner | epoch 004: 841 / 3002 loss=2.71, ppl=6.54, wps=5901.9, ups=0.09, wpb=64819, bsz=128, num_updates=9787, lr=9.99297e-05, gnorm=2.111, loss_scale=2, train_wall=10, gb_free=2.8, wall=112648
2021-06-20 01:56:25 | INFO | train_inner | epoch 004: 842 / 3002 loss=2.794, ppl=6.93, wps=5818.6, ups=0.09, wpb=64885, bsz=128, num_updates=9788, lr=9.99297e-05, gnorm=2.207, loss_scale=2, train_wall=11, gb_free=2.8, wall=112659
2021-06-20 01:56:36 | INFO | train_inner | epoch 004: 843 / 3002 loss=2.459, ppl=5.5, wps=5762.2, ups=0.09, wpb=64838, bsz=128, num_updates=9789, lr=9.99297e-05, gnorm=2.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=112670
2021-06-20 01:56:47 | INFO | train_inner | epoch 004: 844 / 3002 loss=2.581, ppl=5.99, wps=5776.8, ups=0.09, wpb=64723, bsz=128, num_updates=9790, lr=9.99297e-05, gnorm=2.06, loss_scale=2, train_wall=11, gb_free=2.8, wall=112682
2021-06-20 01:56:58 | INFO | train_inner | epoch 004: 845 / 3002 loss=2.47, ppl=5.54, wps=5892.4, ups=0.09, wpb=64843, bsz=128, num_updates=9791, lr=9.99297e-05, gnorm=2.069, loss_scale=2, train_wall=11, gb_free=2.8, wall=112693
2021-06-20 01:57:09 | INFO | train_inner | epoch 004: 846 / 3002 loss=2.62, ppl=6.15, wps=5955.2, ups=0.09, wpb=64798, bsz=128, num_updates=9792, lr=9.99297e-05, gnorm=3.039, loss_scale=2, train_wall=10, gb_free=2.8, wall=112703
2021-06-20 01:57:20 | INFO | train_inner | epoch 004: 847 / 3002 loss=2.802, ppl=6.98, wps=5793.1, ups=0.09, wpb=64796, bsz=128, num_updates=9793, lr=9.99297e-05, gnorm=2.111, loss_scale=2, train_wall=11, gb_free=2.8, wall=112715
2021-06-20 01:57:31 | INFO | train_inner | epoch 004: 848 / 3002 loss=2.69, ppl=6.45, wps=5824.9, ups=0.09, wpb=64889, bsz=128, num_updates=9794, lr=9.99296e-05, gnorm=2.238, loss_scale=2, train_wall=11, gb_free=2.8, wall=112726
2021-06-20 01:57:42 | INFO | train_inner | epoch 004: 849 / 3002 loss=2.601, ppl=6.07, wps=5866.5, ups=0.09, wpb=64833, bsz=128, num_updates=9795, lr=9.99296e-05, gnorm=2.086, loss_scale=2, train_wall=11, gb_free=2.8, wall=112737
2021-06-20 01:57:54 | INFO | train_inner | epoch 004: 850 / 3002 loss=2.692, ppl=6.46, wps=5795.7, ups=0.09, wpb=64766, bsz=128, num_updates=9796, lr=9.99296e-05, gnorm=3.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=112748
2021-06-20 01:58:05 | INFO | train_inner | epoch 004: 851 / 3002 loss=2.686, ppl=6.43, wps=5849.4, ups=0.09, wpb=64652, bsz=128, num_updates=9797, lr=9.99296e-05, gnorm=2.132, loss_scale=2, train_wall=11, gb_free=2.8, wall=112759
2021-06-20 01:58:16 | INFO | train_inner | epoch 004: 852 / 3002 loss=2.46, ppl=5.5, wps=5824.3, ups=0.09, wpb=64865, bsz=128, num_updates=9798, lr=9.99296e-05, gnorm=2.208, loss_scale=2, train_wall=11, gb_free=2.8, wall=112770
2021-06-20 01:58:27 | INFO | train_inner | epoch 004: 853 / 3002 loss=2.559, ppl=5.89, wps=5885.2, ups=0.09, wpb=64869, bsz=128, num_updates=9799, lr=9.99296e-05, gnorm=2.096, loss_scale=2, train_wall=11, gb_free=2.8, wall=112781
2021-06-20 01:58:38 | INFO | train_inner | epoch 004: 854 / 3002 loss=2.853, ppl=7.22, wps=5851, ups=0.09, wpb=64789, bsz=128, num_updates=9800, lr=9.99296e-05, gnorm=2.137, loss_scale=2, train_wall=11, gb_free=2.8, wall=112792
2021-06-20 01:58:49 | INFO | train_inner | epoch 004: 855 / 3002 loss=2.585, ppl=6, wps=5813.2, ups=0.09, wpb=64671, bsz=128, num_updates=9801, lr=9.99296e-05, gnorm=4.555, loss_scale=2, train_wall=11, gb_free=2.8, wall=112803
2021-06-20 01:59:00 | INFO | train_inner | epoch 004: 856 / 3002 loss=2.561, ppl=5.9, wps=5870.7, ups=0.09, wpb=64800, bsz=128, num_updates=9802, lr=9.99296e-05, gnorm=2.124, loss_scale=2, train_wall=11, gb_free=2.8, wall=112814
2021-06-20 01:59:11 | INFO | train_inner | epoch 004: 857 / 3002 loss=2.438, ppl=5.42, wps=5830.9, ups=0.09, wpb=64820, bsz=128, num_updates=9803, lr=9.99296e-05, gnorm=2.142, loss_scale=2, train_wall=11, gb_free=2.8, wall=112826
2021-06-20 01:59:22 | INFO | train_inner | epoch 004: 858 / 3002 loss=2.617, ppl=6.13, wps=6031.9, ups=0.09, wpb=64743, bsz=128, num_updates=9804, lr=9.99296e-05, gnorm=2.069, loss_scale=2, train_wall=10, gb_free=2.8, wall=112836
2021-06-20 01:59:33 | INFO | train_inner | epoch 004: 859 / 3002 loss=2.595, ppl=6.04, wps=5882.2, ups=0.09, wpb=64847, bsz=128, num_updates=9805, lr=9.99296e-05, gnorm=2.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=112847
2021-06-20 01:59:44 | INFO | train_inner | epoch 004: 860 / 3002 loss=2.727, ppl=6.62, wps=5845.7, ups=0.09, wpb=64814, bsz=128, num_updates=9806, lr=9.99295e-05, gnorm=2.21, loss_scale=2, train_wall=11, gb_free=2.8, wall=112858
2021-06-20 01:59:55 | INFO | train_inner | epoch 004: 861 / 3002 loss=2.723, ppl=6.6, wps=5963.9, ups=0.09, wpb=64878, bsz=128, num_updates=9807, lr=9.99295e-05, gnorm=2.069, loss_scale=2, train_wall=10, gb_free=2.8, wall=112869
2021-06-20 02:00:06 | INFO | train_inner | epoch 004: 862 / 3002 loss=2.475, ppl=5.56, wps=5854.1, ups=0.09, wpb=64823, bsz=128, num_updates=9808, lr=9.99295e-05, gnorm=2.007, loss_scale=2, train_wall=11, gb_free=2.8, wall=112880
2021-06-20 02:00:17 | INFO | train_inner | epoch 004: 863 / 3002 loss=2.483, ppl=5.59, wps=5938, ups=0.09, wpb=64928, bsz=128, num_updates=9809, lr=9.99295e-05, gnorm=2.09, loss_scale=2, train_wall=10, gb_free=2.8, wall=112891
2021-06-20 02:00:28 | INFO | train_inner | epoch 004: 864 / 3002 loss=2.649, ppl=6.27, wps=5894.6, ups=0.09, wpb=64861, bsz=128, num_updates=9810, lr=9.99295e-05, gnorm=2.372, loss_scale=2, train_wall=11, gb_free=2.8, wall=112902
2021-06-20 02:00:39 | INFO | train_inner | epoch 004: 865 / 3002 loss=2.581, ppl=5.99, wps=5851.1, ups=0.09, wpb=64868, bsz=128, num_updates=9811, lr=9.99295e-05, gnorm=2.633, loss_scale=2, train_wall=11, gb_free=2.8, wall=112913
2021-06-20 02:00:50 | INFO | train_inner | epoch 004: 866 / 3002 loss=2.668, ppl=6.36, wps=5837, ups=0.09, wpb=64876, bsz=128, num_updates=9812, lr=9.99295e-05, gnorm=2.339, loss_scale=2, train_wall=11, gb_free=2.8, wall=112924
2021-06-20 02:01:01 | INFO | train_inner | epoch 004: 867 / 3002 loss=2.757, ppl=6.76, wps=5908, ups=0.09, wpb=64849, bsz=128, num_updates=9813, lr=9.99295e-05, gnorm=2.073, loss_scale=2, train_wall=10, gb_free=2.8, wall=112935
2021-06-20 02:01:12 | INFO | train_inner | epoch 004: 868 / 3002 loss=2.596, ppl=6.04, wps=5983.6, ups=0.09, wpb=64781, bsz=128, num_updates=9814, lr=9.99295e-05, gnorm=2.018, loss_scale=2, train_wall=10, gb_free=2.8, wall=112946
2021-06-20 02:01:23 | INFO | train_inner | epoch 004: 869 / 3002 loss=2.505, ppl=5.68, wps=5765.3, ups=0.09, wpb=64719, bsz=128, num_updates=9815, lr=9.99295e-05, gnorm=2.092, loss_scale=2, train_wall=11, gb_free=2.8, wall=112958
2021-06-20 02:01:34 | INFO | train_inner | epoch 004: 870 / 3002 loss=2.687, ppl=6.44, wps=5975.4, ups=0.09, wpb=64836, bsz=128, num_updates=9816, lr=9.99295e-05, gnorm=2.039, loss_scale=2, train_wall=10, gb_free=2.8, wall=112968
2021-06-20 02:01:45 | INFO | train_inner | epoch 004: 871 / 3002 loss=2.58, ppl=5.98, wps=5847.1, ups=0.09, wpb=64804, bsz=128, num_updates=9817, lr=9.99295e-05, gnorm=2.437, loss_scale=2, train_wall=11, gb_free=2.8, wall=112979
2021-06-20 02:01:56 | INFO | train_inner | epoch 004: 872 / 3002 loss=2.677, ppl=6.4, wps=5813.8, ups=0.09, wpb=64709, bsz=128, num_updates=9818, lr=9.99295e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=112991
2021-06-20 02:02:07 | INFO | train_inner | epoch 004: 873 / 3002 loss=2.565, ppl=5.92, wps=5874.8, ups=0.09, wpb=64839, bsz=128, num_updates=9819, lr=9.99294e-05, gnorm=5.522, loss_scale=2, train_wall=11, gb_free=2.8, wall=113002
2021-06-20 02:02:19 | INFO | train_inner | epoch 004: 874 / 3002 loss=2.557, ppl=5.88, wps=5773.3, ups=0.09, wpb=64833, bsz=128, num_updates=9820, lr=9.99294e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=113013
2021-06-20 02:02:30 | INFO | train_inner | epoch 004: 875 / 3002 loss=2.633, ppl=6.2, wps=5856.8, ups=0.09, wpb=64768, bsz=128, num_updates=9821, lr=9.99294e-05, gnorm=2.24, loss_scale=2, train_wall=11, gb_free=2.8, wall=113024
2021-06-20 02:02:41 | INFO | train_inner | epoch 004: 876 / 3002 loss=2.531, ppl=5.78, wps=5826.1, ups=0.09, wpb=64866, bsz=128, num_updates=9822, lr=9.99294e-05, gnorm=2.814, loss_scale=2, train_wall=11, gb_free=2.8, wall=113035
2021-06-20 02:02:52 | INFO | train_inner | epoch 004: 877 / 3002 loss=2.615, ppl=6.12, wps=5762.8, ups=0.09, wpb=64875, bsz=128, num_updates=9823, lr=9.99294e-05, gnorm=2.058, loss_scale=2, train_wall=11, gb_free=2.8, wall=113046
2021-06-20 02:03:03 | INFO | train_inner | epoch 004: 878 / 3002 loss=2.516, ppl=5.72, wps=5803.8, ups=0.09, wpb=64822, bsz=128, num_updates=9824, lr=9.99294e-05, gnorm=21.087, loss_scale=2, train_wall=11, gb_free=2.8, wall=113057
2021-06-20 02:03:14 | INFO | train_inner | epoch 004: 879 / 3002 loss=2.65, ppl=6.28, wps=5817.4, ups=0.09, wpb=64836, bsz=128, num_updates=9825, lr=9.99294e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=113069
2021-06-20 02:03:25 | INFO | train_inner | epoch 004: 880 / 3002 loss=2.663, ppl=6.33, wps=5856.2, ups=0.09, wpb=64692, bsz=128, num_updates=9826, lr=9.99294e-05, gnorm=2.111, loss_scale=2, train_wall=11, gb_free=2.8, wall=113080
2021-06-20 02:03:36 | INFO | train_inner | epoch 004: 881 / 3002 loss=2.561, ppl=5.9, wps=5840.3, ups=0.09, wpb=64787, bsz=128, num_updates=9827, lr=9.99294e-05, gnorm=2.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=113091
2021-06-20 02:03:48 | INFO | train_inner | epoch 004: 882 / 3002 loss=2.711, ppl=6.55, wps=5806.4, ups=0.09, wpb=64692, bsz=128, num_updates=9828, lr=9.99294e-05, gnorm=2.178, loss_scale=2, train_wall=11, gb_free=2.8, wall=113102
2021-06-20 02:03:59 | INFO | train_inner | epoch 004: 883 / 3002 loss=2.598, ppl=6.05, wps=5869.9, ups=0.09, wpb=64866, bsz=128, num_updates=9829, lr=9.99294e-05, gnorm=2.112, loss_scale=2, train_wall=11, gb_free=2.8, wall=113113
2021-06-20 02:04:10 | INFO | train_inner | epoch 004: 884 / 3002 loss=2.61, ppl=6.11, wps=5846, ups=0.09, wpb=64826, bsz=128, num_updates=9830, lr=9.99294e-05, gnorm=2.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=113124
2021-06-20 02:04:21 | INFO | train_inner | epoch 004: 885 / 3002 loss=2.528, ppl=5.77, wps=5944.1, ups=0.09, wpb=64730, bsz=128, num_updates=9831, lr=9.99293e-05, gnorm=2.105, loss_scale=2, train_wall=10, gb_free=2.8, wall=113135
2021-06-20 02:04:32 | INFO | train_inner | epoch 004: 886 / 3002 loss=2.594, ppl=6.04, wps=5838, ups=0.09, wpb=64799, bsz=128, num_updates=9832, lr=9.99293e-05, gnorm=3.145, loss_scale=2, train_wall=11, gb_free=2.8, wall=113146
2021-06-20 02:04:43 | INFO | train_inner | epoch 004: 887 / 3002 loss=2.714, ppl=6.56, wps=5893, ups=0.09, wpb=64857, bsz=128, num_updates=9833, lr=9.99293e-05, gnorm=2.184, loss_scale=2, train_wall=11, gb_free=2.8, wall=113157
2021-06-20 02:04:54 | INFO | train_inner | epoch 004: 888 / 3002 loss=2.694, ppl=6.47, wps=5881.5, ups=0.09, wpb=64810, bsz=128, num_updates=9834, lr=9.99293e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=113168
2021-06-20 02:05:05 | INFO | train_inner | epoch 004: 889 / 3002 loss=2.688, ppl=6.44, wps=5981.6, ups=0.09, wpb=64864, bsz=128, num_updates=9835, lr=9.99293e-05, gnorm=2.222, loss_scale=2, train_wall=10, gb_free=2.8, wall=113179
2021-06-20 02:05:16 | INFO | train_inner | epoch 004: 890 / 3002 loss=2.694, ppl=6.47, wps=5778.4, ups=0.09, wpb=64718, bsz=128, num_updates=9836, lr=9.99293e-05, gnorm=4.845, loss_scale=2, train_wall=11, gb_free=2.8, wall=113190
2021-06-20 02:05:27 | INFO | train_inner | epoch 004: 891 / 3002 loss=2.68, ppl=6.41, wps=5813.5, ups=0.09, wpb=64819, bsz=128, num_updates=9837, lr=9.99293e-05, gnorm=2.463, loss_scale=2, train_wall=11, gb_free=2.8, wall=113201
2021-06-20 02:05:38 | INFO | train_inner | epoch 004: 892 / 3002 loss=2.608, ppl=6.09, wps=5910.7, ups=0.09, wpb=64766, bsz=128, num_updates=9838, lr=9.99293e-05, gnorm=3.282, loss_scale=2, train_wall=11, gb_free=2.8, wall=113212
2021-06-20 02:05:49 | INFO | train_inner | epoch 004: 893 / 3002 loss=2.59, ppl=6.02, wps=5785.8, ups=0.09, wpb=64773, bsz=128, num_updates=9839, lr=9.99293e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=113223
2021-06-20 02:06:00 | INFO | train_inner | epoch 004: 894 / 3002 loss=2.669, ppl=6.36, wps=5839.4, ups=0.09, wpb=64887, bsz=128, num_updates=9840, lr=9.99293e-05, gnorm=3.477, loss_scale=2, train_wall=11, gb_free=2.8, wall=113235
2021-06-20 02:06:11 | INFO | train_inner | epoch 004: 895 / 3002 loss=2.9, ppl=7.46, wps=5899.2, ups=0.09, wpb=64708, bsz=128, num_updates=9841, lr=9.99293e-05, gnorm=3.274, loss_scale=2, train_wall=10, gb_free=2.8, wall=113245
2021-06-20 02:06:22 | INFO | train_inner | epoch 004: 896 / 3002 loss=2.605, ppl=6.08, wps=5912.2, ups=0.09, wpb=64819, bsz=128, num_updates=9842, lr=9.99293e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=113256
2021-06-20 02:06:33 | INFO | train_inner | epoch 004: 897 / 3002 loss=2.754, ppl=6.75, wps=5785.6, ups=0.09, wpb=64770, bsz=128, num_updates=9843, lr=9.99293e-05, gnorm=2.186, loss_scale=2, train_wall=11, gb_free=2.8, wall=113268
2021-06-20 02:06:44 | INFO | train_inner | epoch 004: 898 / 3002 loss=2.591, ppl=6.03, wps=5854.4, ups=0.09, wpb=64758, bsz=128, num_updates=9844, lr=9.99292e-05, gnorm=3.872, loss_scale=2, train_wall=11, gb_free=2.8, wall=113279
2021-06-20 02:06:55 | INFO | train_inner | epoch 004: 899 / 3002 loss=2.603, ppl=6.08, wps=5903.9, ups=0.09, wpb=64838, bsz=128, num_updates=9845, lr=9.99292e-05, gnorm=2.232, loss_scale=2, train_wall=11, gb_free=2.8, wall=113290
2021-06-20 02:07:06 | INFO | train_inner | epoch 004: 900 / 3002 loss=2.533, ppl=5.79, wps=5890.7, ups=0.09, wpb=64848, bsz=128, num_updates=9846, lr=9.99292e-05, gnorm=2.856, loss_scale=2, train_wall=11, gb_free=2.8, wall=113301
2021-06-20 02:07:17 | INFO | train_inner | epoch 004: 901 / 3002 loss=2.6, ppl=6.06, wps=5884.4, ups=0.09, wpb=64856, bsz=128, num_updates=9847, lr=9.99292e-05, gnorm=2.156, loss_scale=2, train_wall=11, gb_free=2.8, wall=113312
2021-06-20 02:07:28 | INFO | train_inner | epoch 004: 902 / 3002 loss=2.658, ppl=6.31, wps=5835.7, ups=0.09, wpb=64827, bsz=128, num_updates=9848, lr=9.99292e-05, gnorm=2.215, loss_scale=2, train_wall=11, gb_free=2.8, wall=113323
2021-06-20 02:07:40 | INFO | train_inner | epoch 004: 903 / 3002 loss=2.602, ppl=6.07, wps=5846.3, ups=0.09, wpb=64782, bsz=128, num_updates=9849, lr=9.99292e-05, gnorm=2.194, loss_scale=2, train_wall=11, gb_free=2.8, wall=113334
2021-06-20 02:07:51 | INFO | train_inner | epoch 004: 904 / 3002 loss=2.653, ppl=6.29, wps=5853.4, ups=0.09, wpb=64842, bsz=128, num_updates=9850, lr=9.99292e-05, gnorm=2.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=113345
2021-06-20 02:08:01 | INFO | train_inner | epoch 004: 905 / 3002 loss=2.668, ppl=6.36, wps=6004.8, ups=0.09, wpb=64767, bsz=128, num_updates=9851, lr=9.99292e-05, gnorm=2.122, loss_scale=2, train_wall=10, gb_free=2.8, wall=113356
2021-06-20 02:08:13 | INFO | train_inner | epoch 004: 906 / 3002 loss=2.772, ppl=6.83, wps=5843.7, ups=0.09, wpb=64825, bsz=128, num_updates=9852, lr=9.99292e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=113367
2021-06-20 02:08:23 | INFO | train_inner | epoch 004: 907 / 3002 loss=2.723, ppl=6.6, wps=5911.1, ups=0.09, wpb=64834, bsz=128, num_updates=9853, lr=9.99292e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=113378
2021-06-20 02:08:35 | INFO | train_inner | epoch 004: 908 / 3002 loss=2.612, ppl=6.11, wps=5819, ups=0.09, wpb=64805, bsz=128, num_updates=9854, lr=9.99292e-05, gnorm=2.11, loss_scale=2, train_wall=11, gb_free=2.8, wall=113389
2021-06-20 02:08:46 | INFO | train_inner | epoch 004: 909 / 3002 loss=2.822, ppl=7.07, wps=5754.2, ups=0.09, wpb=64800, bsz=128, num_updates=9855, lr=9.99292e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=113400
2021-06-20 02:08:57 | INFO | train_inner | epoch 004: 910 / 3002 loss=2.709, ppl=6.54, wps=5866.4, ups=0.09, wpb=64802, bsz=128, num_updates=9856, lr=9.99291e-05, gnorm=2.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=113411
2021-06-20 02:09:08 | INFO | train_inner | epoch 004: 911 / 3002 loss=2.621, ppl=6.15, wps=5839.6, ups=0.09, wpb=64802, bsz=128, num_updates=9857, lr=9.99291e-05, gnorm=2.07, loss_scale=2, train_wall=11, gb_free=2.8, wall=113422
2021-06-20 02:09:19 | INFO | train_inner | epoch 004: 912 / 3002 loss=2.586, ppl=6, wps=5766.9, ups=0.09, wpb=64771, bsz=128, num_updates=9858, lr=9.99291e-05, gnorm=3.779, loss_scale=2, train_wall=11, gb_free=2.8, wall=113434
2021-06-20 02:09:30 | INFO | train_inner | epoch 004: 913 / 3002 loss=2.525, ppl=5.75, wps=5873, ups=0.09, wpb=64956, bsz=128, num_updates=9859, lr=9.99291e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=113445
2021-06-20 02:09:41 | INFO | train_inner | epoch 004: 914 / 3002 loss=2.708, ppl=6.53, wps=5864.1, ups=0.09, wpb=64792, bsz=128, num_updates=9860, lr=9.99291e-05, gnorm=2.105, loss_scale=2, train_wall=11, gb_free=2.8, wall=113456
2021-06-20 02:09:52 | INFO | train_inner | epoch 004: 915 / 3002 loss=2.513, ppl=5.71, wps=5938.6, ups=0.09, wpb=64862, bsz=128, num_updates=9861, lr=9.99291e-05, gnorm=2.07, loss_scale=2, train_wall=10, gb_free=2.8, wall=113467
2021-06-20 02:10:03 | INFO | train_inner | epoch 004: 916 / 3002 loss=2.617, ppl=6.13, wps=5922.2, ups=0.09, wpb=64893, bsz=128, num_updates=9862, lr=9.99291e-05, gnorm=1.998, loss_scale=2, train_wall=10, gb_free=2.8, wall=113478
2021-06-20 02:10:14 | INFO | train_inner | epoch 004: 917 / 3002 loss=2.656, ppl=6.3, wps=5802.7, ups=0.09, wpb=64755, bsz=128, num_updates=9863, lr=9.99291e-05, gnorm=2.149, loss_scale=2, train_wall=11, gb_free=2.8, wall=113489
2021-06-20 02:10:26 | INFO | train_inner | epoch 004: 918 / 3002 loss=2.672, ppl=6.37, wps=5821.5, ups=0.09, wpb=64779, bsz=128, num_updates=9864, lr=9.99291e-05, gnorm=2.421, loss_scale=2, train_wall=11, gb_free=2.8, wall=113500
2021-06-20 02:10:36 | INFO | train_inner | epoch 004: 919 / 3002 loss=2.511, ppl=5.7, wps=5957.7, ups=0.09, wpb=64842, bsz=128, num_updates=9865, lr=9.99291e-05, gnorm=2.154, loss_scale=2, train_wall=10, gb_free=2.8, wall=113511
2021-06-20 02:10:47 | INFO | train_inner | epoch 004: 920 / 3002 loss=2.47, ppl=5.54, wps=5911.1, ups=0.09, wpb=64894, bsz=128, num_updates=9866, lr=9.99291e-05, gnorm=2.1, loss_scale=2, train_wall=11, gb_free=2.8, wall=113522
2021-06-20 02:10:58 | INFO | train_inner | epoch 004: 921 / 3002 loss=2.497, ppl=5.65, wps=5857.8, ups=0.09, wpb=64815, bsz=128, num_updates=9867, lr=9.99291e-05, gnorm=2.732, loss_scale=2, train_wall=11, gb_free=2.8, wall=113533
2021-06-20 02:11:10 | INFO | train_inner | epoch 004: 922 / 3002 loss=2.502, ppl=5.66, wps=5864.1, ups=0.09, wpb=64883, bsz=128, num_updates=9868, lr=9.99291e-05, gnorm=2.139, loss_scale=2, train_wall=11, gb_free=2.8, wall=113544
2021-06-20 02:11:21 | INFO | train_inner | epoch 004: 923 / 3002 loss=2.612, ppl=6.11, wps=5754.8, ups=0.09, wpb=64907, bsz=128, num_updates=9869, lr=9.9929e-05, gnorm=2.339, loss_scale=2, train_wall=11, gb_free=2.8, wall=113555
2021-06-20 02:11:32 | INFO | train_inner | epoch 004: 924 / 3002 loss=2.778, ppl=6.86, wps=5867.1, ups=0.09, wpb=64866, bsz=128, num_updates=9870, lr=9.9929e-05, gnorm=2.554, loss_scale=2, train_wall=11, gb_free=2.8, wall=113566
2021-06-20 02:11:43 | INFO | train_inner | epoch 004: 925 / 3002 loss=2.738, ppl=6.67, wps=5823.3, ups=0.09, wpb=64833, bsz=128, num_updates=9871, lr=9.9929e-05, gnorm=2.414, loss_scale=2, train_wall=11, gb_free=2.8, wall=113577
2021-06-20 02:11:54 | INFO | train_inner | epoch 004: 926 / 3002 loss=2.506, ppl=5.68, wps=5800, ups=0.09, wpb=64861, bsz=128, num_updates=9872, lr=9.9929e-05, gnorm=2.064, loss_scale=2, train_wall=11, gb_free=2.8, wall=113589
2021-06-20 02:12:05 | INFO | train_inner | epoch 004: 927 / 3002 loss=2.535, ppl=5.8, wps=5807.6, ups=0.09, wpb=64893, bsz=128, num_updates=9873, lr=9.9929e-05, gnorm=4.601, loss_scale=2, train_wall=11, gb_free=2.8, wall=113600
2021-06-20 02:12:16 | INFO | train_inner | epoch 004: 928 / 3002 loss=2.659, ppl=6.32, wps=5893.1, ups=0.09, wpb=64785, bsz=128, num_updates=9874, lr=9.9929e-05, gnorm=9.52, loss_scale=2, train_wall=11, gb_free=2.8, wall=113611
2021-06-20 02:12:27 | INFO | train_inner | epoch 004: 929 / 3002 loss=2.625, ppl=6.17, wps=5992.6, ups=0.09, wpb=64934, bsz=128, num_updates=9875, lr=9.9929e-05, gnorm=2.41, loss_scale=2, train_wall=10, gb_free=2.8, wall=113622
2021-06-20 02:12:38 | INFO | train_inner | epoch 004: 930 / 3002 loss=2.683, ppl=6.42, wps=5953.2, ups=0.09, wpb=64780, bsz=128, num_updates=9876, lr=9.9929e-05, gnorm=2.549, loss_scale=2, train_wall=10, gb_free=2.8, wall=113632
2021-06-20 02:12:49 | INFO | train_inner | epoch 004: 931 / 3002 loss=2.8, ppl=6.97, wps=5864.2, ups=0.09, wpb=64765, bsz=128, num_updates=9877, lr=9.9929e-05, gnorm=4.022, loss_scale=2, train_wall=11, gb_free=2.8, wall=113643
2021-06-20 02:13:00 | INFO | train_inner | epoch 004: 932 / 3002 loss=2.609, ppl=6.1, wps=5763, ups=0.09, wpb=64820, bsz=128, num_updates=9878, lr=9.9929e-05, gnorm=2.267, loss_scale=2, train_wall=11, gb_free=2.8, wall=113655
2021-06-20 02:13:12 | INFO | train_inner | epoch 004: 933 / 3002 loss=2.558, ppl=5.89, wps=5791.2, ups=0.09, wpb=64842, bsz=128, num_updates=9879, lr=9.9929e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=113666
2021-06-20 02:13:23 | INFO | train_inner | epoch 004: 934 / 3002 loss=2.437, ppl=5.42, wps=5869, ups=0.09, wpb=64841, bsz=128, num_updates=9880, lr=9.9929e-05, gnorm=2.708, loss_scale=2, train_wall=11, gb_free=2.8, wall=113677
2021-06-20 02:13:34 | INFO | train_inner | epoch 004: 935 / 3002 loss=2.557, ppl=5.88, wps=5781.2, ups=0.09, wpb=64787, bsz=128, num_updates=9881, lr=9.99289e-05, gnorm=2.219, loss_scale=2, train_wall=11, gb_free=2.8, wall=113688
2021-06-20 02:13:45 | INFO | train_inner | epoch 004: 936 / 3002 loss=2.575, ppl=5.96, wps=5836.1, ups=0.09, wpb=64766, bsz=128, num_updates=9882, lr=9.99289e-05, gnorm=2.46, loss_scale=2, train_wall=11, gb_free=2.8, wall=113699
2021-06-20 02:13:56 | INFO | train_inner | epoch 004: 937 / 3002 loss=2.654, ppl=6.3, wps=5920.8, ups=0.09, wpb=64794, bsz=128, num_updates=9883, lr=9.99289e-05, gnorm=2.262, loss_scale=2, train_wall=10, gb_free=2.8, wall=113710
2021-06-20 02:14:07 | INFO | train_inner | epoch 004: 938 / 3002 loss=2.546, ppl=5.84, wps=5823.7, ups=0.09, wpb=64866, bsz=128, num_updates=9884, lr=9.99289e-05, gnorm=2.364, loss_scale=2, train_wall=11, gb_free=2.8, wall=113721
2021-06-20 02:14:18 | INFO | train_inner | epoch 004: 939 / 3002 loss=2.692, ppl=6.46, wps=5889.2, ups=0.09, wpb=64828, bsz=128, num_updates=9885, lr=9.99289e-05, gnorm=2.28, loss_scale=2, train_wall=11, gb_free=2.8, wall=113732
2021-06-20 02:14:29 | INFO | train_inner | epoch 004: 940 / 3002 loss=2.671, ppl=6.37, wps=5857.2, ups=0.09, wpb=64837, bsz=128, num_updates=9886, lr=9.99289e-05, gnorm=2.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=113743
2021-06-20 02:14:40 | INFO | train_inner | epoch 004: 941 / 3002 loss=2.661, ppl=6.33, wps=5854.7, ups=0.09, wpb=64672, bsz=128, num_updates=9887, lr=9.99289e-05, gnorm=6.406, loss_scale=2, train_wall=11, gb_free=2.8, wall=113754
2021-06-20 02:14:51 | INFO | train_inner | epoch 004: 942 / 3002 loss=2.731, ppl=6.64, wps=5826.7, ups=0.09, wpb=64809, bsz=128, num_updates=9888, lr=9.99289e-05, gnorm=2.311, loss_scale=2, train_wall=11, gb_free=2.8, wall=113766
2021-06-20 02:15:02 | INFO | train_inner | epoch 004: 943 / 3002 loss=2.625, ppl=6.17, wps=5833.1, ups=0.09, wpb=64809, bsz=128, num_updates=9889, lr=9.99289e-05, gnorm=2.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=113777
2021-06-20 02:15:13 | INFO | train_inner | epoch 004: 944 / 3002 loss=2.605, ppl=6.08, wps=5813.9, ups=0.09, wpb=64795, bsz=128, num_updates=9890, lr=9.99289e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=113788
2021-06-20 02:15:25 | INFO | train_inner | epoch 004: 945 / 3002 loss=2.675, ppl=6.39, wps=5835.9, ups=0.09, wpb=64857, bsz=128, num_updates=9891, lr=9.99289e-05, gnorm=2.19, loss_scale=2, train_wall=11, gb_free=2.8, wall=113799
2021-06-20 02:15:36 | INFO | train_inner | epoch 004: 946 / 3002 loss=2.523, ppl=5.75, wps=5892.1, ups=0.09, wpb=64909, bsz=128, num_updates=9892, lr=9.99289e-05, gnorm=4.753, loss_scale=2, train_wall=11, gb_free=2.8, wall=113810
2021-06-20 02:15:47 | INFO | train_inner | epoch 004: 947 / 3002 loss=2.524, ppl=5.75, wps=5896.4, ups=0.09, wpb=64878, bsz=128, num_updates=9893, lr=9.99289e-05, gnorm=2.561, loss_scale=2, train_wall=11, gb_free=2.8, wall=113821
2021-06-20 02:15:58 | INFO | train_inner | epoch 004: 948 / 3002 loss=2.48, ppl=5.58, wps=5769.5, ups=0.09, wpb=64858, bsz=128, num_updates=9894, lr=9.99288e-05, gnorm=2.203, loss_scale=2, train_wall=11, gb_free=2.8, wall=113832
2021-06-20 02:16:09 | INFO | train_inner | epoch 004: 949 / 3002 loss=2.5, ppl=5.66, wps=5864.3, ups=0.09, wpb=64887, bsz=128, num_updates=9895, lr=9.99288e-05, gnorm=2.304, loss_scale=2, train_wall=11, gb_free=2.8, wall=113843
2021-06-20 02:16:20 | INFO | train_inner | epoch 004: 950 / 3002 loss=2.715, ppl=6.57, wps=5852.9, ups=0.09, wpb=64794, bsz=128, num_updates=9896, lr=9.99288e-05, gnorm=2.952, loss_scale=2, train_wall=11, gb_free=2.8, wall=113854
2021-06-20 02:16:31 | INFO | train_inner | epoch 004: 951 / 3002 loss=2.531, ppl=5.78, wps=5823.8, ups=0.09, wpb=64938, bsz=128, num_updates=9897, lr=9.99288e-05, gnorm=2.315, loss_scale=2, train_wall=11, gb_free=2.8, wall=113865
2021-06-20 02:16:42 | INFO | train_inner | epoch 004: 952 / 3002 loss=2.592, ppl=6.03, wps=5992.1, ups=0.09, wpb=64900, bsz=128, num_updates=9898, lr=9.99288e-05, gnorm=2.061, loss_scale=2, train_wall=10, gb_free=2.8, wall=113876
2021-06-20 02:16:53 | INFO | train_inner | epoch 004: 953 / 3002 loss=2.566, ppl=5.92, wps=5729, ups=0.09, wpb=64840, bsz=128, num_updates=9899, lr=9.99288e-05, gnorm=2.039, loss_scale=2, train_wall=11, gb_free=2.8, wall=113888
2021-06-20 02:17:04 | INFO | train_inner | epoch 004: 954 / 3002 loss=2.629, ppl=6.19, wps=5805.4, ups=0.09, wpb=64755, bsz=128, num_updates=9900, lr=9.99288e-05, gnorm=2.864, loss_scale=2, train_wall=11, gb_free=2.8, wall=113899
2021-06-20 02:17:16 | INFO | train_inner | epoch 004: 955 / 3002 loss=2.635, ppl=6.21, wps=5848.1, ups=0.09, wpb=64887, bsz=128, num_updates=9901, lr=9.99288e-05, gnorm=2.338, loss_scale=2, train_wall=11, gb_free=2.8, wall=113910
2021-06-20 02:17:27 | INFO | train_inner | epoch 004: 956 / 3002 loss=2.42, ppl=5.35, wps=5813.3, ups=0.09, wpb=64911, bsz=128, num_updates=9902, lr=9.99288e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=113921
2021-06-20 02:17:38 | INFO | train_inner | epoch 004: 957 / 3002 loss=2.639, ppl=6.23, wps=5832.8, ups=0.09, wpb=64733, bsz=128, num_updates=9903, lr=9.99288e-05, gnorm=2.126, loss_scale=4, train_wall=11, gb_free=2.8, wall=113932
2021-06-20 02:17:49 | INFO | train_inner | epoch 004: 958 / 3002 loss=2.729, ppl=6.63, wps=5773.7, ups=0.09, wpb=64827, bsz=128, num_updates=9904, lr=9.99288e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=113943
2021-06-20 02:18:00 | INFO | train_inner | epoch 004: 959 / 3002 loss=2.48, ppl=5.58, wps=5865.2, ups=0.09, wpb=64832, bsz=128, num_updates=9905, lr=9.99288e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=113954
2021-06-20 02:18:11 | INFO | train_inner | epoch 004: 960 / 3002 loss=2.631, ppl=6.2, wps=5947.9, ups=0.09, wpb=64826, bsz=128, num_updates=9906, lr=9.99287e-05, gnorm=2.021, loss_scale=4, train_wall=10, gb_free=2.8, wall=113965
2021-06-20 02:18:22 | INFO | train_inner | epoch 004: 961 / 3002 loss=2.494, ppl=5.63, wps=5759, ups=0.09, wpb=64844, bsz=128, num_updates=9907, lr=9.99287e-05, gnorm=2.039, loss_scale=4, train_wall=11, gb_free=2.8, wall=113977
2021-06-20 02:18:33 | INFO | train_inner | epoch 004: 962 / 3002 loss=2.509, ppl=5.69, wps=5817.3, ups=0.09, wpb=64858, bsz=128, num_updates=9908, lr=9.99287e-05, gnorm=2.077, loss_scale=4, train_wall=11, gb_free=2.8, wall=113988
2021-06-20 02:18:44 | INFO | train_inner | epoch 004: 963 / 3002 loss=2.521, ppl=5.74, wps=5856.8, ups=0.09, wpb=64802, bsz=128, num_updates=9909, lr=9.99287e-05, gnorm=2.042, loss_scale=4, train_wall=11, gb_free=2.8, wall=113999
2021-06-20 02:18:56 | INFO | train_inner | epoch 004: 964 / 3002 loss=2.511, ppl=5.7, wps=5798.4, ups=0.09, wpb=64792, bsz=128, num_updates=9910, lr=9.99287e-05, gnorm=2.619, loss_scale=4, train_wall=11, gb_free=2.8, wall=114010
2021-06-20 02:19:07 | INFO | train_inner | epoch 004: 965 / 3002 loss=2.543, ppl=5.83, wps=5810.9, ups=0.09, wpb=64852, bsz=128, num_updates=9911, lr=9.99287e-05, gnorm=2.103, loss_scale=4, train_wall=11, gb_free=2.8, wall=114021
2021-06-20 02:19:18 | INFO | train_inner | epoch 004: 966 / 3002 loss=2.63, ppl=6.19, wps=5885.9, ups=0.09, wpb=64821, bsz=128, num_updates=9912, lr=9.99287e-05, gnorm=2.06, loss_scale=4, train_wall=11, gb_free=2.8, wall=114032
2021-06-20 02:19:29 | INFO | train_inner | epoch 004: 967 / 3002 loss=2.439, ppl=5.42, wps=5660.3, ups=0.09, wpb=64782, bsz=128, num_updates=9913, lr=9.99287e-05, gnorm=2.17, loss_scale=4, train_wall=11, gb_free=2.8, wall=114044
2021-06-20 02:19:40 | INFO | train_inner | epoch 004: 968 / 3002 loss=2.572, ppl=5.95, wps=5810.9, ups=0.09, wpb=64816, bsz=128, num_updates=9914, lr=9.99287e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=114055
2021-06-20 02:19:51 | INFO | train_inner | epoch 004: 969 / 3002 loss=2.679, ppl=6.4, wps=6000.1, ups=0.09, wpb=64836, bsz=128, num_updates=9915, lr=9.99287e-05, gnorm=2.074, loss_scale=4, train_wall=10, gb_free=2.8, wall=114066
2021-06-20 02:20:02 | INFO | train_inner | epoch 004: 970 / 3002 loss=2.599, ppl=6.06, wps=5769.6, ups=0.09, wpb=64846, bsz=128, num_updates=9916, lr=9.99287e-05, gnorm=2.092, loss_scale=4, train_wall=11, gb_free=2.8, wall=114077
2021-06-20 02:20:14 | INFO | train_inner | epoch 004: 971 / 3002 loss=2.608, ppl=6.09, wps=5764.3, ups=0.09, wpb=64786, bsz=128, num_updates=9917, lr=9.99287e-05, gnorm=4.07, loss_scale=4, train_wall=11, gb_free=2.8, wall=114088
2021-06-20 02:20:25 | INFO | train_inner | epoch 004: 972 / 3002 loss=2.595, ppl=6.04, wps=5842.6, ups=0.09, wpb=64796, bsz=128, num_updates=9918, lr=9.99287e-05, gnorm=2.077, loss_scale=4, train_wall=11, gb_free=2.8, wall=114099
2021-06-20 02:20:36 | INFO | train_inner | epoch 004: 973 / 3002 loss=2.648, ppl=6.27, wps=5894.8, ups=0.09, wpb=64786, bsz=128, num_updates=9919, lr=9.99286e-05, gnorm=1.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=114110
2021-06-20 02:20:47 | INFO | train_inner | epoch 004: 974 / 3002 loss=2.447, ppl=5.45, wps=5819.7, ups=0.09, wpb=64819, bsz=128, num_updates=9920, lr=9.99286e-05, gnorm=2.035, loss_scale=4, train_wall=11, gb_free=2.8, wall=114121
2021-06-20 02:20:58 | INFO | train_inner | epoch 004: 975 / 3002 loss=2.6, ppl=6.06, wps=5910.9, ups=0.09, wpb=64877, bsz=128, num_updates=9921, lr=9.99286e-05, gnorm=2.131, loss_scale=4, train_wall=11, gb_free=2.8, wall=114132
2021-06-20 02:21:09 | INFO | train_inner | epoch 004: 976 / 3002 loss=2.647, ppl=6.26, wps=5855.2, ups=0.09, wpb=64738, bsz=128, num_updates=9922, lr=9.99286e-05, gnorm=2.021, loss_scale=4, train_wall=11, gb_free=2.8, wall=114143
2021-06-20 02:21:20 | INFO | train_inner | epoch 004: 977 / 3002 loss=2.7, ppl=6.5, wps=5863.9, ups=0.09, wpb=64872, bsz=128, num_updates=9923, lr=9.99286e-05, gnorm=2.271, loss_scale=4, train_wall=11, gb_free=2.8, wall=114154
2021-06-20 02:21:31 | INFO | train_inner | epoch 004: 978 / 3002 loss=2.604, ppl=6.08, wps=5785.2, ups=0.09, wpb=64883, bsz=128, num_updates=9924, lr=9.99286e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=114166
2021-06-20 02:21:43 | INFO | train_inner | epoch 004: 979 / 3002 loss=2.462, ppl=5.51, wps=5671.5, ups=0.09, wpb=64786, bsz=128, num_updates=9925, lr=9.99286e-05, gnorm=4.289, loss_scale=4, train_wall=11, gb_free=2.8, wall=114177
2021-06-20 02:21:53 | INFO | train_inner | epoch 004: 980 / 3002 loss=2.48, ppl=5.58, wps=5990, ups=0.09, wpb=64860, bsz=128, num_updates=9926, lr=9.99286e-05, gnorm=2.163, loss_scale=4, train_wall=10, gb_free=2.8, wall=114188
2021-06-20 02:22:04 | INFO | train_inner | epoch 004: 981 / 3002 loss=2.49, ppl=5.62, wps=5975, ups=0.09, wpb=64861, bsz=128, num_updates=9927, lr=9.99286e-05, gnorm=2.138, loss_scale=4, train_wall=10, gb_free=2.8, wall=114199
2021-06-20 02:22:16 | INFO | train_inner | epoch 004: 982 / 3002 loss=2.416, ppl=5.34, wps=5767.6, ups=0.09, wpb=64837, bsz=128, num_updates=9928, lr=9.99286e-05, gnorm=2.084, loss_scale=4, train_wall=11, gb_free=2.8, wall=114210
2021-06-20 02:22:27 | INFO | train_inner | epoch 004: 983 / 3002 loss=2.467, ppl=5.53, wps=5694.9, ups=0.09, wpb=64813, bsz=128, num_updates=9929, lr=9.99286e-05, gnorm=2.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=114221
2021-06-20 02:22:38 | INFO | train_inner | epoch 004: 984 / 3002 loss=2.592, ppl=6.03, wps=5942.7, ups=0.09, wpb=64823, bsz=128, num_updates=9930, lr=9.99286e-05, gnorm=2.133, loss_scale=4, train_wall=10, gb_free=2.8, wall=114232
2021-06-20 02:22:49 | INFO | train_inner | epoch 004: 985 / 3002 loss=2.635, ppl=6.21, wps=5885.4, ups=0.09, wpb=64875, bsz=128, num_updates=9931, lr=9.99285e-05, gnorm=1.997, loss_scale=4, train_wall=11, gb_free=2.8, wall=114243
2021-06-20 02:23:00 | INFO | train_inner | epoch 004: 986 / 3002 loss=2.659, ppl=6.32, wps=5925.9, ups=0.09, wpb=64789, bsz=128, num_updates=9932, lr=9.99285e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=114254
2021-06-20 02:23:11 | INFO | train_inner | epoch 004: 987 / 3002 loss=2.569, ppl=5.93, wps=5861.4, ups=0.09, wpb=64843, bsz=128, num_updates=9933, lr=9.99285e-05, gnorm=2.134, loss_scale=4, train_wall=11, gb_free=2.8, wall=114265
2021-06-20 02:23:22 | INFO | train_inner | epoch 004: 988 / 3002 loss=2.873, ppl=7.33, wps=5908.5, ups=0.09, wpb=64806, bsz=128, num_updates=9934, lr=9.99285e-05, gnorm=2.028, loss_scale=4, train_wall=10, gb_free=2.8, wall=114276
2021-06-20 02:23:33 | INFO | train_inner | epoch 004: 989 / 3002 loss=2.562, ppl=5.9, wps=5833.4, ups=0.09, wpb=64827, bsz=128, num_updates=9935, lr=9.99285e-05, gnorm=2.055, loss_scale=4, train_wall=11, gb_free=2.8, wall=114287
2021-06-20 02:23:44 | INFO | train_inner | epoch 004: 990 / 3002 loss=2.564, ppl=5.91, wps=5828.6, ups=0.09, wpb=64773, bsz=128, num_updates=9936, lr=9.99285e-05, gnorm=4.284, loss_scale=4, train_wall=11, gb_free=2.8, wall=114298
2021-06-20 02:23:55 | INFO | train_inner | epoch 004: 991 / 3002 loss=2.591, ppl=6.02, wps=5900.5, ups=0.09, wpb=64728, bsz=128, num_updates=9937, lr=9.99285e-05, gnorm=2.032, loss_scale=4, train_wall=11, gb_free=2.8, wall=114309
2021-06-20 02:24:06 | INFO | train_inner | epoch 004: 992 / 3002 loss=2.513, ppl=5.71, wps=5807.1, ups=0.09, wpb=64841, bsz=128, num_updates=9938, lr=9.99285e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=114321
2021-06-20 02:24:17 | INFO | train_inner | epoch 004: 993 / 3002 loss=2.483, ppl=5.59, wps=5986.3, ups=0.09, wpb=64846, bsz=128, num_updates=9939, lr=9.99285e-05, gnorm=3.215, loss_scale=4, train_wall=10, gb_free=2.8, wall=114331
2021-06-20 02:24:28 | INFO | train_inner | epoch 004: 994 / 3002 loss=2.531, ppl=5.78, wps=5832.8, ups=0.09, wpb=64822, bsz=128, num_updates=9940, lr=9.99285e-05, gnorm=2.056, loss_scale=4, train_wall=11, gb_free=2.8, wall=114343
2021-06-20 02:24:39 | INFO | train_inner | epoch 004: 995 / 3002 loss=2.593, ppl=6.03, wps=5847.8, ups=0.09, wpb=64853, bsz=128, num_updates=9941, lr=9.99285e-05, gnorm=2.506, loss_scale=4, train_wall=11, gb_free=2.8, wall=114354
2021-06-20 02:24:50 | INFO | train_inner | epoch 004: 996 / 3002 loss=2.567, ppl=5.93, wps=5800, ups=0.09, wpb=64875, bsz=128, num_updates=9942, lr=9.99285e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=114365
2021-06-20 02:25:02 | INFO | train_inner | epoch 004: 997 / 3002 loss=2.506, ppl=5.68, wps=5804.3, ups=0.09, wpb=64800, bsz=128, num_updates=9943, lr=9.99285e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=114376
2021-06-20 02:25:13 | INFO | train_inner | epoch 004: 998 / 3002 loss=2.581, ppl=5.98, wps=5799.6, ups=0.09, wpb=64880, bsz=128, num_updates=9944, lr=9.99284e-05, gnorm=2.108, loss_scale=4, train_wall=11, gb_free=2.8, wall=114387
2021-06-20 02:25:24 | INFO | train_inner | epoch 004: 999 / 3002 loss=2.596, ppl=6.05, wps=5864.3, ups=0.09, wpb=64804, bsz=128, num_updates=9945, lr=9.99284e-05, gnorm=2.062, loss_scale=4, train_wall=11, gb_free=2.8, wall=114398
2021-06-20 02:25:35 | INFO | train_inner | epoch 004: 1000 / 3002 loss=2.575, ppl=5.96, wps=5887.9, ups=0.09, wpb=64887, bsz=128, num_updates=9946, lr=9.99284e-05, gnorm=2.191, loss_scale=4, train_wall=11, gb_free=2.8, wall=114409
2021-06-20 02:25:46 | INFO | train_inner | epoch 004: 1001 / 3002 loss=2.479, ppl=5.58, wps=5848.6, ups=0.09, wpb=64864, bsz=128, num_updates=9947, lr=9.99284e-05, gnorm=2.053, loss_scale=4, train_wall=11, gb_free=2.8, wall=114420
2021-06-20 02:25:57 | INFO | train_inner | epoch 004: 1002 / 3002 loss=2.518, ppl=5.73, wps=5831.8, ups=0.09, wpb=64853, bsz=128, num_updates=9948, lr=9.99284e-05, gnorm=2.052, loss_scale=4, train_wall=11, gb_free=2.8, wall=114431
2021-06-20 02:26:08 | INFO | train_inner | epoch 004: 1003 / 3002 loss=2.578, ppl=5.97, wps=5984.6, ups=0.09, wpb=64902, bsz=128, num_updates=9949, lr=9.99284e-05, gnorm=2.095, loss_scale=4, train_wall=10, gb_free=2.8, wall=114442
2021-06-20 02:26:19 | INFO | train_inner | epoch 004: 1004 / 3002 loss=2.638, ppl=6.22, wps=5903.2, ups=0.09, wpb=64810, bsz=128, num_updates=9950, lr=9.99284e-05, gnorm=2.089, loss_scale=4, train_wall=11, gb_free=2.8, wall=114453
2021-06-20 02:26:30 | INFO | train_inner | epoch 004: 1005 / 3002 loss=2.437, ppl=5.42, wps=6001.3, ups=0.09, wpb=64969, bsz=128, num_updates=9951, lr=9.99284e-05, gnorm=2.015, loss_scale=4, train_wall=10, gb_free=2.8, wall=114464
2021-06-20 02:26:41 | INFO | train_inner | epoch 004: 1006 / 3002 loss=2.685, ppl=6.43, wps=5868.6, ups=0.09, wpb=64803, bsz=128, num_updates=9952, lr=9.99284e-05, gnorm=3.035, loss_scale=4, train_wall=11, gb_free=2.8, wall=114475
2021-06-20 02:26:52 | INFO | train_inner | epoch 004: 1007 / 3002 loss=2.481, ppl=5.58, wps=5860.5, ups=0.09, wpb=64857, bsz=128, num_updates=9953, lr=9.99284e-05, gnorm=2.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=114486
2021-06-20 02:27:03 | INFO | train_inner | epoch 004: 1008 / 3002 loss=2.657, ppl=6.31, wps=5813.7, ups=0.09, wpb=64811, bsz=128, num_updates=9954, lr=9.99284e-05, gnorm=2.112, loss_scale=4, train_wall=11, gb_free=2.8, wall=114497
2021-06-20 02:27:14 | INFO | train_inner | epoch 004: 1009 / 3002 loss=2.583, ppl=5.99, wps=5855.2, ups=0.09, wpb=64839, bsz=128, num_updates=9955, lr=9.99284e-05, gnorm=2.055, loss_scale=4, train_wall=11, gb_free=2.8, wall=114508
2021-06-20 02:27:25 | INFO | train_inner | epoch 004: 1010 / 3002 loss=2.587, ppl=6.01, wps=5848.1, ups=0.09, wpb=64886, bsz=128, num_updates=9956, lr=9.99283e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=114520
2021-06-20 02:27:36 | INFO | train_inner | epoch 004: 1011 / 3002 loss=2.539, ppl=5.81, wps=5850.9, ups=0.09, wpb=64817, bsz=128, num_updates=9957, lr=9.99283e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=114531
2021-06-20 02:27:47 | INFO | train_inner | epoch 004: 1012 / 3002 loss=2.543, ppl=5.83, wps=5888.5, ups=0.09, wpb=64825, bsz=128, num_updates=9958, lr=9.99283e-05, gnorm=2.02, loss_scale=4, train_wall=11, gb_free=2.8, wall=114542
2021-06-20 02:27:58 | INFO | train_inner | epoch 004: 1013 / 3002 loss=2.558, ppl=5.89, wps=5840, ups=0.09, wpb=64866, bsz=128, num_updates=9959, lr=9.99283e-05, gnorm=2.114, loss_scale=4, train_wall=11, gb_free=2.8, wall=114553
2021-06-20 02:28:09 | INFO | train_inner | epoch 004: 1014 / 3002 loss=2.478, ppl=5.57, wps=5915.2, ups=0.09, wpb=64865, bsz=128, num_updates=9960, lr=9.99283e-05, gnorm=2.086, loss_scale=4, train_wall=11, gb_free=2.8, wall=114564
2021-06-20 02:28:20 | INFO | train_inner | epoch 004: 1015 / 3002 loss=2.477, ppl=5.57, wps=5848.3, ups=0.09, wpb=64801, bsz=128, num_updates=9961, lr=9.99283e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=114575
2021-06-20 02:28:31 | INFO | train_inner | epoch 004: 1016 / 3002 loss=2.611, ppl=6.11, wps=5996.8, ups=0.09, wpb=64819, bsz=128, num_updates=9962, lr=9.99283e-05, gnorm=2.004, loss_scale=4, train_wall=10, gb_free=2.8, wall=114586
2021-06-20 02:28:42 | INFO | train_inner | epoch 004: 1017 / 3002 loss=2.546, ppl=5.84, wps=5825.5, ups=0.09, wpb=64860, bsz=128, num_updates=9963, lr=9.99283e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=114597
2021-06-20 02:28:53 | INFO | train_inner | epoch 004: 1018 / 3002 loss=2.441, ppl=5.43, wps=5906.8, ups=0.09, wpb=64831, bsz=128, num_updates=9964, lr=9.99283e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=114608
2021-06-20 02:29:05 | INFO | train_inner | epoch 004: 1019 / 3002 loss=2.657, ppl=6.31, wps=5780.2, ups=0.09, wpb=64825, bsz=128, num_updates=9965, lr=9.99283e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=114619
2021-06-20 02:29:16 | INFO | train_inner | epoch 004: 1020 / 3002 loss=2.548, ppl=5.85, wps=5808.3, ups=0.09, wpb=64872, bsz=128, num_updates=9966, lr=9.99283e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=114630
2021-06-20 02:29:27 | INFO | train_inner | epoch 004: 1021 / 3002 loss=2.573, ppl=5.95, wps=5849.7, ups=0.09, wpb=64844, bsz=128, num_updates=9967, lr=9.99283e-05, gnorm=13.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=114641
2021-06-20 02:29:38 | INFO | train_inner | epoch 004: 1022 / 3002 loss=2.593, ppl=6.03, wps=5925.8, ups=0.09, wpb=64835, bsz=128, num_updates=9968, lr=9.99283e-05, gnorm=1.997, loss_scale=4, train_wall=10, gb_free=2.8, wall=114652
2021-06-20 02:29:49 | INFO | train_inner | epoch 004: 1023 / 3002 loss=2.586, ppl=6.01, wps=5956.1, ups=0.09, wpb=64796, bsz=128, num_updates=9969, lr=9.99282e-05, gnorm=3.898, loss_scale=4, train_wall=10, gb_free=2.8, wall=114663
2021-06-20 02:30:00 | INFO | train_inner | epoch 004: 1024 / 3002 loss=2.692, ppl=6.46, wps=5880.8, ups=0.09, wpb=64763, bsz=128, num_updates=9970, lr=9.99282e-05, gnorm=2.28, loss_scale=4, train_wall=11, gb_free=2.8, wall=114674
2021-06-20 02:30:11 | INFO | train_inner | epoch 004: 1025 / 3002 loss=2.594, ppl=6.04, wps=5892.3, ups=0.09, wpb=64863, bsz=128, num_updates=9971, lr=9.99282e-05, gnorm=1.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=114685
2021-06-20 02:30:22 | INFO | train_inner | epoch 004: 1026 / 3002 loss=2.743, ppl=6.69, wps=5872.7, ups=0.09, wpb=64824, bsz=128, num_updates=9972, lr=9.99282e-05, gnorm=2.095, loss_scale=4, train_wall=11, gb_free=2.8, wall=114696
2021-06-20 02:30:33 | INFO | train_inner | epoch 004: 1027 / 3002 loss=2.439, ppl=5.42, wps=5862.1, ups=0.09, wpb=64872, bsz=128, num_updates=9973, lr=9.99282e-05, gnorm=2.266, loss_scale=4, train_wall=11, gb_free=2.8, wall=114707
2021-06-20 02:30:44 | INFO | train_inner | epoch 004: 1028 / 3002 loss=2.625, ppl=6.17, wps=5784.6, ups=0.09, wpb=64796, bsz=128, num_updates=9974, lr=9.99282e-05, gnorm=2.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=114718
2021-06-20 02:30:55 | INFO | train_inner | epoch 004: 1029 / 3002 loss=2.69, ppl=6.45, wps=5780.5, ups=0.09, wpb=64832, bsz=128, num_updates=9975, lr=9.99282e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=114730
2021-06-20 02:31:06 | INFO | train_inner | epoch 004: 1030 / 3002 loss=2.658, ppl=6.31, wps=5850.5, ups=0.09, wpb=64760, bsz=128, num_updates=9976, lr=9.99282e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=114741
2021-06-20 02:31:17 | INFO | train_inner | epoch 004: 1031 / 3002 loss=2.61, ppl=6.11, wps=5854.5, ups=0.09, wpb=64812, bsz=128, num_updates=9977, lr=9.99282e-05, gnorm=2.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=114752
2021-06-20 02:31:28 | INFO | train_inner | epoch 004: 1032 / 3002 loss=2.651, ppl=6.28, wps=5848.5, ups=0.09, wpb=64821, bsz=128, num_updates=9978, lr=9.99282e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=114763
2021-06-20 02:31:39 | INFO | train_inner | epoch 004: 1033 / 3002 loss=2.445, ppl=5.45, wps=5933, ups=0.09, wpb=64874, bsz=128, num_updates=9979, lr=9.99282e-05, gnorm=2.213, loss_scale=4, train_wall=10, gb_free=2.8, wall=114774
2021-06-20 02:31:50 | INFO | train_inner | epoch 004: 1034 / 3002 loss=2.545, ppl=5.84, wps=5919.7, ups=0.09, wpb=64787, bsz=128, num_updates=9980, lr=9.99282e-05, gnorm=2.07, loss_scale=4, train_wall=10, gb_free=2.8, wall=114785
2021-06-20 02:32:02 | INFO | train_inner | epoch 004: 1035 / 3002 loss=2.478, ppl=5.57, wps=5773.6, ups=0.09, wpb=64844, bsz=128, num_updates=9981, lr=9.99281e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=114796
2021-06-20 02:32:13 | INFO | train_inner | epoch 004: 1036 / 3002 loss=2.691, ppl=6.46, wps=5866, ups=0.09, wpb=64862, bsz=128, num_updates=9982, lr=9.99281e-05, gnorm=2.068, loss_scale=4, train_wall=11, gb_free=2.8, wall=114807
2021-06-20 02:32:24 | INFO | train_inner | epoch 004: 1037 / 3002 loss=2.579, ppl=5.98, wps=5825.6, ups=0.09, wpb=64864, bsz=128, num_updates=9983, lr=9.99281e-05, gnorm=2.139, loss_scale=4, train_wall=11, gb_free=2.8, wall=114818
2021-06-20 02:32:35 | INFO | train_inner | epoch 004: 1038 / 3002 loss=2.393, ppl=5.25, wps=5888.2, ups=0.09, wpb=64869, bsz=128, num_updates=9984, lr=9.99281e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=114829
2021-06-20 02:32:46 | INFO | train_inner | epoch 004: 1039 / 3002 loss=2.569, ppl=5.93, wps=5879.2, ups=0.09, wpb=64763, bsz=128, num_updates=9985, lr=9.99281e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=114840
2021-06-20 02:32:57 | INFO | train_inner | epoch 004: 1040 / 3002 loss=2.499, ppl=5.65, wps=5842.7, ups=0.09, wpb=64792, bsz=128, num_updates=9986, lr=9.99281e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=114851
2021-06-20 02:33:08 | INFO | train_inner | epoch 004: 1041 / 3002 loss=2.872, ppl=7.32, wps=5889.8, ups=0.09, wpb=64870, bsz=128, num_updates=9987, lr=9.99281e-05, gnorm=2.37, loss_scale=4, train_wall=11, gb_free=2.8, wall=114862
2021-06-20 02:33:19 | INFO | train_inner | epoch 004: 1042 / 3002 loss=2.606, ppl=6.09, wps=5868.7, ups=0.09, wpb=64917, bsz=128, num_updates=9988, lr=9.99281e-05, gnorm=2.218, loss_scale=4, train_wall=11, gb_free=2.8, wall=114873
2021-06-20 02:33:30 | INFO | train_inner | epoch 004: 1043 / 3002 loss=2.692, ppl=6.46, wps=5800.5, ups=0.09, wpb=64762, bsz=128, num_updates=9989, lr=9.99281e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=114884
2021-06-20 02:33:41 | INFO | train_inner | epoch 004: 1044 / 3002 loss=2.51, ppl=5.7, wps=5938.7, ups=0.09, wpb=64858, bsz=128, num_updates=9990, lr=9.99281e-05, gnorm=2.063, loss_scale=4, train_wall=10, gb_free=2.8, wall=114895
2021-06-20 02:33:52 | INFO | train_inner | epoch 004: 1045 / 3002 loss=2.477, ppl=5.57, wps=5846, ups=0.09, wpb=64832, bsz=128, num_updates=9991, lr=9.99281e-05, gnorm=2.032, loss_scale=4, train_wall=11, gb_free=2.8, wall=114906
2021-06-20 02:34:03 | INFO | train_inner | epoch 004: 1046 / 3002 loss=2.544, ppl=5.83, wps=5963.8, ups=0.09, wpb=64923, bsz=128, num_updates=9992, lr=9.99281e-05, gnorm=2.142, loss_scale=4, train_wall=10, gb_free=2.8, wall=114917
2021-06-20 02:34:14 | INFO | train_inner | epoch 004: 1047 / 3002 loss=2.604, ppl=6.08, wps=5801.9, ups=0.09, wpb=64897, bsz=128, num_updates=9993, lr=9.99281e-05, gnorm=1.976, loss_scale=4, train_wall=11, gb_free=2.8, wall=114928
2021-06-20 02:34:25 | INFO | train_inner | epoch 004: 1048 / 3002 loss=2.475, ppl=5.56, wps=5962.5, ups=0.09, wpb=64876, bsz=128, num_updates=9994, lr=9.9928e-05, gnorm=2.097, loss_scale=4, train_wall=10, gb_free=2.8, wall=114939
2021-06-20 02:34:36 | INFO | train_inner | epoch 004: 1049 / 3002 loss=2.561, ppl=5.9, wps=5800.9, ups=0.09, wpb=64864, bsz=128, num_updates=9995, lr=9.9928e-05, gnorm=2.114, loss_scale=4, train_wall=11, gb_free=2.8, wall=114951
2021-06-20 02:34:47 | INFO | train_inner | epoch 004: 1050 / 3002 loss=2.512, ppl=5.7, wps=5778.3, ups=0.09, wpb=64802, bsz=128, num_updates=9996, lr=9.9928e-05, gnorm=2.324, loss_scale=4, train_wall=11, gb_free=2.8, wall=114962
2021-06-20 02:34:58 | INFO | train_inner | epoch 004: 1051 / 3002 loss=2.568, ppl=5.93, wps=5924.4, ups=0.09, wpb=64843, bsz=128, num_updates=9997, lr=9.9928e-05, gnorm=2.087, loss_scale=4, train_wall=10, gb_free=2.8, wall=114973
2021-06-20 02:35:10 | INFO | train_inner | epoch 004: 1052 / 3002 loss=2.641, ppl=6.24, wps=5794.5, ups=0.09, wpb=64778, bsz=128, num_updates=9998, lr=9.9928e-05, gnorm=2.068, loss_scale=4, train_wall=11, gb_free=2.8, wall=114984
2021-06-20 02:35:21 | INFO | train_inner | epoch 004: 1053 / 3002 loss=2.453, ppl=5.48, wps=5891.8, ups=0.09, wpb=64885, bsz=128, num_updates=9999, lr=9.9928e-05, gnorm=2.044, loss_scale=4, train_wall=11, gb_free=2.8, wall=114995
2021-06-20 02:35:32 | INFO | train_inner | epoch 004: 1054 / 3002 loss=2.654, ppl=6.29, wps=5838.3, ups=0.09, wpb=64829, bsz=128, num_updates=10000, lr=9.9928e-05, gnorm=2.485, loss_scale=4, train_wall=11, gb_free=2.8, wall=115006
2021-06-20 02:35:43 | INFO | train_inner | epoch 004: 1055 / 3002 loss=2.812, ppl=7.02, wps=5823.8, ups=0.09, wpb=64897, bsz=128, num_updates=10001, lr=9.9928e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=115017
2021-06-20 02:35:54 | INFO | train_inner | epoch 004: 1056 / 3002 loss=2.603, ppl=6.07, wps=5796.2, ups=0.09, wpb=64772, bsz=128, num_updates=10002, lr=9.9928e-05, gnorm=2.055, loss_scale=4, train_wall=11, gb_free=2.8, wall=115028
2021-06-20 02:36:05 | INFO | train_inner | epoch 004: 1057 / 3002 loss=2.586, ppl=6.01, wps=5925, ups=0.09, wpb=64864, bsz=128, num_updates=10003, lr=9.9928e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=115039
2021-06-20 02:36:16 | INFO | train_inner | epoch 004: 1058 / 3002 loss=2.617, ppl=6.13, wps=5864.6, ups=0.09, wpb=64893, bsz=128, num_updates=10004, lr=9.9928e-05, gnorm=2.395, loss_scale=4, train_wall=11, gb_free=2.8, wall=115050
2021-06-20 02:36:27 | INFO | train_inner | epoch 004: 1059 / 3002 loss=2.483, ppl=5.59, wps=5917.7, ups=0.09, wpb=64864, bsz=128, num_updates=10005, lr=9.9928e-05, gnorm=2.052, loss_scale=4, train_wall=11, gb_free=2.8, wall=115061
2021-06-20 02:36:38 | INFO | train_inner | epoch 004: 1060 / 3002 loss=2.531, ppl=5.78, wps=5841.6, ups=0.09, wpb=64900, bsz=128, num_updates=10006, lr=9.99279e-05, gnorm=2.06, loss_scale=4, train_wall=11, gb_free=2.8, wall=115072
2021-06-20 02:36:49 | INFO | train_inner | epoch 004: 1061 / 3002 loss=2.444, ppl=5.44, wps=5946.9, ups=0.09, wpb=64845, bsz=128, num_updates=10007, lr=9.99279e-05, gnorm=2.094, loss_scale=4, train_wall=10, gb_free=2.8, wall=115083
2021-06-20 02:37:00 | INFO | train_inner | epoch 004: 1062 / 3002 loss=2.654, ppl=6.29, wps=5814.2, ups=0.09, wpb=64764, bsz=128, num_updates=10008, lr=9.99279e-05, gnorm=2.116, loss_scale=4, train_wall=11, gb_free=2.8, wall=115094
2021-06-20 02:37:11 | INFO | train_inner | epoch 004: 1063 / 3002 loss=2.604, ppl=6.08, wps=5907.9, ups=0.09, wpb=64859, bsz=128, num_updates=10009, lr=9.99279e-05, gnorm=2.055, loss_scale=4, train_wall=10, gb_free=2.8, wall=115105
2021-06-20 02:37:22 | INFO | train_inner | epoch 004: 1064 / 3002 loss=2.606, ppl=6.09, wps=5680.5, ups=0.09, wpb=64767, bsz=128, num_updates=10010, lr=9.99279e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=115117
2021-06-20 02:37:33 | INFO | train_inner | epoch 004: 1065 / 3002 loss=2.595, ppl=6.04, wps=5915, ups=0.09, wpb=64900, bsz=128, num_updates=10011, lr=9.99279e-05, gnorm=2.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=115128
2021-06-20 02:37:45 | INFO | train_inner | epoch 004: 1066 / 3002 loss=2.614, ppl=6.12, wps=5800.5, ups=0.09, wpb=64799, bsz=128, num_updates=10012, lr=9.99279e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=115139
2021-06-20 02:37:56 | INFO | train_inner | epoch 004: 1067 / 3002 loss=2.485, ppl=5.6, wps=5816.5, ups=0.09, wpb=64842, bsz=128, num_updates=10013, lr=9.99279e-05, gnorm=2.943, loss_scale=4, train_wall=11, gb_free=2.8, wall=115150
2021-06-20 02:38:07 | INFO | train_inner | epoch 004: 1068 / 3002 loss=2.633, ppl=6.2, wps=5859.6, ups=0.09, wpb=64830, bsz=128, num_updates=10014, lr=9.99279e-05, gnorm=2.058, loss_scale=4, train_wall=11, gb_free=2.8, wall=115161
2021-06-20 02:38:18 | INFO | train_inner | epoch 004: 1069 / 3002 loss=2.601, ppl=6.07, wps=5769.9, ups=0.09, wpb=64808, bsz=128, num_updates=10015, lr=9.99279e-05, gnorm=2.088, loss_scale=4, train_wall=11, gb_free=2.8, wall=115172
2021-06-20 02:38:29 | INFO | train_inner | epoch 004: 1070 / 3002 loss=2.581, ppl=5.99, wps=5936.9, ups=0.09, wpb=64859, bsz=128, num_updates=10016, lr=9.99279e-05, gnorm=2.137, loss_scale=4, train_wall=10, gb_free=2.8, wall=115183
2021-06-20 02:38:40 | INFO | train_inner | epoch 004: 1071 / 3002 loss=2.552, ppl=5.87, wps=5820.1, ups=0.09, wpb=64824, bsz=128, num_updates=10017, lr=9.99279e-05, gnorm=2.082, loss_scale=4, train_wall=11, gb_free=2.8, wall=115194
2021-06-20 02:38:51 | INFO | train_inner | epoch 004: 1072 / 3002 loss=2.663, ppl=6.34, wps=5901.8, ups=0.09, wpb=64803, bsz=128, num_updates=10018, lr=9.99279e-05, gnorm=2.74, loss_scale=4, train_wall=10, gb_free=2.8, wall=115205
2021-06-20 02:39:02 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-20 02:39:13 | INFO | train_inner | epoch 004: 1074 / 3002 loss=2.532, ppl=5.79, wps=2932.8, ups=0.05, wpb=64799, bsz=128, num_updates=10019, lr=9.99278e-05, gnorm=2.16, loss_scale=2, train_wall=21, gb_free=2.8, wall=115228
2021-06-20 02:39:24 | INFO | train_inner | epoch 004: 1075 / 3002 loss=2.557, ppl=5.89, wps=5758.7, ups=0.09, wpb=64856, bsz=128, num_updates=10020, lr=9.99278e-05, gnorm=2.285, loss_scale=2, train_wall=11, gb_free=2.8, wall=115239
2021-06-20 02:39:35 | INFO | train_inner | epoch 004: 1076 / 3002 loss=2.525, ppl=5.76, wps=5908.2, ups=0.09, wpb=64838, bsz=128, num_updates=10021, lr=9.99278e-05, gnorm=2.593, loss_scale=2, train_wall=11, gb_free=2.8, wall=115250
2021-06-20 02:39:47 | INFO | train_inner | epoch 004: 1077 / 3002 loss=2.729, ppl=6.63, wps=5841, ups=0.09, wpb=64800, bsz=128, num_updates=10022, lr=9.99278e-05, gnorm=2.043, loss_scale=2, train_wall=11, gb_free=2.8, wall=115261
2021-06-20 02:39:57 | INFO | train_inner | epoch 004: 1078 / 3002 loss=2.572, ppl=5.95, wps=5933.2, ups=0.09, wpb=64835, bsz=128, num_updates=10023, lr=9.99278e-05, gnorm=2.238, loss_scale=2, train_wall=10, gb_free=2.8, wall=115272
2021-06-20 02:40:09 | INFO | train_inner | epoch 004: 1079 / 3002 loss=2.595, ppl=6.04, wps=5754.8, ups=0.09, wpb=64838, bsz=128, num_updates=10024, lr=9.99278e-05, gnorm=6.347, loss_scale=2, train_wall=11, gb_free=2.8, wall=115283
2021-06-20 02:40:20 | INFO | train_inner | epoch 004: 1080 / 3002 loss=2.636, ppl=6.22, wps=5860.7, ups=0.09, wpb=64842, bsz=128, num_updates=10025, lr=9.99278e-05, gnorm=2.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=115294
2021-06-20 02:40:31 | INFO | train_inner | epoch 004: 1081 / 3002 loss=2.647, ppl=6.26, wps=5919.3, ups=0.09, wpb=64749, bsz=128, num_updates=10026, lr=9.99278e-05, gnorm=2.224, loss_scale=2, train_wall=10, gb_free=2.8, wall=115305
2021-06-20 02:40:42 | INFO | train_inner | epoch 004: 1082 / 3002 loss=2.594, ppl=6.04, wps=5789, ups=0.09, wpb=64741, bsz=128, num_updates=10027, lr=9.99278e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=115316
2021-06-20 02:40:53 | INFO | train_inner | epoch 004: 1083 / 3002 loss=2.557, ppl=5.88, wps=5930.5, ups=0.09, wpb=64886, bsz=128, num_updates=10028, lr=9.99278e-05, gnorm=3.197, loss_scale=2, train_wall=10, gb_free=2.8, wall=115327
2021-06-20 02:41:04 | INFO | train_inner | epoch 004: 1084 / 3002 loss=2.688, ppl=6.44, wps=5766.3, ups=0.09, wpb=64793, bsz=128, num_updates=10029, lr=9.99278e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=115338
2021-06-20 02:41:15 | INFO | train_inner | epoch 004: 1085 / 3002 loss=2.484, ppl=5.59, wps=5739.9, ups=0.09, wpb=64746, bsz=128, num_updates=10030, lr=9.99278e-05, gnorm=2.092, loss_scale=2, train_wall=11, gb_free=2.8, wall=115350
2021-06-20 02:41:27 | INFO | train_inner | epoch 004: 1086 / 3002 loss=2.632, ppl=6.2, wps=5765.7, ups=0.09, wpb=64766, bsz=128, num_updates=10031, lr=9.99277e-05, gnorm=2.158, loss_scale=2, train_wall=11, gb_free=2.8, wall=115361
2021-06-20 02:41:38 | INFO | train_inner | epoch 004: 1087 / 3002 loss=2.516, ppl=5.72, wps=5823.9, ups=0.09, wpb=64805, bsz=128, num_updates=10032, lr=9.99277e-05, gnorm=2.09, loss_scale=2, train_wall=11, gb_free=2.8, wall=115372
2021-06-20 02:41:49 | INFO | train_inner | epoch 004: 1088 / 3002 loss=2.753, ppl=6.74, wps=5960.4, ups=0.09, wpb=64828, bsz=128, num_updates=10033, lr=9.99277e-05, gnorm=2.187, loss_scale=2, train_wall=10, gb_free=2.8, wall=115383
2021-06-20 02:42:00 | INFO | train_inner | epoch 004: 1089 / 3002 loss=2.582, ppl=5.99, wps=5938.3, ups=0.09, wpb=64852, bsz=128, num_updates=10034, lr=9.99277e-05, gnorm=2.23, loss_scale=2, train_wall=10, gb_free=2.8, wall=115394
2021-06-20 02:42:11 | INFO | train_inner | epoch 004: 1090 / 3002 loss=2.545, ppl=5.83, wps=5807.4, ups=0.09, wpb=64767, bsz=128, num_updates=10035, lr=9.99277e-05, gnorm=2.256, loss_scale=2, train_wall=11, gb_free=2.8, wall=115405
2021-06-20 02:42:22 | INFO | train_inner | epoch 004: 1091 / 3002 loss=2.568, ppl=5.93, wps=5935.7, ups=0.09, wpb=64781, bsz=128, num_updates=10036, lr=9.99277e-05, gnorm=2.08, loss_scale=2, train_wall=10, gb_free=2.8, wall=115416
2021-06-20 02:42:33 | INFO | train_inner | epoch 004: 1092 / 3002 loss=2.617, ppl=6.13, wps=5853.1, ups=0.09, wpb=64782, bsz=128, num_updates=10037, lr=9.99277e-05, gnorm=5.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=115427
2021-06-20 02:42:44 | INFO | train_inner | epoch 004: 1093 / 3002 loss=2.669, ppl=6.36, wps=5881.3, ups=0.09, wpb=64819, bsz=128, num_updates=10038, lr=9.99277e-05, gnorm=2.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=115438
2021-06-20 02:42:55 | INFO | train_inner | epoch 004: 1094 / 3002 loss=2.533, ppl=5.79, wps=5899.5, ups=0.09, wpb=64853, bsz=128, num_updates=10039, lr=9.99277e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=115449
2021-06-20 02:43:06 | INFO | train_inner | epoch 004: 1095 / 3002 loss=2.708, ppl=6.53, wps=5722.5, ups=0.09, wpb=64785, bsz=128, num_updates=10040, lr=9.99277e-05, gnorm=2.039, loss_scale=2, train_wall=11, gb_free=2.8, wall=115460
2021-06-20 02:43:17 | INFO | train_inner | epoch 004: 1096 / 3002 loss=2.584, ppl=6, wps=5865.8, ups=0.09, wpb=64784, bsz=128, num_updates=10041, lr=9.99277e-05, gnorm=8.753, loss_scale=2, train_wall=11, gb_free=2.8, wall=115471
2021-06-20 02:43:28 | INFO | train_inner | epoch 004: 1097 / 3002 loss=2.734, ppl=6.65, wps=5919.2, ups=0.09, wpb=64801, bsz=128, num_updates=10042, lr=9.99277e-05, gnorm=2.132, loss_scale=2, train_wall=10, gb_free=2.8, wall=115482
2021-06-20 02:43:39 | INFO | train_inner | epoch 004: 1098 / 3002 loss=2.541, ppl=5.82, wps=5804.4, ups=0.09, wpb=64835, bsz=128, num_updates=10043, lr=9.99277e-05, gnorm=2.083, loss_scale=2, train_wall=11, gb_free=2.8, wall=115494
2021-06-20 02:43:50 | INFO | train_inner | epoch 004: 1099 / 3002 loss=2.623, ppl=6.16, wps=5778.6, ups=0.09, wpb=64818, bsz=128, num_updates=10044, lr=9.99276e-05, gnorm=2.26, loss_scale=2, train_wall=11, gb_free=2.8, wall=115505
2021-06-20 02:44:01 | INFO | train_inner | epoch 004: 1100 / 3002 loss=2.553, ppl=5.87, wps=5842.8, ups=0.09, wpb=64813, bsz=128, num_updates=10045, lr=9.99276e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=115516
2021-06-20 02:44:13 | INFO | train_inner | epoch 004: 1101 / 3002 loss=2.628, ppl=6.18, wps=5783.3, ups=0.09, wpb=64772, bsz=128, num_updates=10046, lr=9.99276e-05, gnorm=2.23, loss_scale=2, train_wall=11, gb_free=2.8, wall=115527
2021-06-20 02:44:24 | INFO | train_inner | epoch 004: 1102 / 3002 loss=2.808, ppl=7, wps=5769.1, ups=0.09, wpb=64840, bsz=128, num_updates=10047, lr=9.99276e-05, gnorm=2.198, loss_scale=2, train_wall=11, gb_free=2.8, wall=115538
2021-06-20 02:44:35 | INFO | train_inner | epoch 004: 1103 / 3002 loss=2.532, ppl=5.78, wps=6001.5, ups=0.09, wpb=64896, bsz=128, num_updates=10048, lr=9.99276e-05, gnorm=2.111, loss_scale=2, train_wall=10, gb_free=2.8, wall=115549
2021-06-20 02:44:46 | INFO | train_inner | epoch 004: 1104 / 3002 loss=2.729, ppl=6.63, wps=5951.7, ups=0.09, wpb=64791, bsz=128, num_updates=10049, lr=9.99276e-05, gnorm=2.176, loss_scale=2, train_wall=10, gb_free=2.8, wall=115560
2021-06-20 02:44:57 | INFO | train_inner | epoch 004: 1105 / 3002 loss=2.491, ppl=5.62, wps=5953.9, ups=0.09, wpb=64900, bsz=128, num_updates=10050, lr=9.99276e-05, gnorm=2.001, loss_scale=2, train_wall=10, gb_free=2.8, wall=115571
2021-06-20 02:45:08 | INFO | train_inner | epoch 004: 1106 / 3002 loss=2.695, ppl=6.48, wps=5738.5, ups=0.09, wpb=64772, bsz=128, num_updates=10051, lr=9.99276e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=115582
2021-06-20 02:45:19 | INFO | train_inner | epoch 004: 1107 / 3002 loss=2.422, ppl=5.36, wps=5960.6, ups=0.09, wpb=64868, bsz=128, num_updates=10052, lr=9.99276e-05, gnorm=2.318, loss_scale=2, train_wall=10, gb_free=2.8, wall=115593
2021-06-20 02:45:30 | INFO | train_inner | epoch 004: 1108 / 3002 loss=2.68, ppl=6.41, wps=5798.9, ups=0.09, wpb=64782, bsz=128, num_updates=10053, lr=9.99276e-05, gnorm=1.993, loss_scale=2, train_wall=11, gb_free=2.8, wall=115604
2021-06-20 02:45:41 | INFO | train_inner | epoch 004: 1109 / 3002 loss=2.717, ppl=6.58, wps=5783.3, ups=0.09, wpb=64868, bsz=128, num_updates=10054, lr=9.99276e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=115615
2021-06-20 02:45:52 | INFO | train_inner | epoch 004: 1110 / 3002 loss=2.484, ppl=5.6, wps=5900.9, ups=0.09, wpb=64824, bsz=128, num_updates=10055, lr=9.99276e-05, gnorm=2.342, loss_scale=2, train_wall=11, gb_free=2.8, wall=115626
2021-06-20 02:46:03 | INFO | train_inner | epoch 004: 1111 / 3002 loss=2.657, ppl=6.31, wps=5808.9, ups=0.09, wpb=64820, bsz=128, num_updates=10056, lr=9.99275e-05, gnorm=2.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=115638
2021-06-20 02:46:14 | INFO | train_inner | epoch 004: 1112 / 3002 loss=2.824, ppl=7.08, wps=5786.9, ups=0.09, wpb=64781, bsz=128, num_updates=10057, lr=9.99275e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=115649
2021-06-20 02:46:26 | INFO | train_inner | epoch 004: 1113 / 3002 loss=2.684, ppl=6.43, wps=5808.1, ups=0.09, wpb=64820, bsz=128, num_updates=10058, lr=9.99275e-05, gnorm=2.175, loss_scale=2, train_wall=11, gb_free=2.8, wall=115660
2021-06-20 02:46:36 | INFO | train_inner | epoch 004: 1114 / 3002 loss=2.731, ppl=6.64, wps=5938.7, ups=0.09, wpb=64760, bsz=128, num_updates=10059, lr=9.99275e-05, gnorm=2.107, loss_scale=2, train_wall=10, gb_free=2.8, wall=115671
2021-06-20 02:46:48 | INFO | train_inner | epoch 004: 1115 / 3002 loss=2.651, ppl=6.28, wps=5881.1, ups=0.09, wpb=64846, bsz=128, num_updates=10060, lr=9.99275e-05, gnorm=2.048, loss_scale=2, train_wall=11, gb_free=2.8, wall=115682
2021-06-20 02:46:59 | INFO | train_inner | epoch 004: 1116 / 3002 loss=2.701, ppl=6.5, wps=5899.1, ups=0.09, wpb=64838, bsz=128, num_updates=10061, lr=9.99275e-05, gnorm=2.209, loss_scale=2, train_wall=11, gb_free=2.8, wall=115693
2021-06-20 02:47:10 | INFO | train_inner | epoch 004: 1117 / 3002 loss=2.729, ppl=6.63, wps=5850.1, ups=0.09, wpb=64900, bsz=128, num_updates=10062, lr=9.99275e-05, gnorm=2.561, loss_scale=2, train_wall=11, gb_free=2.8, wall=115704
2021-06-20 02:47:21 | INFO | train_inner | epoch 004: 1118 / 3002 loss=2.633, ppl=6.2, wps=5900.2, ups=0.09, wpb=64887, bsz=128, num_updates=10063, lr=9.99275e-05, gnorm=2.271, loss_scale=2, train_wall=11, gb_free=2.8, wall=115715
2021-06-20 02:47:32 | INFO | train_inner | epoch 004: 1119 / 3002 loss=2.545, ppl=5.83, wps=5909.1, ups=0.09, wpb=64816, bsz=128, num_updates=10064, lr=9.99275e-05, gnorm=12.7, loss_scale=2, train_wall=11, gb_free=2.8, wall=115726
2021-06-20 02:47:43 | INFO | train_inner | epoch 004: 1120 / 3002 loss=2.613, ppl=6.12, wps=5856.3, ups=0.09, wpb=64904, bsz=128, num_updates=10065, lr=9.99275e-05, gnorm=2.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=115737
2021-06-20 02:47:54 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-20 02:48:05 | INFO | train_inner | epoch 004: 1122 / 3002 loss=2.665, ppl=6.34, wps=2956.7, ups=0.05, wpb=64790, bsz=128, num_updates=10066, lr=9.99275e-05, gnorm=3.286, loss_scale=1, train_wall=21, gb_free=2.8, wall=115759
2021-06-20 02:48:16 | INFO | train_inner | epoch 004: 1123 / 3002 loss=2.976, ppl=7.87, wps=5855.4, ups=0.09, wpb=64768, bsz=128, num_updates=10067, lr=9.99275e-05, gnorm=3.639, loss_scale=1, train_wall=11, gb_free=2.8, wall=115770
2021-06-20 02:48:27 | INFO | train_inner | epoch 004: 1124 / 3002 loss=2.926, ppl=7.6, wps=5724.4, ups=0.09, wpb=64805, bsz=128, num_updates=10068, lr=9.99275e-05, gnorm=39.922, loss_scale=1, train_wall=11, gb_free=2.8, wall=115781
2021-06-20 02:48:38 | INFO | train_inner | epoch 004: 1125 / 3002 loss=2.948, ppl=7.71, wps=5821.7, ups=0.09, wpb=64870, bsz=128, num_updates=10069, lr=9.99274e-05, gnorm=4.003, loss_scale=1, train_wall=11, gb_free=2.8, wall=115792
2021-06-20 02:48:49 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-20 02:49:00 | INFO | train_inner | epoch 004: 1127 / 3002 loss=2.744, ppl=6.7, wps=2962.7, ups=0.05, wpb=64847, bsz=128, num_updates=10070, lr=9.99274e-05, gnorm=4.255, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=115814
2021-06-20 02:49:11 | INFO | train_inner | epoch 004: 1128 / 3002 loss=2.748, ppl=6.72, wps=5848.2, ups=0.09, wpb=64836, bsz=128, num_updates=10071, lr=9.99274e-05, gnorm=2.342, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115825
2021-06-20 02:49:22 | INFO | train_inner | epoch 004: 1129 / 3002 loss=2.634, ppl=6.21, wps=5790.7, ups=0.09, wpb=64771, bsz=128, num_updates=10072, lr=9.99274e-05, gnorm=2.259, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115837
2021-06-20 02:49:33 | INFO | train_inner | epoch 004: 1130 / 3002 loss=2.606, ppl=6.09, wps=5904.6, ups=0.09, wpb=64774, bsz=128, num_updates=10073, lr=9.99274e-05, gnorm=2.371, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115848
2021-06-20 02:49:44 | INFO | train_inner | epoch 004: 1131 / 3002 loss=2.524, ppl=5.75, wps=5981.2, ups=0.09, wpb=64886, bsz=128, num_updates=10074, lr=9.99274e-05, gnorm=2.162, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=115858
2021-06-20 02:49:55 | INFO | train_inner | epoch 004: 1132 / 3002 loss=2.567, ppl=5.93, wps=5954.7, ups=0.09, wpb=64910, bsz=128, num_updates=10075, lr=9.99274e-05, gnorm=2.256, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=115869
2021-06-20 02:50:06 | INFO | train_inner | epoch 004: 1133 / 3002 loss=2.549, ppl=5.85, wps=5859.1, ups=0.09, wpb=64844, bsz=128, num_updates=10076, lr=9.99274e-05, gnorm=2.217, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115880
2021-06-20 02:50:17 | INFO | train_inner | epoch 004: 1134 / 3002 loss=2.711, ppl=6.55, wps=5790.6, ups=0.09, wpb=64780, bsz=128, num_updates=10077, lr=9.99274e-05, gnorm=2.668, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115892
2021-06-20 02:50:28 | INFO | train_inner | epoch 004: 1135 / 3002 loss=2.541, ppl=5.82, wps=5927.4, ups=0.09, wpb=64856, bsz=128, num_updates=10078, lr=9.99274e-05, gnorm=2.153, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=115903
2021-06-20 02:50:39 | INFO | train_inner | epoch 004: 1136 / 3002 loss=2.549, ppl=5.85, wps=5906.2, ups=0.09, wpb=64878, bsz=128, num_updates=10079, lr=9.99274e-05, gnorm=2.254, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115913
2021-06-20 02:50:51 | INFO | train_inner | epoch 004: 1137 / 3002 loss=2.636, ppl=6.22, wps=5708.1, ups=0.09, wpb=64863, bsz=128, num_updates=10080, lr=9.99274e-05, gnorm=2.261, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115925
2021-06-20 02:51:02 | INFO | train_inner | epoch 004: 1138 / 3002 loss=2.718, ppl=6.58, wps=5853.2, ups=0.09, wpb=64796, bsz=128, num_updates=10081, lr=9.99273e-05, gnorm=2.208, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115936
2021-06-20 02:51:13 | INFO | train_inner | epoch 004: 1139 / 3002 loss=2.608, ppl=6.09, wps=5850.5, ups=0.09, wpb=64827, bsz=128, num_updates=10082, lr=9.99273e-05, gnorm=2.129, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115947
2021-06-20 02:51:23 | INFO | train_inner | epoch 004: 1140 / 3002 loss=2.49, ppl=5.62, wps=6022.3, ups=0.09, wpb=64793, bsz=128, num_updates=10083, lr=9.99273e-05, gnorm=2.089, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=115958
2021-06-20 02:51:35 | INFO | train_inner | epoch 004: 1141 / 3002 loss=2.637, ppl=6.22, wps=5822.2, ups=0.09, wpb=64745, bsz=128, num_updates=10084, lr=9.99273e-05, gnorm=2.208, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115969
2021-06-20 02:51:46 | INFO | train_inner | epoch 004: 1142 / 3002 loss=2.66, ppl=6.32, wps=5917.4, ups=0.09, wpb=64901, bsz=128, num_updates=10085, lr=9.99273e-05, gnorm=2.104, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=115980
2021-06-20 02:51:56 | INFO | train_inner | epoch 004: 1143 / 3002 loss=2.44, ppl=5.43, wps=5946.2, ups=0.09, wpb=64826, bsz=128, num_updates=10086, lr=9.99273e-05, gnorm=2.088, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=115991
2021-06-20 02:52:07 | INFO | train_inner | epoch 004: 1144 / 3002 loss=2.613, ppl=6.12, wps=5865.3, ups=0.09, wpb=64805, bsz=128, num_updates=10087, lr=9.99273e-05, gnorm=2.07, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116002
2021-06-20 02:52:18 | INFO | train_inner | epoch 004: 1145 / 3002 loss=2.622, ppl=6.16, wps=5888.9, ups=0.09, wpb=64726, bsz=128, num_updates=10088, lr=9.99273e-05, gnorm=2.033, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116013
2021-06-20 02:52:29 | INFO | train_inner | epoch 004: 1146 / 3002 loss=2.581, ppl=5.99, wps=5887, ups=0.09, wpb=64787, bsz=128, num_updates=10089, lr=9.99273e-05, gnorm=2.13, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116024
2021-06-20 02:52:41 | INFO | train_inner | epoch 004: 1147 / 3002 loss=2.529, ppl=5.77, wps=5817.6, ups=0.09, wpb=64828, bsz=128, num_updates=10090, lr=9.99273e-05, gnorm=2.222, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116035
2021-06-20 02:52:52 | INFO | train_inner | epoch 004: 1148 / 3002 loss=2.596, ppl=6.04, wps=5876.3, ups=0.09, wpb=64844, bsz=128, num_updates=10091, lr=9.99273e-05, gnorm=2.116, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116046
2021-06-20 02:53:03 | INFO | train_inner | epoch 004: 1149 / 3002 loss=2.534, ppl=5.79, wps=5912.3, ups=0.09, wpb=64832, bsz=128, num_updates=10092, lr=9.99273e-05, gnorm=2.068, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116057
2021-06-20 02:53:14 | INFO | train_inner | epoch 004: 1150 / 3002 loss=2.436, ppl=5.41, wps=5797.6, ups=0.09, wpb=64799, bsz=128, num_updates=10093, lr=9.99273e-05, gnorm=1.998, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116068
2021-06-20 02:53:25 | INFO | train_inner | epoch 004: 1151 / 3002 loss=2.603, ppl=6.07, wps=5810.5, ups=0.09, wpb=64811, bsz=128, num_updates=10094, lr=9.99272e-05, gnorm=2.075, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116079
2021-06-20 02:53:36 | INFO | train_inner | epoch 004: 1152 / 3002 loss=2.435, ppl=5.41, wps=5756.2, ups=0.09, wpb=64809, bsz=128, num_updates=10095, lr=9.99272e-05, gnorm=1.991, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116091
2021-06-20 02:53:47 | INFO | train_inner | epoch 004: 1153 / 3002 loss=2.555, ppl=5.87, wps=5828.5, ups=0.09, wpb=64825, bsz=128, num_updates=10096, lr=9.99272e-05, gnorm=1.953, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116102
2021-06-20 02:53:58 | INFO | train_inner | epoch 004: 1154 / 3002 loss=2.724, ppl=6.6, wps=5920.4, ups=0.09, wpb=64778, bsz=128, num_updates=10097, lr=9.99272e-05, gnorm=2.021, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116113
2021-06-20 02:54:10 | INFO | train_inner | epoch 004: 1155 / 3002 loss=2.668, ppl=6.36, wps=5724.5, ups=0.09, wpb=64740, bsz=128, num_updates=10098, lr=9.99272e-05, gnorm=2.038, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116124
2021-06-20 02:54:21 | INFO | train_inner | epoch 004: 1156 / 3002 loss=2.562, ppl=5.91, wps=5913, ups=0.09, wpb=64905, bsz=128, num_updates=10099, lr=9.99272e-05, gnorm=2.145, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116135
2021-06-20 02:54:32 | INFO | train_inner | epoch 004: 1157 / 3002 loss=2.638, ppl=6.22, wps=5874.7, ups=0.09, wpb=64780, bsz=128, num_updates=10100, lr=9.99272e-05, gnorm=2.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116146
2021-06-20 02:54:43 | INFO | train_inner | epoch 004: 1158 / 3002 loss=2.619, ppl=6.15, wps=5769.4, ups=0.09, wpb=64803, bsz=128, num_updates=10101, lr=9.99272e-05, gnorm=2.071, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116157
2021-06-20 02:54:54 | INFO | train_inner | epoch 004: 1159 / 3002 loss=2.569, ppl=5.93, wps=5735.5, ups=0.09, wpb=64805, bsz=128, num_updates=10102, lr=9.99272e-05, gnorm=2.129, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116168
2021-06-20 02:55:05 | INFO | train_inner | epoch 004: 1160 / 3002 loss=2.554, ppl=5.87, wps=5895.9, ups=0.09, wpb=64767, bsz=128, num_updates=10103, lr=9.99272e-05, gnorm=2.068, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116179
2021-06-20 02:55:16 | INFO | train_inner | epoch 004: 1161 / 3002 loss=2.712, ppl=6.55, wps=5901.6, ups=0.09, wpb=64863, bsz=128, num_updates=10104, lr=9.99272e-05, gnorm=2.155, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116190
2021-06-20 02:55:27 | INFO | train_inner | epoch 004: 1162 / 3002 loss=2.542, ppl=5.83, wps=5827.4, ups=0.09, wpb=64855, bsz=128, num_updates=10105, lr=9.99272e-05, gnorm=2.038, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116202
2021-06-20 02:55:38 | INFO | train_inner | epoch 004: 1163 / 3002 loss=2.511, ppl=5.7, wps=5847.4, ups=0.09, wpb=64879, bsz=128, num_updates=10106, lr=9.99271e-05, gnorm=1.946, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116213
2021-06-20 02:55:49 | INFO | train_inner | epoch 004: 1164 / 3002 loss=2.518, ppl=5.73, wps=5844.2, ups=0.09, wpb=64871, bsz=128, num_updates=10107, lr=9.99271e-05, gnorm=9.294, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116224
2021-06-20 02:56:00 | INFO | train_inner | epoch 004: 1165 / 3002 loss=2.576, ppl=5.96, wps=5936.8, ups=0.09, wpb=64825, bsz=128, num_updates=10108, lr=9.99271e-05, gnorm=2.028, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116235
2021-06-20 02:56:12 | INFO | train_inner | epoch 004: 1166 / 3002 loss=2.524, ppl=5.75, wps=5735.8, ups=0.09, wpb=64796, bsz=128, num_updates=10109, lr=9.99271e-05, gnorm=2.107, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116246
2021-06-20 02:56:23 | INFO | train_inner | epoch 004: 1167 / 3002 loss=2.585, ppl=6, wps=5894.1, ups=0.09, wpb=64866, bsz=128, num_updates=10110, lr=9.99271e-05, gnorm=2.07, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116257
2021-06-20 02:56:34 | INFO | train_inner | epoch 004: 1168 / 3002 loss=2.759, ppl=6.77, wps=5893.7, ups=0.09, wpb=64825, bsz=128, num_updates=10111, lr=9.99271e-05, gnorm=2.174, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116268
2021-06-20 02:56:45 | INFO | train_inner | epoch 004: 1169 / 3002 loss=2.5, ppl=5.66, wps=5894.5, ups=0.09, wpb=64957, bsz=128, num_updates=10112, lr=9.99271e-05, gnorm=2.098, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116279
2021-06-20 02:56:56 | INFO | train_inner | epoch 004: 1170 / 3002 loss=2.695, ppl=6.48, wps=5753, ups=0.09, wpb=64778, bsz=128, num_updates=10113, lr=9.99271e-05, gnorm=2.121, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116290
2021-06-20 02:57:07 | INFO | train_inner | epoch 004: 1171 / 3002 loss=2.66, ppl=6.32, wps=5792.8, ups=0.09, wpb=64841, bsz=128, num_updates=10114, lr=9.99271e-05, gnorm=2.056, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116301
2021-06-20 02:57:18 | INFO | train_inner | epoch 004: 1172 / 3002 loss=2.549, ppl=5.85, wps=6022.4, ups=0.09, wpb=64865, bsz=128, num_updates=10115, lr=9.99271e-05, gnorm=1.971, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116312
2021-06-20 02:57:29 | INFO | train_inner | epoch 004: 1173 / 3002 loss=2.692, ppl=6.46, wps=5788.8, ups=0.09, wpb=64655, bsz=128, num_updates=10116, lr=9.99271e-05, gnorm=2.133, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116323
2021-06-20 02:57:40 | INFO | train_inner | epoch 004: 1174 / 3002 loss=2.528, ppl=5.77, wps=5794.9, ups=0.09, wpb=64858, bsz=128, num_updates=10117, lr=9.99271e-05, gnorm=2.094, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116335
2021-06-20 02:57:51 | INFO | train_inner | epoch 004: 1175 / 3002 loss=2.566, ppl=5.92, wps=5881.6, ups=0.09, wpb=64886, bsz=128, num_updates=10118, lr=9.99271e-05, gnorm=2.034, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116346
2021-06-20 02:58:02 | INFO | train_inner | epoch 004: 1176 / 3002 loss=2.754, ppl=6.74, wps=5837.3, ups=0.09, wpb=64807, bsz=128, num_updates=10119, lr=9.9927e-05, gnorm=2.423, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116357
2021-06-20 02:58:13 | INFO | train_inner | epoch 004: 1177 / 3002 loss=2.588, ppl=6.01, wps=5899.9, ups=0.09, wpb=64959, bsz=128, num_updates=10120, lr=9.9927e-05, gnorm=2.083, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116368
2021-06-20 02:58:25 | INFO | train_inner | epoch 004: 1178 / 3002 loss=2.595, ppl=6.04, wps=5729.2, ups=0.09, wpb=64881, bsz=128, num_updates=10121, lr=9.9927e-05, gnorm=2.23, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116379
2021-06-20 02:58:36 | INFO | train_inner | epoch 004: 1179 / 3002 loss=2.55, ppl=5.86, wps=5843.3, ups=0.09, wpb=64885, bsz=128, num_updates=10122, lr=9.9927e-05, gnorm=2.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116390
2021-06-20 02:58:47 | INFO | train_inner | epoch 004: 1180 / 3002 loss=2.675, ppl=6.38, wps=5799.4, ups=0.09, wpb=64763, bsz=128, num_updates=10123, lr=9.9927e-05, gnorm=2.103, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116401
2021-06-20 02:58:58 | INFO | train_inner | epoch 004: 1181 / 3002 loss=2.555, ppl=5.88, wps=5830.2, ups=0.09, wpb=64864, bsz=128, num_updates=10124, lr=9.9927e-05, gnorm=2.255, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116412
2021-06-20 02:59:09 | INFO | train_inner | epoch 004: 1182 / 3002 loss=2.564, ppl=5.91, wps=5944, ups=0.09, wpb=64937, bsz=128, num_updates=10125, lr=9.9927e-05, gnorm=2.019, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116423
2021-06-20 02:59:20 | INFO | train_inner | epoch 004: 1183 / 3002 loss=2.581, ppl=5.98, wps=5727.8, ups=0.09, wpb=64871, bsz=128, num_updates=10126, lr=9.9927e-05, gnorm=2.087, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116435
2021-06-20 02:59:31 | INFO | train_inner | epoch 004: 1184 / 3002 loss=2.644, ppl=6.25, wps=5834.3, ups=0.09, wpb=64853, bsz=128, num_updates=10127, lr=9.9927e-05, gnorm=2.305, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116446
2021-06-20 02:59:43 | INFO | train_inner | epoch 004: 1185 / 3002 loss=2.566, ppl=5.92, wps=5797.5, ups=0.09, wpb=64797, bsz=128, num_updates=10128, lr=9.9927e-05, gnorm=3.303, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116457
2021-06-20 02:59:54 | INFO | train_inner | epoch 004: 1186 / 3002 loss=2.681, ppl=6.41, wps=5905.2, ups=0.09, wpb=64803, bsz=128, num_updates=10129, lr=9.9927e-05, gnorm=2.133, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116468
2021-06-20 03:00:05 | INFO | train_inner | epoch 004: 1187 / 3002 loss=2.669, ppl=6.36, wps=5795.1, ups=0.09, wpb=64825, bsz=128, num_updates=10130, lr=9.9927e-05, gnorm=2.09, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116479
2021-06-20 03:00:16 | INFO | train_inner | epoch 004: 1188 / 3002 loss=2.598, ppl=6.05, wps=5823.8, ups=0.09, wpb=64781, bsz=128, num_updates=10131, lr=9.99269e-05, gnorm=2.005, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116490
2021-06-20 03:00:27 | INFO | train_inner | epoch 004: 1189 / 3002 loss=2.547, ppl=5.84, wps=5788.6, ups=0.09, wpb=64790, bsz=128, num_updates=10132, lr=9.99269e-05, gnorm=2.062, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116501
2021-06-20 03:00:38 | INFO | train_inner | epoch 004: 1190 / 3002 loss=2.559, ppl=5.89, wps=5934, ups=0.09, wpb=64865, bsz=128, num_updates=10133, lr=9.99269e-05, gnorm=2.179, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116512
2021-06-20 03:00:49 | INFO | train_inner | epoch 004: 1191 / 3002 loss=2.515, ppl=5.72, wps=5703.1, ups=0.09, wpb=64816, bsz=128, num_updates=10134, lr=9.99269e-05, gnorm=2.116, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116524
2021-06-20 03:01:01 | INFO | train_inner | epoch 004: 1192 / 3002 loss=2.495, ppl=5.64, wps=5796.3, ups=0.09, wpb=64805, bsz=128, num_updates=10135, lr=9.99269e-05, gnorm=2.201, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116535
2021-06-20 03:01:12 | INFO | train_inner | epoch 004: 1193 / 3002 loss=2.526, ppl=5.76, wps=5813.5, ups=0.09, wpb=64846, bsz=128, num_updates=10136, lr=9.99269e-05, gnorm=2.167, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116546
2021-06-20 03:01:23 | INFO | train_inner | epoch 004: 1194 / 3002 loss=2.576, ppl=5.96, wps=5822, ups=0.09, wpb=64797, bsz=128, num_updates=10137, lr=9.99269e-05, gnorm=2.003, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116557
2021-06-20 03:01:34 | INFO | train_inner | epoch 004: 1195 / 3002 loss=2.602, ppl=6.07, wps=5839.8, ups=0.09, wpb=64844, bsz=128, num_updates=10138, lr=9.99269e-05, gnorm=2.049, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116568
2021-06-20 03:01:45 | INFO | train_inner | epoch 004: 1196 / 3002 loss=2.696, ppl=6.48, wps=5738.7, ups=0.09, wpb=64775, bsz=128, num_updates=10139, lr=9.99269e-05, gnorm=2.044, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116580
2021-06-20 03:01:56 | INFO | train_inner | epoch 004: 1197 / 3002 loss=2.558, ppl=5.89, wps=5788.8, ups=0.09, wpb=64751, bsz=128, num_updates=10140, lr=9.99269e-05, gnorm=2.093, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116591
2021-06-20 03:02:07 | INFO | train_inner | epoch 004: 1198 / 3002 loss=2.675, ppl=6.39, wps=5913.4, ups=0.09, wpb=64895, bsz=128, num_updates=10141, lr=9.99269e-05, gnorm=2.28, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116602
2021-06-20 03:02:19 | INFO | train_inner | epoch 004: 1199 / 3002 loss=2.576, ppl=5.96, wps=5824.9, ups=0.09, wpb=64802, bsz=128, num_updates=10142, lr=9.99269e-05, gnorm=2.093, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116613
2021-06-20 03:02:30 | INFO | train_inner | epoch 004: 1200 / 3002 loss=2.621, ppl=6.15, wps=5813.9, ups=0.09, wpb=64894, bsz=128, num_updates=10143, lr=9.99269e-05, gnorm=2.116, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116624
2021-06-20 03:02:41 | INFO | train_inner | epoch 004: 1201 / 3002 loss=2.704, ppl=6.51, wps=5847, ups=0.09, wpb=64868, bsz=128, num_updates=10144, lr=9.99268e-05, gnorm=2.4, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116635
2021-06-20 03:02:52 | INFO | train_inner | epoch 004: 1202 / 3002 loss=2.547, ppl=5.84, wps=5921.3, ups=0.09, wpb=64845, bsz=128, num_updates=10145, lr=9.99268e-05, gnorm=2.124, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116646
2021-06-20 03:03:03 | INFO | train_inner | epoch 004: 1203 / 3002 loss=2.556, ppl=5.88, wps=5775.1, ups=0.09, wpb=64841, bsz=128, num_updates=10146, lr=9.99268e-05, gnorm=2.182, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116657
2021-06-20 03:03:14 | INFO | train_inner | epoch 004: 1204 / 3002 loss=2.566, ppl=5.92, wps=5885.5, ups=0.09, wpb=64834, bsz=128, num_updates=10147, lr=9.99268e-05, gnorm=2.089, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116668
2021-06-20 03:03:25 | INFO | train_inner | epoch 004: 1205 / 3002 loss=2.457, ppl=5.49, wps=5801.3, ups=0.09, wpb=64794, bsz=128, num_updates=10148, lr=9.99268e-05, gnorm=2.037, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116680
2021-06-20 03:03:36 | INFO | train_inner | epoch 004: 1206 / 3002 loss=2.512, ppl=5.7, wps=5886.1, ups=0.09, wpb=64933, bsz=128, num_updates=10149, lr=9.99268e-05, gnorm=2.125, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116691
2021-06-20 03:03:47 | INFO | train_inner | epoch 004: 1207 / 3002 loss=2.512, ppl=5.7, wps=5797.2, ups=0.09, wpb=64790, bsz=128, num_updates=10150, lr=9.99268e-05, gnorm=1.983, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116702
2021-06-20 03:03:59 | INFO | train_inner | epoch 004: 1208 / 3002 loss=2.551, ppl=5.86, wps=5757, ups=0.09, wpb=64854, bsz=128, num_updates=10151, lr=9.99268e-05, gnorm=2.052, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116713
2021-06-20 03:04:10 | INFO | train_inner | epoch 004: 1209 / 3002 loss=2.589, ppl=6.02, wps=5881.6, ups=0.09, wpb=64799, bsz=128, num_updates=10152, lr=9.99268e-05, gnorm=2.106, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116724
2021-06-20 03:04:21 | INFO | train_inner | epoch 004: 1210 / 3002 loss=2.621, ppl=6.15, wps=5838.1, ups=0.09, wpb=64913, bsz=128, num_updates=10153, lr=9.99268e-05, gnorm=2.193, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116735
2021-06-20 03:04:32 | INFO | train_inner | epoch 004: 1211 / 3002 loss=2.504, ppl=5.67, wps=5828.3, ups=0.09, wpb=64787, bsz=128, num_updates=10154, lr=9.99268e-05, gnorm=1.986, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116746
2021-06-20 03:04:43 | INFO | train_inner | epoch 004: 1212 / 3002 loss=2.595, ppl=6.04, wps=5939, ups=0.09, wpb=64811, bsz=128, num_updates=10155, lr=9.99268e-05, gnorm=2.113, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116757
2021-06-20 03:04:54 | INFO | train_inner | epoch 004: 1213 / 3002 loss=2.476, ppl=5.56, wps=5755.9, ups=0.09, wpb=64859, bsz=128, num_updates=10156, lr=9.99267e-05, gnorm=4.463, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116768
2021-06-20 03:05:05 | INFO | train_inner | epoch 004: 1214 / 3002 loss=2.628, ppl=6.18, wps=5848.9, ups=0.09, wpb=64802, bsz=128, num_updates=10157, lr=9.99267e-05, gnorm=2.085, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116780
2021-06-20 03:05:16 | INFO | train_inner | epoch 004: 1215 / 3002 loss=2.444, ppl=5.44, wps=5796, ups=0.09, wpb=64899, bsz=128, num_updates=10158, lr=9.99267e-05, gnorm=1.972, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116791
2021-06-20 03:05:27 | INFO | train_inner | epoch 004: 1216 / 3002 loss=2.645, ppl=6.26, wps=5923.9, ups=0.09, wpb=64925, bsz=128, num_updates=10159, lr=9.99267e-05, gnorm=2.077, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116802
2021-06-20 03:05:38 | INFO | train_inner | epoch 004: 1217 / 3002 loss=2.443, ppl=5.44, wps=5863.8, ups=0.09, wpb=64865, bsz=128, num_updates=10160, lr=9.99267e-05, gnorm=2.047, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116813
2021-06-20 03:05:50 | INFO | train_inner | epoch 004: 1218 / 3002 loss=2.506, ppl=5.68, wps=5755, ups=0.09, wpb=64783, bsz=128, num_updates=10161, lr=9.99267e-05, gnorm=2.309, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116824
2021-06-20 03:06:01 | INFO | train_inner | epoch 004: 1219 / 3002 loss=2.528, ppl=5.77, wps=5781.9, ups=0.09, wpb=64812, bsz=128, num_updates=10162, lr=9.99267e-05, gnorm=9.636, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116835
2021-06-20 03:06:12 | INFO | train_inner | epoch 004: 1220 / 3002 loss=2.519, ppl=5.73, wps=6002.3, ups=0.09, wpb=64901, bsz=128, num_updates=10163, lr=9.99267e-05, gnorm=17.12, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116846
2021-06-20 03:06:23 | INFO | train_inner | epoch 004: 1221 / 3002 loss=2.556, ppl=5.88, wps=5865.8, ups=0.09, wpb=64865, bsz=128, num_updates=10164, lr=9.99267e-05, gnorm=1.957, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116857
2021-06-20 03:06:34 | INFO | train_inner | epoch 004: 1222 / 3002 loss=2.661, ppl=6.32, wps=5888.1, ups=0.09, wpb=64841, bsz=128, num_updates=10165, lr=9.99267e-05, gnorm=2.188, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116868
2021-06-20 03:06:45 | INFO | train_inner | epoch 004: 1223 / 3002 loss=2.653, ppl=6.29, wps=5927.6, ups=0.09, wpb=64884, bsz=128, num_updates=10166, lr=9.99267e-05, gnorm=2.139, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116879
2021-06-20 03:06:56 | INFO | train_inner | epoch 004: 1224 / 3002 loss=2.635, ppl=6.21, wps=5979.2, ups=0.09, wpb=64842, bsz=128, num_updates=10167, lr=9.99267e-05, gnorm=2.037, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116890
2021-06-20 03:07:07 | INFO | train_inner | epoch 004: 1225 / 3002 loss=2.564, ppl=5.91, wps=5865.5, ups=0.09, wpb=64889, bsz=128, num_updates=10168, lr=9.99267e-05, gnorm=2.04, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116901
2021-06-20 03:07:17 | INFO | train_inner | epoch 004: 1226 / 3002 loss=2.603, ppl=6.08, wps=5968.1, ups=0.09, wpb=64838, bsz=128, num_updates=10169, lr=9.99266e-05, gnorm=2.099, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=116912
2021-06-20 03:07:29 | INFO | train_inner | epoch 004: 1227 / 3002 loss=2.611, ppl=6.11, wps=5815.9, ups=0.09, wpb=64709, bsz=128, num_updates=10170, lr=9.99266e-05, gnorm=2.071, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116923
2021-06-20 03:07:40 | INFO | train_inner | epoch 004: 1228 / 3002 loss=2.667, ppl=6.35, wps=5803.1, ups=0.09, wpb=64916, bsz=128, num_updates=10171, lr=9.99266e-05, gnorm=2.102, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116934
2021-06-20 03:07:51 | INFO | train_inner | epoch 004: 1229 / 3002 loss=2.643, ppl=6.25, wps=5838.2, ups=0.09, wpb=64859, bsz=128, num_updates=10172, lr=9.99266e-05, gnorm=2.115, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116945
2021-06-20 03:08:02 | INFO | train_inner | epoch 004: 1230 / 3002 loss=2.626, ppl=6.17, wps=5885.4, ups=0.09, wpb=64860, bsz=128, num_updates=10173, lr=9.99266e-05, gnorm=2.043, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116956
2021-06-20 03:08:13 | INFO | train_inner | epoch 004: 1231 / 3002 loss=2.535, ppl=5.79, wps=5878.5, ups=0.09, wpb=64839, bsz=128, num_updates=10174, lr=9.99266e-05, gnorm=2.105, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116967
2021-06-20 03:08:24 | INFO | train_inner | epoch 004: 1232 / 3002 loss=2.601, ppl=6.07, wps=5903.8, ups=0.09, wpb=64824, bsz=128, num_updates=10175, lr=9.99266e-05, gnorm=2.168, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116978
2021-06-20 03:08:35 | INFO | train_inner | epoch 004: 1233 / 3002 loss=2.54, ppl=5.82, wps=5877.3, ups=0.09, wpb=64796, bsz=128, num_updates=10176, lr=9.99266e-05, gnorm=2.017, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=116989
2021-06-20 03:08:46 | INFO | train_inner | epoch 004: 1234 / 3002 loss=2.556, ppl=5.88, wps=5926.1, ups=0.09, wpb=64768, bsz=128, num_updates=10177, lr=9.99266e-05, gnorm=2.076, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=117000
2021-06-20 03:08:57 | INFO | train_inner | epoch 004: 1235 / 3002 loss=2.517, ppl=5.73, wps=5793.4, ups=0.09, wpb=64805, bsz=128, num_updates=10178, lr=9.99266e-05, gnorm=2.019, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117011
2021-06-20 03:09:08 | INFO | train_inner | epoch 004: 1236 / 3002 loss=2.563, ppl=5.91, wps=5875, ups=0.09, wpb=64851, bsz=128, num_updates=10179, lr=9.99266e-05, gnorm=2.13, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117022
2021-06-20 03:09:19 | INFO | train_inner | epoch 004: 1237 / 3002 loss=2.748, ppl=6.72, wps=5927.2, ups=0.09, wpb=64811, bsz=128, num_updates=10180, lr=9.99266e-05, gnorm=2.141, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=117033
2021-06-20 03:09:30 | INFO | train_inner | epoch 004: 1238 / 3002 loss=2.77, ppl=6.82, wps=5853, ups=0.09, wpb=64767, bsz=128, num_updates=10181, lr=9.99265e-05, gnorm=2.138, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117044
2021-06-20 03:09:41 | INFO | train_inner | epoch 004: 1239 / 3002 loss=2.601, ppl=6.07, wps=5828.9, ups=0.09, wpb=64854, bsz=128, num_updates=10182, lr=9.99265e-05, gnorm=2.051, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117056
2021-06-20 03:09:52 | INFO | train_inner | epoch 004: 1240 / 3002 loss=2.86, ppl=7.26, wps=5877.2, ups=0.09, wpb=64893, bsz=128, num_updates=10183, lr=9.99265e-05, gnorm=2.228, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117067
2021-06-20 03:10:03 | INFO | train_inner | epoch 004: 1241 / 3002 loss=2.641, ppl=6.24, wps=5801.3, ups=0.09, wpb=64816, bsz=128, num_updates=10184, lr=9.99265e-05, gnorm=2.036, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117078
2021-06-20 03:10:14 | INFO | train_inner | epoch 004: 1242 / 3002 loss=2.531, ppl=5.78, wps=5869.2, ups=0.09, wpb=64825, bsz=128, num_updates=10185, lr=9.99265e-05, gnorm=2.741, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117089
2021-06-20 03:10:26 | INFO | train_inner | epoch 004: 1243 / 3002 loss=2.733, ppl=6.65, wps=5869.5, ups=0.09, wpb=64777, bsz=128, num_updates=10186, lr=9.99265e-05, gnorm=2.263, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117100
2021-06-20 03:10:37 | INFO | train_inner | epoch 004: 1244 / 3002 loss=2.55, ppl=5.86, wps=5693.7, ups=0.09, wpb=64839, bsz=128, num_updates=10187, lr=9.99265e-05, gnorm=2.064, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117111
2021-06-20 03:10:48 | INFO | train_inner | epoch 004: 1245 / 3002 loss=2.459, ppl=5.5, wps=5876.5, ups=0.09, wpb=64869, bsz=128, num_updates=10188, lr=9.99265e-05, gnorm=2.084, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117122
2021-06-20 03:10:59 | INFO | train_inner | epoch 004: 1246 / 3002 loss=2.485, ppl=5.6, wps=5821, ups=0.09, wpb=64780, bsz=128, num_updates=10189, lr=9.99265e-05, gnorm=2.155, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117133
2021-06-20 03:11:10 | INFO | train_inner | epoch 004: 1247 / 3002 loss=2.638, ppl=6.23, wps=5794, ups=0.09, wpb=64731, bsz=128, num_updates=10190, lr=9.99265e-05, gnorm=1.988, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117145
2021-06-20 03:11:22 | INFO | train_inner | epoch 004: 1248 / 3002 loss=2.611, ppl=6.11, wps=5659, ups=0.09, wpb=64838, bsz=128, num_updates=10191, lr=9.99265e-05, gnorm=1.98, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117156
2021-06-20 03:11:33 | INFO | train_inner | epoch 004: 1249 / 3002 loss=2.623, ppl=6.16, wps=5837.1, ups=0.09, wpb=64820, bsz=128, num_updates=10192, lr=9.99265e-05, gnorm=2.155, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117167
2021-06-20 03:11:44 | INFO | train_inner | epoch 004: 1250 / 3002 loss=2.635, ppl=6.21, wps=5871.8, ups=0.09, wpb=64893, bsz=128, num_updates=10193, lr=9.99265e-05, gnorm=2.062, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117178
2021-06-20 03:11:55 | INFO | train_inner | epoch 004: 1251 / 3002 loss=2.623, ppl=6.16, wps=5917.6, ups=0.09, wpb=64819, bsz=128, num_updates=10194, lr=9.99264e-05, gnorm=2.193, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117189
2021-06-20 03:12:06 | INFO | train_inner | epoch 004: 1252 / 3002 loss=2.728, ppl=6.63, wps=5929, ups=0.09, wpb=64866, bsz=128, num_updates=10195, lr=9.99264e-05, gnorm=2.121, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=117200
2021-06-20 03:12:17 | INFO | train_inner | epoch 004: 1253 / 3002 loss=2.566, ppl=5.92, wps=5782.9, ups=0.09, wpb=64810, bsz=128, num_updates=10196, lr=9.99264e-05, gnorm=2.114, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=117211
2021-06-20 03:12:28 | INFO | train_inner | epoch 004: 1254 / 3002 loss=2.287, ppl=4.88, wps=5885, ups=0.09, wpb=64890, bsz=128, num_updates=10197, lr=9.99264e-05, gnorm=2.216, loss_scale=1, train_wall=11, gb_free=2.8, wall=117222
2021-06-20 03:12:39 | INFO | train_inner | epoch 004: 1255 / 3002 loss=2.53, ppl=5.77, wps=5951.4, ups=0.09, wpb=64823, bsz=128, num_updates=10198, lr=9.99264e-05, gnorm=2.172, loss_scale=1, train_wall=10, gb_free=2.8, wall=117233
2021-06-20 03:12:50 | INFO | train_inner | epoch 004: 1256 / 3002 loss=2.529, ppl=5.77, wps=5890.4, ups=0.09, wpb=64884, bsz=128, num_updates=10199, lr=9.99264e-05, gnorm=2.134, loss_scale=1, train_wall=11, gb_free=2.8, wall=117244
2021-06-20 03:13:01 | INFO | train_inner | epoch 004: 1257 / 3002 loss=2.698, ppl=6.49, wps=5853.1, ups=0.09, wpb=64776, bsz=128, num_updates=10200, lr=9.99264e-05, gnorm=2.58, loss_scale=1, train_wall=11, gb_free=2.8, wall=117255
2021-06-20 03:13:12 | INFO | train_inner | epoch 004: 1258 / 3002 loss=2.542, ppl=5.82, wps=5833.1, ups=0.09, wpb=64911, bsz=128, num_updates=10201, lr=9.99264e-05, gnorm=2.227, loss_scale=1, train_wall=11, gb_free=2.8, wall=117266
2021-06-20 03:13:23 | INFO | train_inner | epoch 004: 1259 / 3002 loss=2.427, ppl=5.38, wps=5883.3, ups=0.09, wpb=64797, bsz=128, num_updates=10202, lr=9.99264e-05, gnorm=2.018, loss_scale=1, train_wall=11, gb_free=2.8, wall=117277
2021-06-20 03:13:34 | INFO | train_inner | epoch 004: 1260 / 3002 loss=2.556, ppl=5.88, wps=5995.4, ups=0.09, wpb=64812, bsz=128, num_updates=10203, lr=9.99264e-05, gnorm=1.971, loss_scale=1, train_wall=10, gb_free=2.8, wall=117288
2021-06-20 03:13:45 | INFO | train_inner | epoch 004: 1261 / 3002 loss=2.625, ppl=6.17, wps=5955.2, ups=0.09, wpb=64914, bsz=128, num_updates=10204, lr=9.99264e-05, gnorm=2.059, loss_scale=1, train_wall=10, gb_free=2.8, wall=117299
2021-06-20 03:13:56 | INFO | train_inner | epoch 004: 1262 / 3002 loss=2.653, ppl=6.29, wps=5908.7, ups=0.09, wpb=64850, bsz=128, num_updates=10205, lr=9.99264e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=117310
2021-06-20 03:14:07 | INFO | train_inner | epoch 004: 1263 / 3002 loss=2.505, ppl=5.68, wps=5961, ups=0.09, wpb=64843, bsz=128, num_updates=10206, lr=9.99263e-05, gnorm=2.076, loss_scale=1, train_wall=10, gb_free=2.8, wall=117321
2021-06-20 03:14:18 | INFO | train_inner | epoch 004: 1264 / 3002 loss=2.596, ppl=6.05, wps=5933.4, ups=0.09, wpb=64892, bsz=128, num_updates=10207, lr=9.99263e-05, gnorm=2.143, loss_scale=1, train_wall=10, gb_free=2.8, wall=117332
2021-06-20 03:14:29 | INFO | train_inner | epoch 004: 1265 / 3002 loss=2.493, ppl=5.63, wps=5807.8, ups=0.09, wpb=64826, bsz=128, num_updates=10208, lr=9.99263e-05, gnorm=2.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=117343
2021-06-20 03:14:40 | INFO | train_inner | epoch 004: 1266 / 3002 loss=2.575, ppl=5.96, wps=5913.1, ups=0.09, wpb=64821, bsz=128, num_updates=10209, lr=9.99263e-05, gnorm=2.072, loss_scale=1, train_wall=11, gb_free=2.8, wall=117354
2021-06-20 03:14:51 | INFO | train_inner | epoch 004: 1267 / 3002 loss=2.637, ppl=6.22, wps=5957.2, ups=0.09, wpb=64799, bsz=128, num_updates=10210, lr=9.99263e-05, gnorm=1.996, loss_scale=1, train_wall=10, gb_free=2.8, wall=117365
2021-06-20 03:15:02 | INFO | train_inner | epoch 004: 1268 / 3002 loss=2.492, ppl=5.63, wps=5914.7, ups=0.09, wpb=64826, bsz=128, num_updates=10211, lr=9.99263e-05, gnorm=2.011, loss_scale=1, train_wall=11, gb_free=2.8, wall=117376
2021-06-20 03:15:13 | INFO | train_inner | epoch 004: 1269 / 3002 loss=2.561, ppl=5.9, wps=5705.2, ups=0.09, wpb=64790, bsz=128, num_updates=10212, lr=9.99263e-05, gnorm=2.072, loss_scale=1, train_wall=11, gb_free=2.8, wall=117387
2021-06-20 03:15:24 | INFO | train_inner | epoch 004: 1270 / 3002 loss=2.579, ppl=5.98, wps=5856.1, ups=0.09, wpb=64855, bsz=128, num_updates=10213, lr=9.99263e-05, gnorm=2.102, loss_scale=1, train_wall=11, gb_free=2.8, wall=117398
2021-06-20 03:15:35 | INFO | train_inner | epoch 004: 1271 / 3002 loss=2.635, ppl=6.21, wps=5727.3, ups=0.09, wpb=64840, bsz=128, num_updates=10214, lr=9.99263e-05, gnorm=2.042, loss_scale=1, train_wall=11, gb_free=2.8, wall=117410
2021-06-20 03:15:46 | INFO | train_inner | epoch 004: 1272 / 3002 loss=2.665, ppl=6.34, wps=5862.4, ups=0.09, wpb=64868, bsz=128, num_updates=10215, lr=9.99263e-05, gnorm=2.145, loss_scale=1, train_wall=11, gb_free=2.8, wall=117421
2021-06-20 03:15:58 | INFO | train_inner | epoch 004: 1273 / 3002 loss=2.637, ppl=6.22, wps=5847.5, ups=0.09, wpb=64882, bsz=128, num_updates=10216, lr=9.99263e-05, gnorm=2.011, loss_scale=1, train_wall=11, gb_free=2.8, wall=117432
2021-06-20 03:16:09 | INFO | train_inner | epoch 004: 1274 / 3002 loss=2.685, ppl=6.43, wps=5894.2, ups=0.09, wpb=64869, bsz=128, num_updates=10217, lr=9.99263e-05, gnorm=2.159, loss_scale=1, train_wall=11, gb_free=2.8, wall=117443
2021-06-20 03:16:20 | INFO | train_inner | epoch 004: 1275 / 3002 loss=2.574, ppl=5.96, wps=5846.8, ups=0.09, wpb=64775, bsz=128, num_updates=10218, lr=9.99263e-05, gnorm=2.07, loss_scale=1, train_wall=11, gb_free=2.8, wall=117454
2021-06-20 03:16:31 | INFO | train_inner | epoch 004: 1276 / 3002 loss=2.461, ppl=5.51, wps=5896.7, ups=0.09, wpb=64808, bsz=128, num_updates=10219, lr=9.99262e-05, gnorm=2.097, loss_scale=1, train_wall=11, gb_free=2.8, wall=117465
2021-06-20 03:16:42 | INFO | train_inner | epoch 004: 1277 / 3002 loss=2.627, ppl=6.18, wps=5926.2, ups=0.09, wpb=64848, bsz=128, num_updates=10220, lr=9.99262e-05, gnorm=2.083, loss_scale=1, train_wall=10, gb_free=2.8, wall=117476
2021-06-20 03:16:52 | INFO | train_inner | epoch 004: 1278 / 3002 loss=2.659, ppl=6.31, wps=5996.4, ups=0.09, wpb=64902, bsz=128, num_updates=10221, lr=9.99262e-05, gnorm=2.072, loss_scale=1, train_wall=10, gb_free=2.8, wall=117487
2021-06-20 03:17:04 | INFO | train_inner | epoch 004: 1279 / 3002 loss=2.609, ppl=6.1, wps=5756, ups=0.09, wpb=64749, bsz=128, num_updates=10222, lr=9.99262e-05, gnorm=2.173, loss_scale=1, train_wall=11, gb_free=2.8, wall=117498
2021-06-20 03:17:15 | INFO | train_inner | epoch 004: 1280 / 3002 loss=2.714, ppl=6.56, wps=5821.7, ups=0.09, wpb=64915, bsz=128, num_updates=10223, lr=9.99262e-05, gnorm=8.619, loss_scale=1, train_wall=11, gb_free=2.8, wall=117509
2021-06-20 03:17:26 | INFO | train_inner | epoch 004: 1281 / 3002 loss=2.565, ppl=5.92, wps=5893.8, ups=0.09, wpb=64764, bsz=128, num_updates=10224, lr=9.99262e-05, gnorm=2.072, loss_scale=1, train_wall=11, gb_free=2.8, wall=117520
2021-06-20 03:17:37 | INFO | train_inner | epoch 004: 1282 / 3002 loss=2.575, ppl=5.96, wps=5947.4, ups=0.09, wpb=64821, bsz=128, num_updates=10225, lr=9.99262e-05, gnorm=2.182, loss_scale=1, train_wall=10, gb_free=2.8, wall=117531
2021-06-20 03:17:47 | INFO | train_inner | epoch 004: 1283 / 3002 loss=2.527, ppl=5.76, wps=5995.3, ups=0.09, wpb=64819, bsz=128, num_updates=10226, lr=9.99262e-05, gnorm=4.21, loss_scale=1, train_wall=10, gb_free=2.8, wall=117542
2021-06-20 03:17:58 | INFO | train_inner | epoch 004: 1284 / 3002 loss=2.625, ppl=6.17, wps=5865.5, ups=0.09, wpb=64811, bsz=128, num_updates=10227, lr=9.99262e-05, gnorm=2.029, loss_scale=1, train_wall=11, gb_free=2.8, wall=117553
2021-06-20 03:18:10 | INFO | train_inner | epoch 004: 1285 / 3002 loss=2.627, ppl=6.18, wps=5818.6, ups=0.09, wpb=64862, bsz=128, num_updates=10228, lr=9.99262e-05, gnorm=2.19, loss_scale=1, train_wall=11, gb_free=2.8, wall=117564
2021-06-20 03:18:21 | INFO | train_inner | epoch 004: 1286 / 3002 loss=2.561, ppl=5.9, wps=5876.6, ups=0.09, wpb=64818, bsz=128, num_updates=10229, lr=9.99262e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=117575
2021-06-20 03:18:32 | INFO | train_inner | epoch 004: 1287 / 3002 loss=2.525, ppl=5.76, wps=5855.4, ups=0.09, wpb=64812, bsz=128, num_updates=10230, lr=9.99262e-05, gnorm=2.102, loss_scale=1, train_wall=11, gb_free=2.8, wall=117586
2021-06-20 03:18:43 | INFO | train_inner | epoch 004: 1288 / 3002 loss=2.639, ppl=6.23, wps=5842.7, ups=0.09, wpb=64861, bsz=128, num_updates=10231, lr=9.99261e-05, gnorm=2.225, loss_scale=1, train_wall=11, gb_free=2.8, wall=117597
2021-06-20 03:18:54 | INFO | train_inner | epoch 004: 1289 / 3002 loss=2.566, ppl=5.92, wps=5898.7, ups=0.09, wpb=64705, bsz=128, num_updates=10232, lr=9.99261e-05, gnorm=3.387, loss_scale=1, train_wall=10, gb_free=2.8, wall=117608
2021-06-20 03:19:05 | INFO | train_inner | epoch 004: 1290 / 3002 loss=2.663, ppl=6.33, wps=6005, ups=0.09, wpb=64855, bsz=128, num_updates=10233, lr=9.99261e-05, gnorm=2.649, loss_scale=1, train_wall=10, gb_free=2.8, wall=117619
2021-06-20 03:19:16 | INFO | train_inner | epoch 004: 1291 / 3002 loss=2.807, ppl=7, wps=5939.6, ups=0.09, wpb=64865, bsz=128, num_updates=10234, lr=9.99261e-05, gnorm=2.178, loss_scale=1, train_wall=10, gb_free=2.8, wall=117630
2021-06-20 03:19:27 | INFO | train_inner | epoch 004: 1292 / 3002 loss=2.61, ppl=6.11, wps=5862.1, ups=0.09, wpb=64816, bsz=128, num_updates=10235, lr=9.99261e-05, gnorm=2.101, loss_scale=1, train_wall=11, gb_free=2.8, wall=117641
2021-06-20 03:19:38 | INFO | train_inner | epoch 004: 1293 / 3002 loss=2.739, ppl=6.67, wps=5933.9, ups=0.09, wpb=64826, bsz=128, num_updates=10236, lr=9.99261e-05, gnorm=8.649, loss_scale=1, train_wall=10, gb_free=2.8, wall=117652
2021-06-20 03:19:49 | INFO | train_inner | epoch 004: 1294 / 3002 loss=2.806, ppl=6.99, wps=5786.2, ups=0.09, wpb=64763, bsz=128, num_updates=10237, lr=9.99261e-05, gnorm=24.351, loss_scale=1, train_wall=11, gb_free=2.8, wall=117663
2021-06-20 03:20:00 | INFO | train_inner | epoch 004: 1295 / 3002 loss=2.549, ppl=5.85, wps=5841.3, ups=0.09, wpb=64762, bsz=128, num_updates=10238, lr=9.99261e-05, gnorm=3.349, loss_scale=1, train_wall=11, gb_free=2.8, wall=117674
2021-06-20 03:20:11 | INFO | train_inner | epoch 004: 1296 / 3002 loss=2.701, ppl=6.5, wps=5929.3, ups=0.09, wpb=64811, bsz=128, num_updates=10239, lr=9.99261e-05, gnorm=2.085, loss_scale=1, train_wall=10, gb_free=2.8, wall=117685
2021-06-20 03:20:22 | INFO | train_inner | epoch 004: 1297 / 3002 loss=2.646, ppl=6.26, wps=5793.4, ups=0.09, wpb=64917, bsz=128, num_updates=10240, lr=9.99261e-05, gnorm=2.096, loss_scale=1, train_wall=11, gb_free=2.8, wall=117696
2021-06-20 03:20:33 | INFO | train_inner | epoch 004: 1298 / 3002 loss=2.564, ppl=5.91, wps=5761.5, ups=0.09, wpb=64765, bsz=128, num_updates=10241, lr=9.99261e-05, gnorm=2.806, loss_scale=1, train_wall=11, gb_free=2.8, wall=117708
2021-06-20 03:20:44 | INFO | train_inner | epoch 004: 1299 / 3002 loss=2.559, ppl=5.89, wps=5921.9, ups=0.09, wpb=64877, bsz=128, num_updates=10242, lr=9.99261e-05, gnorm=2.198, loss_scale=1, train_wall=10, gb_free=2.8, wall=117718
2021-06-20 03:20:55 | INFO | train_inner | epoch 004: 1300 / 3002 loss=2.673, ppl=6.38, wps=5828.2, ups=0.09, wpb=64844, bsz=128, num_updates=10243, lr=9.99261e-05, gnorm=9.962, loss_scale=1, train_wall=11, gb_free=2.8, wall=117730
2021-06-20 03:21:06 | INFO | train_inner | epoch 004: 1301 / 3002 loss=2.613, ppl=6.12, wps=5817.8, ups=0.09, wpb=64710, bsz=128, num_updates=10244, lr=9.9926e-05, gnorm=2.347, loss_scale=1, train_wall=11, gb_free=2.8, wall=117741
2021-06-20 03:21:17 | INFO | train_inner | epoch 004: 1302 / 3002 loss=2.687, ppl=6.44, wps=5920.6, ups=0.09, wpb=64861, bsz=128, num_updates=10245, lr=9.9926e-05, gnorm=2.149, loss_scale=1, train_wall=11, gb_free=2.8, wall=117752
2021-06-20 03:21:29 | INFO | train_inner | epoch 004: 1303 / 3002 loss=2.621, ppl=6.15, wps=5783, ups=0.09, wpb=64847, bsz=128, num_updates=10246, lr=9.9926e-05, gnorm=2.501, loss_scale=1, train_wall=11, gb_free=2.8, wall=117763
2021-06-20 03:21:40 | INFO | train_inner | epoch 004: 1304 / 3002 loss=2.658, ppl=6.31, wps=5875.2, ups=0.09, wpb=64763, bsz=128, num_updates=10247, lr=9.9926e-05, gnorm=2.19, loss_scale=1, train_wall=11, gb_free=2.8, wall=117774
2021-06-20 03:21:51 | INFO | train_inner | epoch 004: 1305 / 3002 loss=2.475, ppl=5.56, wps=5780.1, ups=0.09, wpb=64841, bsz=128, num_updates=10248, lr=9.9926e-05, gnorm=2.414, loss_scale=1, train_wall=11, gb_free=2.8, wall=117785
2021-06-20 03:22:02 | INFO | train_inner | epoch 004: 1306 / 3002 loss=2.662, ppl=6.33, wps=6034.4, ups=0.09, wpb=64817, bsz=128, num_updates=10249, lr=9.9926e-05, gnorm=2.098, loss_scale=1, train_wall=10, gb_free=2.8, wall=117796
2021-06-20 03:22:13 | INFO | train_inner | epoch 004: 1307 / 3002 loss=2.602, ppl=6.07, wps=5811.3, ups=0.09, wpb=64870, bsz=128, num_updates=10250, lr=9.9926e-05, gnorm=2.276, loss_scale=1, train_wall=11, gb_free=2.8, wall=117807
2021-06-20 03:22:24 | INFO | train_inner | epoch 004: 1308 / 3002 loss=2.558, ppl=5.89, wps=5878, ups=0.09, wpb=64853, bsz=128, num_updates=10251, lr=9.9926e-05, gnorm=2.167, loss_scale=1, train_wall=11, gb_free=2.8, wall=117818
2021-06-20 03:22:35 | INFO | train_inner | epoch 004: 1309 / 3002 loss=2.889, ppl=7.41, wps=5838.8, ups=0.09, wpb=64827, bsz=128, num_updates=10252, lr=9.9926e-05, gnorm=2.414, loss_scale=1, train_wall=11, gb_free=2.8, wall=117829
2021-06-20 03:22:46 | INFO | train_inner | epoch 004: 1310 / 3002 loss=2.541, ppl=5.82, wps=5940.2, ups=0.09, wpb=64863, bsz=128, num_updates=10253, lr=9.9926e-05, gnorm=2.211, loss_scale=1, train_wall=10, gb_free=2.8, wall=117840
2021-06-20 03:22:57 | INFO | train_inner | epoch 004: 1311 / 3002 loss=2.565, ppl=5.92, wps=5870.2, ups=0.09, wpb=64815, bsz=128, num_updates=10254, lr=9.9926e-05, gnorm=2.189, loss_scale=1, train_wall=11, gb_free=2.8, wall=117851
2021-06-20 03:23:08 | INFO | train_inner | epoch 004: 1312 / 3002 loss=2.579, ppl=5.98, wps=5772.9, ups=0.09, wpb=64784, bsz=128, num_updates=10255, lr=9.9926e-05, gnorm=2.318, loss_scale=1, train_wall=11, gb_free=2.8, wall=117862
2021-06-20 03:23:19 | INFO | train_inner | epoch 004: 1313 / 3002 loss=2.654, ppl=6.29, wps=5928.5, ups=0.09, wpb=64786, bsz=128, num_updates=10256, lr=9.99259e-05, gnorm=2.182, loss_scale=1, train_wall=10, gb_free=2.8, wall=117873
2021-06-20 03:23:30 | INFO | train_inner | epoch 004: 1314 / 3002 loss=2.556, ppl=5.88, wps=5993.3, ups=0.09, wpb=64961, bsz=128, num_updates=10257, lr=9.99259e-05, gnorm=2.121, loss_scale=1, train_wall=10, gb_free=2.8, wall=117884
2021-06-20 03:23:41 | INFO | train_inner | epoch 004: 1315 / 3002 loss=2.623, ppl=6.16, wps=5981.1, ups=0.09, wpb=64873, bsz=128, num_updates=10258, lr=9.99259e-05, gnorm=2.149, loss_scale=1, train_wall=10, gb_free=2.8, wall=117895
2021-06-20 03:23:52 | INFO | train_inner | epoch 004: 1316 / 3002 loss=2.559, ppl=5.89, wps=5776.9, ups=0.09, wpb=64836, bsz=128, num_updates=10259, lr=9.99259e-05, gnorm=2.175, loss_scale=1, train_wall=11, gb_free=2.8, wall=117906
2021-06-20 03:24:03 | INFO | train_inner | epoch 004: 1317 / 3002 loss=2.586, ppl=6.01, wps=5778.6, ups=0.09, wpb=64812, bsz=128, num_updates=10260, lr=9.99259e-05, gnorm=2.124, loss_scale=1, train_wall=11, gb_free=2.8, wall=117917
2021-06-20 03:24:14 | INFO | train_inner | epoch 004: 1318 / 3002 loss=2.538, ppl=5.81, wps=5849.7, ups=0.09, wpb=64817, bsz=128, num_updates=10261, lr=9.99259e-05, gnorm=2.529, loss_scale=1, train_wall=11, gb_free=2.8, wall=117928
2021-06-20 03:24:25 | INFO | train_inner | epoch 004: 1319 / 3002 loss=2.485, ppl=5.6, wps=5921.3, ups=0.09, wpb=64985, bsz=128, num_updates=10262, lr=9.99259e-05, gnorm=2.137, loss_scale=1, train_wall=11, gb_free=2.8, wall=117939
2021-06-20 03:24:36 | INFO | train_inner | epoch 004: 1320 / 3002 loss=2.687, ppl=6.44, wps=5891.7, ups=0.09, wpb=64827, bsz=128, num_updates=10263, lr=9.99259e-05, gnorm=2.135, loss_scale=1, train_wall=11, gb_free=2.8, wall=117950
2021-06-20 03:24:47 | INFO | train_inner | epoch 004: 1321 / 3002 loss=2.533, ppl=5.79, wps=5844.8, ups=0.09, wpb=64800, bsz=128, num_updates=10264, lr=9.99259e-05, gnorm=2.089, loss_scale=1, train_wall=11, gb_free=2.8, wall=117962
2021-06-20 03:24:58 | INFO | train_inner | epoch 004: 1322 / 3002 loss=2.476, ppl=5.56, wps=5894.9, ups=0.09, wpb=64849, bsz=128, num_updates=10265, lr=9.99259e-05, gnorm=2.062, loss_scale=1, train_wall=11, gb_free=2.8, wall=117973
2021-06-20 03:25:09 | INFO | train_inner | epoch 004: 1323 / 3002 loss=2.547, ppl=5.84, wps=5919.4, ups=0.09, wpb=64832, bsz=128, num_updates=10266, lr=9.99259e-05, gnorm=2.073, loss_scale=1, train_wall=11, gb_free=2.8, wall=117984
2021-06-20 03:25:20 | INFO | train_inner | epoch 004: 1324 / 3002 loss=2.593, ppl=6.03, wps=5928, ups=0.09, wpb=64879, bsz=128, num_updates=10267, lr=9.99259e-05, gnorm=2.111, loss_scale=1, train_wall=10, gb_free=2.8, wall=117994
2021-06-20 03:25:31 | INFO | train_inner | epoch 004: 1325 / 3002 loss=2.543, ppl=5.83, wps=5706.2, ups=0.09, wpb=64810, bsz=128, num_updates=10268, lr=9.99259e-05, gnorm=2.105, loss_scale=1, train_wall=11, gb_free=2.8, wall=118006
2021-06-20 03:25:43 | INFO | train_inner | epoch 004: 1326 / 3002 loss=2.478, ppl=5.57, wps=5831.8, ups=0.09, wpb=64835, bsz=128, num_updates=10269, lr=9.99258e-05, gnorm=2.905, loss_scale=1, train_wall=11, gb_free=2.8, wall=118017
2021-06-20 03:25:54 | INFO | train_inner | epoch 004: 1327 / 3002 loss=2.692, ppl=6.46, wps=5789.3, ups=0.09, wpb=64811, bsz=128, num_updates=10270, lr=9.99258e-05, gnorm=2.232, loss_scale=1, train_wall=11, gb_free=2.8, wall=118028
2021-06-20 03:26:05 | INFO | train_inner | epoch 004: 1328 / 3002 loss=2.601, ppl=6.07, wps=5787.2, ups=0.09, wpb=64817, bsz=128, num_updates=10271, lr=9.99258e-05, gnorm=2.016, loss_scale=1, train_wall=11, gb_free=2.8, wall=118039
2021-06-20 03:26:16 | INFO | train_inner | epoch 004: 1329 / 3002 loss=2.729, ppl=6.63, wps=5735, ups=0.09, wpb=64836, bsz=128, num_updates=10272, lr=9.99258e-05, gnorm=21.78, loss_scale=1, train_wall=11, gb_free=2.8, wall=118051
2021-06-20 03:26:27 | INFO | train_inner | epoch 004: 1330 / 3002 loss=2.626, ppl=6.17, wps=5932.1, ups=0.09, wpb=64890, bsz=128, num_updates=10273, lr=9.99258e-05, gnorm=2.207, loss_scale=1, train_wall=10, gb_free=2.8, wall=118062
2021-06-20 03:26:39 | INFO | train_inner | epoch 004: 1331 / 3002 loss=2.531, ppl=5.78, wps=5745.6, ups=0.09, wpb=64809, bsz=128, num_updates=10274, lr=9.99258e-05, gnorm=2.308, loss_scale=1, train_wall=11, gb_free=2.8, wall=118073
2021-06-20 03:26:50 | INFO | train_inner | epoch 004: 1332 / 3002 loss=2.651, ppl=6.28, wps=5765.7, ups=0.09, wpb=64827, bsz=128, num_updates=10275, lr=9.99258e-05, gnorm=2.522, loss_scale=1, train_wall=11, gb_free=2.8, wall=118084
2021-06-20 03:27:01 | INFO | train_inner | epoch 004: 1333 / 3002 loss=2.718, ppl=6.58, wps=5754.3, ups=0.09, wpb=64820, bsz=128, num_updates=10276, lr=9.99258e-05, gnorm=2.27, loss_scale=1, train_wall=11, gb_free=2.8, wall=118095
2021-06-20 03:27:12 | INFO | train_inner | epoch 004: 1334 / 3002 loss=2.659, ppl=6.32, wps=5974.7, ups=0.09, wpb=64985, bsz=128, num_updates=10277, lr=9.99258e-05, gnorm=2.019, loss_scale=1, train_wall=10, gb_free=2.8, wall=118106
2021-06-20 03:27:23 | INFO | train_inner | epoch 004: 1335 / 3002 loss=2.406, ppl=5.3, wps=5866.8, ups=0.09, wpb=64785, bsz=128, num_updates=10278, lr=9.99258e-05, gnorm=2.11, loss_scale=1, train_wall=11, gb_free=2.8, wall=118117
2021-06-20 03:27:34 | INFO | train_inner | epoch 004: 1336 / 3002 loss=2.649, ppl=6.27, wps=5751.3, ups=0.09, wpb=64903, bsz=128, num_updates=10279, lr=9.99258e-05, gnorm=3.145, loss_scale=1, train_wall=11, gb_free=2.8, wall=118129
2021-06-20 03:27:45 | INFO | train_inner | epoch 004: 1337 / 3002 loss=2.512, ppl=5.71, wps=5823.5, ups=0.09, wpb=64782, bsz=128, num_updates=10280, lr=9.99258e-05, gnorm=2.138, loss_scale=1, train_wall=11, gb_free=2.8, wall=118140
2021-06-20 03:27:56 | INFO | train_inner | epoch 004: 1338 / 3002 loss=2.608, ppl=6.1, wps=5889, ups=0.09, wpb=64844, bsz=128, num_updates=10281, lr=9.99257e-05, gnorm=2.168, loss_scale=1, train_wall=11, gb_free=2.8, wall=118151
2021-06-20 03:28:08 | INFO | train_inner | epoch 004: 1339 / 3002 loss=2.588, ppl=6.01, wps=5741.4, ups=0.09, wpb=64772, bsz=128, num_updates=10282, lr=9.99257e-05, gnorm=2.26, loss_scale=1, train_wall=11, gb_free=2.8, wall=118162
2021-06-20 03:28:19 | INFO | train_inner | epoch 004: 1340 / 3002 loss=2.542, ppl=5.83, wps=5877.2, ups=0.09, wpb=64869, bsz=128, num_updates=10283, lr=9.99257e-05, gnorm=2.228, loss_scale=1, train_wall=11, gb_free=2.8, wall=118173
2021-06-20 03:28:30 | INFO | train_inner | epoch 004: 1341 / 3002 loss=2.551, ppl=5.86, wps=5862.5, ups=0.09, wpb=64888, bsz=128, num_updates=10284, lr=9.99257e-05, gnorm=2.153, loss_scale=1, train_wall=11, gb_free=2.8, wall=118184
2021-06-20 03:28:41 | INFO | train_inner | epoch 004: 1342 / 3002 loss=2.615, ppl=6.13, wps=5953.8, ups=0.09, wpb=64898, bsz=128, num_updates=10285, lr=9.99257e-05, gnorm=2.094, loss_scale=1, train_wall=10, gb_free=2.8, wall=118195
2021-06-20 03:28:52 | INFO | train_inner | epoch 004: 1343 / 3002 loss=2.605, ppl=6.09, wps=5871.7, ups=0.09, wpb=64840, bsz=128, num_updates=10286, lr=9.99257e-05, gnorm=6.703, loss_scale=1, train_wall=11, gb_free=2.8, wall=118206
2021-06-20 03:29:03 | INFO | train_inner | epoch 004: 1344 / 3002 loss=2.462, ppl=5.51, wps=5854.1, ups=0.09, wpb=64854, bsz=128, num_updates=10287, lr=9.99257e-05, gnorm=2.151, loss_scale=1, train_wall=11, gb_free=2.8, wall=118217
2021-06-20 03:29:14 | INFO | train_inner | epoch 004: 1345 / 3002 loss=2.649, ppl=6.27, wps=5787.5, ups=0.09, wpb=64828, bsz=128, num_updates=10288, lr=9.99257e-05, gnorm=2.437, loss_scale=1, train_wall=11, gb_free=2.8, wall=118228
2021-06-20 03:29:25 | INFO | train_inner | epoch 004: 1346 / 3002 loss=2.543, ppl=5.83, wps=5914.4, ups=0.09, wpb=64836, bsz=128, num_updates=10289, lr=9.99257e-05, gnorm=2.162, loss_scale=1, train_wall=11, gb_free=2.8, wall=118239
2021-06-20 03:29:36 | INFO | train_inner | epoch 004: 1347 / 3002 loss=2.551, ppl=5.86, wps=5824.3, ups=0.09, wpb=64820, bsz=128, num_updates=10290, lr=9.99257e-05, gnorm=10.612, loss_scale=1, train_wall=11, gb_free=2.8, wall=118250
2021-06-20 03:29:47 | INFO | train_inner | epoch 004: 1348 / 3002 loss=2.55, ppl=5.86, wps=5926.5, ups=0.09, wpb=64803, bsz=128, num_updates=10291, lr=9.99257e-05, gnorm=2.116, loss_scale=1, train_wall=10, gb_free=2.8, wall=118261
2021-06-20 03:29:58 | INFO | train_inner | epoch 004: 1349 / 3002 loss=2.516, ppl=5.72, wps=5824.2, ups=0.09, wpb=64742, bsz=128, num_updates=10292, lr=9.99257e-05, gnorm=2.102, loss_scale=1, train_wall=11, gb_free=2.8, wall=118272
2021-06-20 03:30:09 | INFO | train_inner | epoch 004: 1350 / 3002 loss=2.395, ppl=5.26, wps=5873.6, ups=0.09, wpb=64847, bsz=128, num_updates=10293, lr=9.99257e-05, gnorm=2.225, loss_scale=1, train_wall=11, gb_free=2.8, wall=118283
2021-06-20 03:30:20 | INFO | train_inner | epoch 004: 1351 / 3002 loss=2.672, ppl=6.37, wps=5770.2, ups=0.09, wpb=64862, bsz=128, num_updates=10294, lr=9.99256e-05, gnorm=2.128, loss_scale=1, train_wall=11, gb_free=2.8, wall=118295
2021-06-20 03:30:31 | INFO | train_inner | epoch 004: 1352 / 3002 loss=2.572, ppl=5.94, wps=5845.6, ups=0.09, wpb=64804, bsz=128, num_updates=10295, lr=9.99256e-05, gnorm=4.899, loss_scale=1, train_wall=11, gb_free=2.8, wall=118306
2021-06-20 03:30:42 | INFO | train_inner | epoch 004: 1353 / 3002 loss=2.658, ppl=6.31, wps=5921, ups=0.09, wpb=64817, bsz=128, num_updates=10296, lr=9.99256e-05, gnorm=2.088, loss_scale=1, train_wall=10, gb_free=2.8, wall=118317
2021-06-20 03:30:54 | INFO | train_inner | epoch 004: 1354 / 3002 loss=2.653, ppl=6.29, wps=5827.5, ups=0.09, wpb=64824, bsz=128, num_updates=10297, lr=9.99256e-05, gnorm=2.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=118328
2021-06-20 03:31:05 | INFO | train_inner | epoch 004: 1355 / 3002 loss=2.517, ppl=5.72, wps=5812.4, ups=0.09, wpb=64816, bsz=128, num_updates=10298, lr=9.99256e-05, gnorm=2.039, loss_scale=1, train_wall=11, gb_free=2.8, wall=118339
2021-06-20 03:31:16 | INFO | train_inner | epoch 004: 1356 / 3002 loss=2.543, ppl=5.83, wps=5778.8, ups=0.09, wpb=64811, bsz=128, num_updates=10299, lr=9.99256e-05, gnorm=2.031, loss_scale=1, train_wall=11, gb_free=2.8, wall=118350
2021-06-20 03:31:27 | INFO | train_inner | epoch 004: 1357 / 3002 loss=2.475, ppl=5.56, wps=5901.3, ups=0.09, wpb=64895, bsz=128, num_updates=10300, lr=9.99256e-05, gnorm=2.004, loss_scale=1, train_wall=11, gb_free=2.8, wall=118361
2021-06-20 03:31:38 | INFO | train_inner | epoch 004: 1358 / 3002 loss=2.716, ppl=6.57, wps=5897.3, ups=0.09, wpb=64763, bsz=128, num_updates=10301, lr=9.99256e-05, gnorm=2.128, loss_scale=1, train_wall=11, gb_free=2.8, wall=118372
2021-06-20 03:31:49 | INFO | train_inner | epoch 004: 1359 / 3002 loss=2.544, ppl=5.83, wps=5852.3, ups=0.09, wpb=64924, bsz=128, num_updates=10302, lr=9.99256e-05, gnorm=2.044, loss_scale=1, train_wall=11, gb_free=2.8, wall=118383
2021-06-20 03:32:00 | INFO | train_inner | epoch 004: 1360 / 3002 loss=2.663, ppl=6.33, wps=5907.8, ups=0.09, wpb=64807, bsz=128, num_updates=10303, lr=9.99256e-05, gnorm=2.124, loss_scale=1, train_wall=11, gb_free=2.8, wall=118394
2021-06-20 03:32:11 | INFO | train_inner | epoch 004: 1361 / 3002 loss=2.661, ppl=6.32, wps=5857.9, ups=0.09, wpb=64795, bsz=128, num_updates=10304, lr=9.99256e-05, gnorm=2.169, loss_scale=1, train_wall=11, gb_free=2.8, wall=118405
2021-06-20 03:32:22 | INFO | train_inner | epoch 004: 1362 / 3002 loss=2.649, ppl=6.27, wps=5787.4, ups=0.09, wpb=64762, bsz=128, num_updates=10305, lr=9.99256e-05, gnorm=2.012, loss_scale=1, train_wall=11, gb_free=2.8, wall=118417
2021-06-20 03:32:33 | INFO | train_inner | epoch 004: 1363 / 3002 loss=2.64, ppl=6.23, wps=5762.7, ups=0.09, wpb=64850, bsz=128, num_updates=10306, lr=9.99255e-05, gnorm=2.003, loss_scale=1, train_wall=11, gb_free=2.8, wall=118428
2021-06-20 03:32:45 | INFO | train_inner | epoch 004: 1364 / 3002 loss=2.791, ppl=6.92, wps=5748.9, ups=0.09, wpb=64859, bsz=128, num_updates=10307, lr=9.99255e-05, gnorm=2.124, loss_scale=1, train_wall=11, gb_free=2.8, wall=118439
2021-06-20 03:32:56 | INFO | train_inner | epoch 004: 1365 / 3002 loss=2.724, ppl=6.61, wps=5857.2, ups=0.09, wpb=64808, bsz=128, num_updates=10308, lr=9.99255e-05, gnorm=2.229, loss_scale=1, train_wall=11, gb_free=2.8, wall=118450
2021-06-20 03:33:07 | INFO | train_inner | epoch 004: 1366 / 3002 loss=2.725, ppl=6.61, wps=5979.4, ups=0.09, wpb=64899, bsz=128, num_updates=10309, lr=9.99255e-05, gnorm=2.175, loss_scale=1, train_wall=10, gb_free=2.8, wall=118461
2021-06-20 03:33:18 | INFO | train_inner | epoch 004: 1367 / 3002 loss=2.429, ppl=5.38, wps=5875.9, ups=0.09, wpb=64907, bsz=128, num_updates=10310, lr=9.99255e-05, gnorm=2.191, loss_scale=1, train_wall=11, gb_free=2.8, wall=118472
2021-06-20 03:33:29 | INFO | train_inner | epoch 004: 1368 / 3002 loss=2.482, ppl=5.59, wps=5848.3, ups=0.09, wpb=64879, bsz=128, num_updates=10311, lr=9.99255e-05, gnorm=3.098, loss_scale=1, train_wall=11, gb_free=2.8, wall=118483
2021-06-20 03:33:40 | INFO | train_inner | epoch 004: 1369 / 3002 loss=2.61, ppl=6.1, wps=5814, ups=0.09, wpb=64867, bsz=128, num_updates=10312, lr=9.99255e-05, gnorm=12.511, loss_scale=1, train_wall=11, gb_free=2.8, wall=118494
2021-06-20 03:33:51 | INFO | train_inner | epoch 004: 1370 / 3002 loss=2.506, ppl=5.68, wps=5841.6, ups=0.09, wpb=64849, bsz=128, num_updates=10313, lr=9.99255e-05, gnorm=2.142, loss_scale=1, train_wall=11, gb_free=2.8, wall=118505
2021-06-20 03:34:02 | INFO | train_inner | epoch 004: 1371 / 3002 loss=2.496, ppl=5.64, wps=5818, ups=0.09, wpb=64774, bsz=128, num_updates=10314, lr=9.99255e-05, gnorm=2.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=118517
2021-06-20 03:34:13 | INFO | train_inner | epoch 004: 1372 / 3002 loss=2.653, ppl=6.29, wps=5951, ups=0.09, wpb=64851, bsz=128, num_updates=10315, lr=9.99255e-05, gnorm=2.174, loss_scale=1, train_wall=10, gb_free=2.8, wall=118527
2021-06-20 03:34:24 | INFO | train_inner | epoch 004: 1373 / 3002 loss=2.676, ppl=6.39, wps=5827.1, ups=0.09, wpb=64846, bsz=128, num_updates=10316, lr=9.99255e-05, gnorm=2.157, loss_scale=1, train_wall=11, gb_free=2.8, wall=118539
2021-06-20 03:34:35 | INFO | train_inner | epoch 004: 1374 / 3002 loss=2.711, ppl=6.55, wps=5840.8, ups=0.09, wpb=64867, bsz=128, num_updates=10317, lr=9.99255e-05, gnorm=2.157, loss_scale=1, train_wall=11, gb_free=2.8, wall=118550
2021-06-20 03:34:46 | INFO | train_inner | epoch 004: 1375 / 3002 loss=2.335, ppl=5.05, wps=5839.3, ups=0.09, wpb=64928, bsz=128, num_updates=10318, lr=9.99255e-05, gnorm=2.077, loss_scale=1, train_wall=11, gb_free=2.8, wall=118561
2021-06-20 03:34:58 | INFO | train_inner | epoch 004: 1376 / 3002 loss=2.556, ppl=5.88, wps=5839.8, ups=0.09, wpb=64870, bsz=128, num_updates=10319, lr=9.99254e-05, gnorm=2.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=118572
2021-06-20 03:35:09 | INFO | train_inner | epoch 004: 1377 / 3002 loss=2.556, ppl=5.88, wps=5834.3, ups=0.09, wpb=64812, bsz=128, num_updates=10320, lr=9.99254e-05, gnorm=2.056, loss_scale=1, train_wall=11, gb_free=2.8, wall=118583
2021-06-20 03:35:20 | INFO | train_inner | epoch 004: 1378 / 3002 loss=2.519, ppl=5.73, wps=5960.1, ups=0.09, wpb=64882, bsz=128, num_updates=10321, lr=9.99254e-05, gnorm=2.026, loss_scale=1, train_wall=10, gb_free=2.8, wall=118594
2021-06-20 03:35:31 | INFO | train_inner | epoch 004: 1379 / 3002 loss=2.552, ppl=5.86, wps=5857.4, ups=0.09, wpb=64842, bsz=128, num_updates=10322, lr=9.99254e-05, gnorm=2.108, loss_scale=1, train_wall=11, gb_free=2.8, wall=118605
2021-06-20 03:35:42 | INFO | train_inner | epoch 004: 1380 / 3002 loss=2.567, ppl=5.93, wps=5887, ups=0.09, wpb=64817, bsz=128, num_updates=10323, lr=9.99254e-05, gnorm=2.064, loss_scale=1, train_wall=11, gb_free=2.8, wall=118616
2021-06-20 03:35:53 | INFO | train_inner | epoch 004: 1381 / 3002 loss=2.799, ppl=6.96, wps=5838.3, ups=0.09, wpb=64853, bsz=128, num_updates=10324, lr=9.99254e-05, gnorm=2.326, loss_scale=1, train_wall=11, gb_free=2.8, wall=118627
2021-06-20 03:36:04 | INFO | train_inner | epoch 004: 1382 / 3002 loss=2.472, ppl=5.55, wps=5726.2, ups=0.09, wpb=64756, bsz=128, num_updates=10325, lr=9.99254e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=118638
2021-06-20 03:36:15 | INFO | train_inner | epoch 004: 1383 / 3002 loss=2.555, ppl=5.88, wps=5867.4, ups=0.09, wpb=64879, bsz=128, num_updates=10326, lr=9.99254e-05, gnorm=2.155, loss_scale=2, train_wall=11, gb_free=2.8, wall=118649
2021-06-20 03:36:26 | INFO | train_inner | epoch 004: 1384 / 3002 loss=2.548, ppl=5.85, wps=5887.4, ups=0.09, wpb=64741, bsz=128, num_updates=10327, lr=9.99254e-05, gnorm=2.17, loss_scale=2, train_wall=11, gb_free=2.8, wall=118660
2021-06-20 03:36:37 | INFO | train_inner | epoch 004: 1385 / 3002 loss=2.898, ppl=7.45, wps=5799.2, ups=0.09, wpb=64831, bsz=128, num_updates=10328, lr=9.99254e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=118672
2021-06-20 03:36:48 | INFO | train_inner | epoch 004: 1386 / 3002 loss=2.457, ppl=5.49, wps=5886.5, ups=0.09, wpb=64859, bsz=128, num_updates=10329, lr=9.99254e-05, gnorm=1.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=118683
2021-06-20 03:36:59 | INFO | train_inner | epoch 004: 1387 / 3002 loss=2.648, ppl=6.27, wps=5861.3, ups=0.09, wpb=64814, bsz=128, num_updates=10330, lr=9.99254e-05, gnorm=2.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=118694
2021-06-20 03:37:11 | INFO | train_inner | epoch 004: 1388 / 3002 loss=2.718, ppl=6.58, wps=5827.4, ups=0.09, wpb=64849, bsz=128, num_updates=10331, lr=9.99253e-05, gnorm=2.272, loss_scale=2, train_wall=11, gb_free=2.8, wall=118705
2021-06-20 03:37:22 | INFO | train_inner | epoch 004: 1389 / 3002 loss=2.551, ppl=5.86, wps=5745.7, ups=0.09, wpb=64919, bsz=128, num_updates=10332, lr=9.99253e-05, gnorm=2.08, loss_scale=2, train_wall=11, gb_free=2.8, wall=118716
2021-06-20 03:37:33 | INFO | train_inner | epoch 004: 1390 / 3002 loss=2.713, ppl=6.56, wps=5866.6, ups=0.09, wpb=64784, bsz=128, num_updates=10333, lr=9.99253e-05, gnorm=2.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=118727
2021-06-20 03:37:44 | INFO | train_inner | epoch 004: 1391 / 3002 loss=2.622, ppl=6.16, wps=5837.3, ups=0.09, wpb=64793, bsz=128, num_updates=10334, lr=9.99253e-05, gnorm=2.028, loss_scale=2, train_wall=11, gb_free=2.8, wall=118738
2021-06-20 03:37:55 | INFO | train_inner | epoch 004: 1392 / 3002 loss=2.68, ppl=6.41, wps=5923, ups=0.09, wpb=64822, bsz=128, num_updates=10335, lr=9.99253e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=118749
2021-06-20 03:38:06 | INFO | train_inner | epoch 004: 1393 / 3002 loss=2.712, ppl=6.55, wps=5812.6, ups=0.09, wpb=64830, bsz=128, num_updates=10336, lr=9.99253e-05, gnorm=5.41, loss_scale=2, train_wall=11, gb_free=2.8, wall=118760
2021-06-20 03:38:17 | INFO | train_inner | epoch 004: 1394 / 3002 loss=2.601, ppl=6.07, wps=5778.3, ups=0.09, wpb=64730, bsz=128, num_updates=10337, lr=9.99253e-05, gnorm=2.084, loss_scale=2, train_wall=11, gb_free=2.8, wall=118772
2021-06-20 03:38:28 | INFO | train_inner | epoch 004: 1395 / 3002 loss=2.683, ppl=6.42, wps=5907.5, ups=0.09, wpb=64853, bsz=128, num_updates=10338, lr=9.99253e-05, gnorm=4.071, loss_scale=2, train_wall=11, gb_free=2.8, wall=118783
2021-06-20 03:38:39 | INFO | train_inner | epoch 004: 1396 / 3002 loss=2.589, ppl=6.02, wps=5926.6, ups=0.09, wpb=64807, bsz=128, num_updates=10339, lr=9.99253e-05, gnorm=2.029, loss_scale=2, train_wall=10, gb_free=2.8, wall=118793
2021-06-20 03:38:50 | INFO | train_inner | epoch 004: 1397 / 3002 loss=2.781, ppl=6.87, wps=5804.7, ups=0.09, wpb=64793, bsz=128, num_updates=10340, lr=9.99253e-05, gnorm=2.256, loss_scale=2, train_wall=11, gb_free=2.8, wall=118805
2021-06-20 03:39:01 | INFO | train_inner | epoch 004: 1398 / 3002 loss=2.701, ppl=6.5, wps=5828.1, ups=0.09, wpb=64798, bsz=128, num_updates=10341, lr=9.99253e-05, gnorm=2.103, loss_scale=2, train_wall=11, gb_free=2.8, wall=118816
2021-06-20 03:39:13 | INFO | train_inner | epoch 004: 1399 / 3002 loss=2.578, ppl=5.97, wps=5857.6, ups=0.09, wpb=64827, bsz=128, num_updates=10342, lr=9.99253e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=118827
2021-06-20 03:39:24 | INFO | train_inner | epoch 004: 1400 / 3002 loss=2.451, ppl=5.47, wps=5841.6, ups=0.09, wpb=64864, bsz=128, num_updates=10343, lr=9.99253e-05, gnorm=2.007, loss_scale=2, train_wall=11, gb_free=2.8, wall=118838
2021-06-20 03:39:35 | INFO | train_inner | epoch 004: 1401 / 3002 loss=2.59, ppl=6.02, wps=5756.8, ups=0.09, wpb=64809, bsz=128, num_updates=10344, lr=9.99252e-05, gnorm=2.069, loss_scale=2, train_wall=11, gb_free=2.8, wall=118849
2021-06-20 03:39:46 | INFO | train_inner | epoch 004: 1402 / 3002 loss=2.655, ppl=6.3, wps=5916.7, ups=0.09, wpb=64824, bsz=128, num_updates=10345, lr=9.99252e-05, gnorm=2.059, loss_scale=2, train_wall=11, gb_free=2.8, wall=118860
2021-06-20 03:39:57 | INFO | train_inner | epoch 004: 1403 / 3002 loss=2.677, ppl=6.39, wps=5854.1, ups=0.09, wpb=64836, bsz=128, num_updates=10346, lr=9.99252e-05, gnorm=4.031, loss_scale=2, train_wall=11, gb_free=2.8, wall=118871
2021-06-20 03:40:08 | INFO | train_inner | epoch 004: 1404 / 3002 loss=2.504, ppl=5.67, wps=5825.4, ups=0.09, wpb=64841, bsz=128, num_updates=10347, lr=9.99252e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=118882
2021-06-20 03:40:19 | INFO | train_inner | epoch 004: 1405 / 3002 loss=2.502, ppl=5.66, wps=5909.6, ups=0.09, wpb=64890, bsz=128, num_updates=10348, lr=9.99252e-05, gnorm=7.559, loss_scale=2, train_wall=11, gb_free=2.8, wall=118893
2021-06-20 03:40:30 | INFO | train_inner | epoch 004: 1406 / 3002 loss=2.484, ppl=5.59, wps=5801, ups=0.09, wpb=64865, bsz=128, num_updates=10349, lr=9.99252e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=118905
2021-06-20 03:40:41 | INFO | train_inner | epoch 004: 1407 / 3002 loss=2.485, ppl=5.6, wps=5852.4, ups=0.09, wpb=64868, bsz=128, num_updates=10350, lr=9.99252e-05, gnorm=2.019, loss_scale=2, train_wall=11, gb_free=2.8, wall=118916
2021-06-20 03:40:52 | INFO | train_inner | epoch 004: 1408 / 3002 loss=2.662, ppl=6.33, wps=5822.1, ups=0.09, wpb=64850, bsz=128, num_updates=10351, lr=9.99252e-05, gnorm=1.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=118927
2021-06-20 03:41:04 | INFO | train_inner | epoch 004: 1409 / 3002 loss=2.342, ppl=5.07, wps=5742.4, ups=0.09, wpb=64849, bsz=128, num_updates=10352, lr=9.99252e-05, gnorm=1.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=118938
2021-06-20 03:41:15 | INFO | train_inner | epoch 004: 1410 / 3002 loss=2.521, ppl=5.74, wps=5951.4, ups=0.09, wpb=64873, bsz=128, num_updates=10353, lr=9.99252e-05, gnorm=2.109, loss_scale=2, train_wall=10, gb_free=2.8, wall=118949
2021-06-20 03:41:26 | INFO | train_inner | epoch 004: 1411 / 3002 loss=2.585, ppl=6, wps=5814.8, ups=0.09, wpb=64809, bsz=128, num_updates=10354, lr=9.99252e-05, gnorm=3.862, loss_scale=2, train_wall=11, gb_free=2.8, wall=118960
2021-06-20 03:41:37 | INFO | train_inner | epoch 004: 1412 / 3002 loss=2.621, ppl=6.15, wps=5793.3, ups=0.09, wpb=64856, bsz=128, num_updates=10355, lr=9.99252e-05, gnorm=2.369, loss_scale=2, train_wall=11, gb_free=2.8, wall=118971
2021-06-20 03:41:48 | INFO | train_inner | epoch 004: 1413 / 3002 loss=2.557, ppl=5.88, wps=5926, ups=0.09, wpb=64836, bsz=128, num_updates=10356, lr=9.99251e-05, gnorm=2.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=118982
2021-06-20 03:41:59 | INFO | train_inner | epoch 004: 1414 / 3002 loss=2.607, ppl=6.09, wps=5665.3, ups=0.09, wpb=64811, bsz=128, num_updates=10357, lr=9.99251e-05, gnorm=2.105, loss_scale=2, train_wall=11, gb_free=2.8, wall=118994
2021-06-20 03:42:10 | INFO | train_inner | epoch 004: 1415 / 3002 loss=2.589, ppl=6.02, wps=5896.8, ups=0.09, wpb=64887, bsz=128, num_updates=10358, lr=9.99251e-05, gnorm=2.081, loss_scale=2, train_wall=11, gb_free=2.8, wall=119005
2021-06-20 03:42:21 | INFO | train_inner | epoch 004: 1416 / 3002 loss=2.439, ppl=5.42, wps=5810.7, ups=0.09, wpb=64785, bsz=128, num_updates=10359, lr=9.99251e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=119016
2021-06-20 03:42:33 | INFO | train_inner | epoch 004: 1417 / 3002 loss=2.61, ppl=6.1, wps=5784.3, ups=0.09, wpb=64804, bsz=128, num_updates=10360, lr=9.99251e-05, gnorm=2.126, loss_scale=2, train_wall=11, gb_free=2.8, wall=119027
2021-06-20 03:42:44 | INFO | train_inner | epoch 004: 1418 / 3002 loss=2.659, ppl=6.32, wps=5803.6, ups=0.09, wpb=64764, bsz=128, num_updates=10361, lr=9.99251e-05, gnorm=2.247, loss_scale=2, train_wall=11, gb_free=2.8, wall=119038
2021-06-20 03:42:55 | INFO | train_inner | epoch 004: 1419 / 3002 loss=2.464, ppl=5.52, wps=5926.4, ups=0.09, wpb=64891, bsz=128, num_updates=10362, lr=9.99251e-05, gnorm=1.976, loss_scale=2, train_wall=10, gb_free=2.8, wall=119049
2021-06-20 03:43:06 | INFO | train_inner | epoch 004: 1420 / 3002 loss=2.702, ppl=6.51, wps=5799.6, ups=0.09, wpb=64870, bsz=128, num_updates=10363, lr=9.99251e-05, gnorm=1.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=119060
2021-06-20 03:43:17 | INFO | train_inner | epoch 004: 1421 / 3002 loss=2.615, ppl=6.13, wps=5895.1, ups=0.09, wpb=64744, bsz=128, num_updates=10364, lr=9.99251e-05, gnorm=2.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=119071
2021-06-20 03:43:28 | INFO | train_inner | epoch 004: 1422 / 3002 loss=2.612, ppl=6.11, wps=5747.9, ups=0.09, wpb=64872, bsz=128, num_updates=10365, lr=9.99251e-05, gnorm=2.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=119083
2021-06-20 03:43:39 | INFO | train_inner | epoch 004: 1423 / 3002 loss=2.538, ppl=5.81, wps=5956.3, ups=0.09, wpb=64922, bsz=128, num_updates=10366, lr=9.99251e-05, gnorm=2.043, loss_scale=2, train_wall=10, gb_free=2.8, wall=119093
2021-06-20 03:43:50 | INFO | train_inner | epoch 004: 1424 / 3002 loss=2.649, ppl=6.27, wps=5819.4, ups=0.09, wpb=64813, bsz=128, num_updates=10367, lr=9.99251e-05, gnorm=2.068, loss_scale=2, train_wall=11, gb_free=2.8, wall=119105
2021-06-20 03:44:01 | INFO | train_inner | epoch 004: 1425 / 3002 loss=2.442, ppl=5.43, wps=5823.4, ups=0.09, wpb=64790, bsz=128, num_updates=10368, lr=9.99251e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=119116
2021-06-20 03:44:13 | INFO | train_inner | epoch 004: 1426 / 3002 loss=2.552, ppl=5.86, wps=5795, ups=0.09, wpb=64813, bsz=128, num_updates=10369, lr=9.9925e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=119127
2021-06-20 03:44:24 | INFO | train_inner | epoch 004: 1427 / 3002 loss=2.669, ppl=6.36, wps=5770.8, ups=0.09, wpb=64731, bsz=128, num_updates=10370, lr=9.9925e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=119138
2021-06-20 03:44:35 | INFO | train_inner | epoch 004: 1428 / 3002 loss=2.669, ppl=6.36, wps=5810.7, ups=0.09, wpb=64810, bsz=128, num_updates=10371, lr=9.9925e-05, gnorm=2.009, loss_scale=2, train_wall=11, gb_free=2.8, wall=119149
2021-06-20 03:44:46 | INFO | train_inner | epoch 004: 1429 / 3002 loss=2.655, ppl=6.3, wps=5943.5, ups=0.09, wpb=64805, bsz=128, num_updates=10372, lr=9.9925e-05, gnorm=2.034, loss_scale=2, train_wall=10, gb_free=2.8, wall=119160
2021-06-20 03:44:57 | INFO | train_inner | epoch 004: 1430 / 3002 loss=2.599, ppl=6.06, wps=5741.1, ups=0.09, wpb=64751, bsz=128, num_updates=10373, lr=9.9925e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=119171
2021-06-20 03:45:08 | INFO | train_inner | epoch 004: 1431 / 3002 loss=2.545, ppl=5.84, wps=5749.8, ups=0.09, wpb=64779, bsz=128, num_updates=10374, lr=9.9925e-05, gnorm=1.992, loss_scale=2, train_wall=11, gb_free=2.8, wall=119183
2021-06-20 03:45:19 | INFO | train_inner | epoch 004: 1432 / 3002 loss=2.52, ppl=5.74, wps=5986.6, ups=0.09, wpb=64892, bsz=128, num_updates=10375, lr=9.9925e-05, gnorm=1.998, loss_scale=2, train_wall=10, gb_free=2.8, wall=119194
2021-06-20 03:45:31 | INFO | train_inner | epoch 004: 1433 / 3002 loss=2.671, ppl=6.37, wps=5757.2, ups=0.09, wpb=64747, bsz=128, num_updates=10376, lr=9.9925e-05, gnorm=2.004, loss_scale=2, train_wall=11, gb_free=2.8, wall=119205
2021-06-20 03:45:42 | INFO | train_inner | epoch 004: 1434 / 3002 loss=2.646, ppl=6.26, wps=5789.3, ups=0.09, wpb=64832, bsz=128, num_updates=10377, lr=9.9925e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=119216
2021-06-20 03:45:53 | INFO | train_inner | epoch 004: 1435 / 3002 loss=2.573, ppl=5.95, wps=5875.6, ups=0.09, wpb=64839, bsz=128, num_updates=10378, lr=9.9925e-05, gnorm=2.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=119227
2021-06-20 03:46:04 | INFO | train_inner | epoch 004: 1436 / 3002 loss=2.647, ppl=6.27, wps=5903.4, ups=0.09, wpb=64820, bsz=128, num_updates=10379, lr=9.9925e-05, gnorm=2.059, loss_scale=2, train_wall=11, gb_free=2.8, wall=119238
2021-06-20 03:46:15 | INFO | train_inner | epoch 004: 1437 / 3002 loss=2.567, ppl=5.93, wps=5890.1, ups=0.09, wpb=64841, bsz=128, num_updates=10380, lr=9.9925e-05, gnorm=1.956, loss_scale=2, train_wall=11, gb_free=2.8, wall=119249
2021-06-20 03:46:26 | INFO | train_inner | epoch 004: 1438 / 3002 loss=2.413, ppl=5.33, wps=5899.1, ups=0.09, wpb=64909, bsz=128, num_updates=10381, lr=9.99249e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=119260
2021-06-20 03:46:37 | INFO | train_inner | epoch 004: 1439 / 3002 loss=2.563, ppl=5.91, wps=5937.4, ups=0.09, wpb=64951, bsz=128, num_updates=10382, lr=9.99249e-05, gnorm=2.064, loss_scale=2, train_wall=10, gb_free=2.8, wall=119271
2021-06-20 03:46:48 | INFO | train_inner | epoch 004: 1440 / 3002 loss=2.455, ppl=5.48, wps=5940.5, ups=0.09, wpb=64932, bsz=128, num_updates=10383, lr=9.99249e-05, gnorm=2.1, loss_scale=2, train_wall=10, gb_free=2.8, wall=119282
2021-06-20 03:46:59 | INFO | train_inner | epoch 004: 1441 / 3002 loss=2.604, ppl=6.08, wps=5753.7, ups=0.09, wpb=64749, bsz=128, num_updates=10384, lr=9.99249e-05, gnorm=2.097, loss_scale=2, train_wall=11, gb_free=2.8, wall=119293
2021-06-20 03:47:10 | INFO | train_inner | epoch 004: 1442 / 3002 loss=2.597, ppl=6.05, wps=5956.2, ups=0.09, wpb=64857, bsz=128, num_updates=10385, lr=9.99249e-05, gnorm=2.056, loss_scale=2, train_wall=10, gb_free=2.8, wall=119304
2021-06-20 03:47:21 | INFO | train_inner | epoch 004: 1443 / 3002 loss=2.606, ppl=6.09, wps=5819.8, ups=0.09, wpb=64843, bsz=128, num_updates=10386, lr=9.99249e-05, gnorm=2.102, loss_scale=2, train_wall=11, gb_free=2.8, wall=119315
2021-06-20 03:47:32 | INFO | train_inner | epoch 004: 1444 / 3002 loss=2.518, ppl=5.73, wps=5929.7, ups=0.09, wpb=64832, bsz=128, num_updates=10387, lr=9.99249e-05, gnorm=2.018, loss_scale=2, train_wall=10, gb_free=2.8, wall=119326
2021-06-20 03:47:43 | INFO | train_inner | epoch 004: 1445 / 3002 loss=2.672, ppl=6.37, wps=5895.6, ups=0.09, wpb=64841, bsz=128, num_updates=10388, lr=9.99249e-05, gnorm=2.225, loss_scale=2, train_wall=11, gb_free=2.8, wall=119337
2021-06-20 03:47:54 | INFO | train_inner | epoch 004: 1446 / 3002 loss=2.592, ppl=6.03, wps=5787.3, ups=0.09, wpb=64815, bsz=128, num_updates=10389, lr=9.99249e-05, gnorm=1.957, loss_scale=2, train_wall=11, gb_free=2.8, wall=119348
2021-06-20 03:48:05 | INFO | train_inner | epoch 004: 1447 / 3002 loss=2.539, ppl=5.81, wps=5824.1, ups=0.09, wpb=64867, bsz=128, num_updates=10390, lr=9.99249e-05, gnorm=2.004, loss_scale=2, train_wall=11, gb_free=2.8, wall=119359
2021-06-20 03:48:16 | INFO | train_inner | epoch 004: 1448 / 3002 loss=2.483, ppl=5.59, wps=5796.7, ups=0.09, wpb=64813, bsz=128, num_updates=10391, lr=9.99249e-05, gnorm=2.004, loss_scale=2, train_wall=11, gb_free=2.8, wall=119371
2021-06-20 03:48:27 | INFO | train_inner | epoch 004: 1449 / 3002 loss=2.66, ppl=6.32, wps=5930.8, ups=0.09, wpb=64846, bsz=128, num_updates=10392, lr=9.99249e-05, gnorm=2.186, loss_scale=2, train_wall=11, gb_free=2.8, wall=119382
2021-06-20 03:48:38 | INFO | train_inner | epoch 004: 1450 / 3002 loss=2.586, ppl=6, wps=5816.5, ups=0.09, wpb=64842, bsz=128, num_updates=10393, lr=9.99248e-05, gnorm=2.057, loss_scale=2, train_wall=11, gb_free=2.8, wall=119393
2021-06-20 03:48:50 | INFO | train_inner | epoch 004: 1451 / 3002 loss=2.545, ppl=5.84, wps=5820, ups=0.09, wpb=64891, bsz=128, num_updates=10394, lr=9.99248e-05, gnorm=2.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=119404
2021-06-20 03:49:01 | INFO | train_inner | epoch 004: 1452 / 3002 loss=2.513, ppl=5.71, wps=5838, ups=0.09, wpb=64783, bsz=128, num_updates=10395, lr=9.99248e-05, gnorm=2.085, loss_scale=2, train_wall=11, gb_free=2.8, wall=119415
2021-06-20 03:49:12 | INFO | train_inner | epoch 004: 1453 / 3002 loss=2.486, ppl=5.6, wps=5922, ups=0.09, wpb=64882, bsz=128, num_updates=10396, lr=9.99248e-05, gnorm=2.04, loss_scale=2, train_wall=10, gb_free=2.8, wall=119426
2021-06-20 03:49:23 | INFO | train_inner | epoch 004: 1454 / 3002 loss=2.645, ppl=6.25, wps=5873.3, ups=0.09, wpb=64819, bsz=128, num_updates=10397, lr=9.99248e-05, gnorm=2.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=119437
2021-06-20 03:49:34 | INFO | train_inner | epoch 004: 1455 / 3002 loss=2.48, ppl=5.58, wps=5852.7, ups=0.09, wpb=64883, bsz=128, num_updates=10398, lr=9.99248e-05, gnorm=1.989, loss_scale=2, train_wall=11, gb_free=2.8, wall=119448
2021-06-20 03:49:45 | INFO | train_inner | epoch 004: 1456 / 3002 loss=2.396, ppl=5.26, wps=5867.3, ups=0.09, wpb=64841, bsz=128, num_updates=10399, lr=9.99248e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=119459
2021-06-20 03:49:56 | INFO | train_inner | epoch 004: 1457 / 3002 loss=2.692, ppl=6.46, wps=5750.6, ups=0.09, wpb=64711, bsz=128, num_updates=10400, lr=9.99248e-05, gnorm=2.028, loss_scale=2, train_wall=11, gb_free=2.8, wall=119470
2021-06-20 03:50:07 | INFO | train_inner | epoch 004: 1458 / 3002 loss=2.459, ppl=5.5, wps=5780, ups=0.09, wpb=64823, bsz=128, num_updates=10401, lr=9.99248e-05, gnorm=2.007, loss_scale=2, train_wall=11, gb_free=2.8, wall=119482
2021-06-20 03:50:18 | INFO | train_inner | epoch 004: 1459 / 3002 loss=2.557, ppl=5.88, wps=5866, ups=0.09, wpb=64899, bsz=128, num_updates=10402, lr=9.99248e-05, gnorm=2.139, loss_scale=2, train_wall=11, gb_free=2.8, wall=119493
2021-06-20 03:50:29 | INFO | train_inner | epoch 004: 1460 / 3002 loss=2.708, ppl=6.53, wps=5915.4, ups=0.09, wpb=64837, bsz=128, num_updates=10403, lr=9.99248e-05, gnorm=2.106, loss_scale=2, train_wall=11, gb_free=2.8, wall=119504
2021-06-20 03:50:40 | INFO | train_inner | epoch 004: 1461 / 3002 loss=2.635, ppl=6.21, wps=5834.2, ups=0.09, wpb=64792, bsz=128, num_updates=10404, lr=9.99248e-05, gnorm=1.944, loss_scale=2, train_wall=11, gb_free=2.8, wall=119515
2021-06-20 03:50:51 | INFO | train_inner | epoch 004: 1462 / 3002 loss=2.541, ppl=5.82, wps=5894, ups=0.09, wpb=64902, bsz=128, num_updates=10405, lr=9.99248e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=119526
2021-06-20 03:51:02 | INFO | train_inner | epoch 004: 1463 / 3002 loss=2.485, ppl=5.6, wps=5911.8, ups=0.09, wpb=64803, bsz=128, num_updates=10406, lr=9.99247e-05, gnorm=1.997, loss_scale=2, train_wall=10, gb_free=2.8, wall=119537
2021-06-20 03:51:14 | INFO | train_inner | epoch 004: 1464 / 3002 loss=2.549, ppl=5.85, wps=5786.2, ups=0.09, wpb=64892, bsz=128, num_updates=10407, lr=9.99247e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=119548
2021-06-20 03:51:25 | INFO | train_inner | epoch 004: 1465 / 3002 loss=2.46, ppl=5.5, wps=5838.5, ups=0.09, wpb=64787, bsz=128, num_updates=10408, lr=9.99247e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=119559
2021-06-20 03:51:36 | INFO | train_inner | epoch 004: 1466 / 3002 loss=2.766, ppl=6.8, wps=5864.3, ups=0.09, wpb=64728, bsz=128, num_updates=10409, lr=9.99247e-05, gnorm=2.111, loss_scale=2, train_wall=11, gb_free=2.8, wall=119570
2021-06-20 03:51:47 | INFO | train_inner | epoch 004: 1467 / 3002 loss=2.676, ppl=6.39, wps=5836.4, ups=0.09, wpb=64824, bsz=128, num_updates=10410, lr=9.99247e-05, gnorm=2.087, loss_scale=2, train_wall=11, gb_free=2.8, wall=119581
2021-06-20 03:51:58 | INFO | train_inner | epoch 004: 1468 / 3002 loss=2.755, ppl=6.75, wps=5835.8, ups=0.09, wpb=64690, bsz=128, num_updates=10411, lr=9.99247e-05, gnorm=1.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=119592
2021-06-20 03:52:09 | INFO | train_inner | epoch 004: 1469 / 3002 loss=2.647, ppl=6.26, wps=5801.7, ups=0.09, wpb=64857, bsz=128, num_updates=10412, lr=9.99247e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=119603
2021-06-20 03:52:20 | INFO | train_inner | epoch 004: 1470 / 3002 loss=2.563, ppl=5.91, wps=5697.5, ups=0.09, wpb=64851, bsz=128, num_updates=10413, lr=9.99247e-05, gnorm=2.093, loss_scale=2, train_wall=11, gb_free=2.8, wall=119615
2021-06-20 03:52:31 | INFO | train_inner | epoch 004: 1471 / 3002 loss=2.492, ppl=5.63, wps=5910.5, ups=0.09, wpb=64763, bsz=128, num_updates=10414, lr=9.99247e-05, gnorm=2.036, loss_scale=2, train_wall=11, gb_free=2.8, wall=119626
2021-06-20 03:52:42 | INFO | train_inner | epoch 004: 1472 / 3002 loss=2.46, ppl=5.5, wps=5915, ups=0.09, wpb=64860, bsz=128, num_updates=10415, lr=9.99247e-05, gnorm=2.128, loss_scale=2, train_wall=10, gb_free=2.8, wall=119637
2021-06-20 03:52:53 | INFO | train_inner | epoch 004: 1473 / 3002 loss=2.545, ppl=5.84, wps=6011.5, ups=0.09, wpb=64836, bsz=128, num_updates=10416, lr=9.99247e-05, gnorm=2.09, loss_scale=2, train_wall=10, gb_free=2.8, wall=119648
2021-06-20 03:53:04 | INFO | train_inner | epoch 004: 1474 / 3002 loss=2.77, ppl=6.82, wps=6031.8, ups=0.09, wpb=64836, bsz=128, num_updates=10417, lr=9.99247e-05, gnorm=2.121, loss_scale=2, train_wall=10, gb_free=2.8, wall=119658
2021-06-20 03:53:15 | INFO | train_inner | epoch 004: 1475 / 3002 loss=2.616, ppl=6.13, wps=5815.4, ups=0.09, wpb=64744, bsz=128, num_updates=10418, lr=9.99246e-05, gnorm=2.446, loss_scale=2, train_wall=11, gb_free=2.8, wall=119669
2021-06-20 03:53:26 | INFO | train_inner | epoch 004: 1476 / 3002 loss=2.523, ppl=5.75, wps=5983.3, ups=0.09, wpb=64834, bsz=128, num_updates=10419, lr=9.99246e-05, gnorm=2.161, loss_scale=2, train_wall=10, gb_free=2.8, wall=119680
2021-06-20 03:53:37 | INFO | train_inner | epoch 004: 1477 / 3002 loss=2.569, ppl=5.93, wps=5757.3, ups=0.09, wpb=64767, bsz=128, num_updates=10420, lr=9.99246e-05, gnorm=2.222, loss_scale=2, train_wall=11, gb_free=2.8, wall=119691
2021-06-20 03:53:48 | INFO | train_inner | epoch 004: 1478 / 3002 loss=2.679, ppl=6.4, wps=5870.3, ups=0.09, wpb=64892, bsz=128, num_updates=10421, lr=9.99246e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=119703
2021-06-20 03:53:59 | INFO | train_inner | epoch 004: 1479 / 3002 loss=2.447, ppl=5.45, wps=5933.6, ups=0.09, wpb=64849, bsz=128, num_updates=10422, lr=9.99246e-05, gnorm=1.968, loss_scale=2, train_wall=10, gb_free=2.8, wall=119713
2021-06-20 03:54:10 | INFO | train_inner | epoch 004: 1480 / 3002 loss=2.639, ppl=6.23, wps=5790.3, ups=0.09, wpb=64863, bsz=128, num_updates=10423, lr=9.99246e-05, gnorm=2.025, loss_scale=2, train_wall=11, gb_free=2.8, wall=119725
2021-06-20 03:54:21 | INFO | train_inner | epoch 004: 1481 / 3002 loss=2.608, ppl=6.1, wps=5835.1, ups=0.09, wpb=64853, bsz=128, num_updates=10424, lr=9.99246e-05, gnorm=1.959, loss_scale=2, train_wall=11, gb_free=2.8, wall=119736
2021-06-20 03:54:33 | INFO | train_inner | epoch 004: 1482 / 3002 loss=2.656, ppl=6.3, wps=5773, ups=0.09, wpb=64847, bsz=128, num_updates=10425, lr=9.99246e-05, gnorm=2.266, loss_scale=2, train_wall=11, gb_free=2.8, wall=119747
2021-06-20 03:54:44 | INFO | train_inner | epoch 004: 1483 / 3002 loss=2.634, ppl=6.21, wps=5936.1, ups=0.09, wpb=64799, bsz=128, num_updates=10426, lr=9.99246e-05, gnorm=2.03, loss_scale=2, train_wall=10, gb_free=2.8, wall=119758
2021-06-20 03:54:55 | INFO | train_inner | epoch 004: 1484 / 3002 loss=2.692, ppl=6.46, wps=5803.8, ups=0.09, wpb=64681, bsz=128, num_updates=10427, lr=9.99246e-05, gnorm=2.061, loss_scale=2, train_wall=11, gb_free=2.8, wall=119769
2021-06-20 03:55:06 | INFO | train_inner | epoch 004: 1485 / 3002 loss=2.53, ppl=5.77, wps=5891.8, ups=0.09, wpb=64860, bsz=128, num_updates=10428, lr=9.99246e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=119780
2021-06-20 03:55:17 | INFO | train_inner | epoch 004: 1486 / 3002 loss=2.758, ppl=6.77, wps=5765, ups=0.09, wpb=64848, bsz=128, num_updates=10429, lr=9.99246e-05, gnorm=2.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=119791
2021-06-20 03:55:28 | INFO | train_inner | epoch 004: 1487 / 3002 loss=2.66, ppl=6.32, wps=5857.7, ups=0.09, wpb=64715, bsz=128, num_updates=10430, lr=9.99246e-05, gnorm=2.104, loss_scale=2, train_wall=11, gb_free=2.8, wall=119802
2021-06-20 03:55:39 | INFO | train_inner | epoch 004: 1488 / 3002 loss=2.586, ppl=6.01, wps=5773.9, ups=0.09, wpb=64777, bsz=128, num_updates=10431, lr=9.99245e-05, gnorm=2.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=119814
2021-06-20 03:55:50 | INFO | train_inner | epoch 004: 1489 / 3002 loss=2.577, ppl=5.97, wps=5851.2, ups=0.09, wpb=64771, bsz=128, num_updates=10432, lr=9.99245e-05, gnorm=2.037, loss_scale=2, train_wall=11, gb_free=2.8, wall=119825
2021-06-20 03:56:01 | INFO | train_inner | epoch 004: 1490 / 3002 loss=2.49, ppl=5.62, wps=5999.2, ups=0.09, wpb=64905, bsz=128, num_updates=10433, lr=9.99245e-05, gnorm=2.065, loss_scale=2, train_wall=10, gb_free=2.8, wall=119836
2021-06-20 03:56:12 | INFO | train_inner | epoch 004: 1491 / 3002 loss=2.662, ppl=6.33, wps=5911.1, ups=0.09, wpb=64875, bsz=128, num_updates=10434, lr=9.99245e-05, gnorm=2.12, loss_scale=2, train_wall=11, gb_free=2.8, wall=119846
2021-06-20 03:56:23 | INFO | train_inner | epoch 004: 1492 / 3002 loss=2.47, ppl=5.54, wps=5768.7, ups=0.09, wpb=64771, bsz=128, num_updates=10435, lr=9.99245e-05, gnorm=2.026, loss_scale=2, train_wall=11, gb_free=2.8, wall=119858
2021-06-20 03:56:34 | INFO | train_inner | epoch 004: 1493 / 3002 loss=2.721, ppl=6.59, wps=5862.9, ups=0.09, wpb=64775, bsz=128, num_updates=10436, lr=9.99245e-05, gnorm=2.096, loss_scale=2, train_wall=11, gb_free=2.8, wall=119869
2021-06-20 03:56:46 | INFO | train_inner | epoch 004: 1494 / 3002 loss=2.608, ppl=6.1, wps=5737.9, ups=0.09, wpb=64771, bsz=128, num_updates=10437, lr=9.99245e-05, gnorm=2.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=119880
2021-06-20 03:56:57 | INFO | train_inner | epoch 004: 1495 / 3002 loss=2.547, ppl=5.84, wps=5832.7, ups=0.09, wpb=64865, bsz=128, num_updates=10438, lr=9.99245e-05, gnorm=2.153, loss_scale=2, train_wall=11, gb_free=2.8, wall=119891
2021-06-20 03:57:08 | INFO | train_inner | epoch 004: 1496 / 3002 loss=2.625, ppl=6.17, wps=5750.7, ups=0.09, wpb=64884, bsz=128, num_updates=10439, lr=9.99245e-05, gnorm=2.142, loss_scale=2, train_wall=11, gb_free=2.8, wall=119902
2021-06-20 03:57:19 | INFO | train_inner | epoch 004: 1497 / 3002 loss=2.586, ppl=6.01, wps=5779.6, ups=0.09, wpb=64839, bsz=128, num_updates=10440, lr=9.99245e-05, gnorm=2.088, loss_scale=2, train_wall=11, gb_free=2.8, wall=119914
2021-06-20 03:57:31 | INFO | train_inner | epoch 004: 1498 / 3002 loss=2.525, ppl=5.75, wps=5754, ups=0.09, wpb=64841, bsz=128, num_updates=10441, lr=9.99245e-05, gnorm=2.006, loss_scale=2, train_wall=11, gb_free=2.8, wall=119925
2021-06-20 03:57:42 | INFO | train_inner | epoch 004: 1499 / 3002 loss=2.579, ppl=5.97, wps=5750.7, ups=0.09, wpb=64797, bsz=128, num_updates=10442, lr=9.99245e-05, gnorm=2.026, loss_scale=2, train_wall=11, gb_free=2.8, wall=119936
2021-06-20 03:57:53 | INFO | train_inner | epoch 004: 1500 / 3002 loss=2.549, ppl=5.85, wps=5886.3, ups=0.09, wpb=64918, bsz=128, num_updates=10443, lr=9.99244e-05, gnorm=1.999, loss_scale=2, train_wall=11, gb_free=2.8, wall=119947
2021-06-20 03:58:04 | INFO | train_inner | epoch 004: 1501 / 3002 loss=2.634, ppl=6.21, wps=5735.2, ups=0.09, wpb=64703, bsz=128, num_updates=10444, lr=9.99244e-05, gnorm=2.028, loss_scale=2, train_wall=11, gb_free=2.8, wall=119959
2021-06-20 03:58:16 | INFO | train_inner | epoch 004: 1502 / 3002 loss=2.522, ppl=5.75, wps=5700.1, ups=0.09, wpb=64856, bsz=128, num_updates=10445, lr=9.99244e-05, gnorm=4.233, loss_scale=2, train_wall=11, gb_free=2.8, wall=119970
2021-06-20 03:58:26 | INFO | train_inner | epoch 004: 1503 / 3002 loss=2.68, ppl=6.41, wps=5929, ups=0.09, wpb=64841, bsz=128, num_updates=10446, lr=9.99244e-05, gnorm=2.057, loss_scale=2, train_wall=10, gb_free=2.8, wall=119981
2021-06-20 03:58:38 | INFO | train_inner | epoch 004: 1504 / 3002 loss=2.599, ppl=6.06, wps=5857.7, ups=0.09, wpb=64835, bsz=128, num_updates=10447, lr=9.99244e-05, gnorm=2.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=119992
2021-06-20 03:58:49 | INFO | train_inner | epoch 004: 1505 / 3002 loss=2.573, ppl=5.95, wps=5757.2, ups=0.09, wpb=64840, bsz=128, num_updates=10448, lr=9.99244e-05, gnorm=2.243, loss_scale=2, train_wall=11, gb_free=2.8, wall=120003
2021-06-20 03:59:00 | INFO | train_inner | epoch 004: 1506 / 3002 loss=2.611, ppl=6.11, wps=5935.2, ups=0.09, wpb=64873, bsz=128, num_updates=10449, lr=9.99244e-05, gnorm=2.072, loss_scale=2, train_wall=10, gb_free=2.8, wall=120014
2021-06-20 03:59:11 | INFO | train_inner | epoch 004: 1507 / 3002 loss=2.492, ppl=5.63, wps=5938.5, ups=0.09, wpb=64861, bsz=128, num_updates=10450, lr=9.99244e-05, gnorm=2.023, loss_scale=2, train_wall=10, gb_free=2.8, wall=120025
2021-06-20 03:59:22 | INFO | train_inner | epoch 004: 1508 / 3002 loss=2.558, ppl=5.89, wps=5768.2, ups=0.09, wpb=64849, bsz=128, num_updates=10451, lr=9.99244e-05, gnorm=2.051, loss_scale=2, train_wall=11, gb_free=2.8, wall=120036
2021-06-20 03:59:33 | INFO | train_inner | epoch 004: 1509 / 3002 loss=2.548, ppl=5.85, wps=5817.2, ups=0.09, wpb=64797, bsz=128, num_updates=10452, lr=9.99244e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=120047
2021-06-20 03:59:44 | INFO | train_inner | epoch 004: 1510 / 3002 loss=2.696, ppl=6.48, wps=5835.7, ups=0.09, wpb=64800, bsz=128, num_updates=10453, lr=9.99244e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=120059
2021-06-20 03:59:55 | INFO | train_inner | epoch 004: 1511 / 3002 loss=2.493, ppl=5.63, wps=5877.5, ups=0.09, wpb=64798, bsz=128, num_updates=10454, lr=9.99244e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=120070
2021-06-20 04:00:06 | INFO | train_inner | epoch 004: 1512 / 3002 loss=2.435, ppl=5.41, wps=5764.8, ups=0.09, wpb=64761, bsz=128, num_updates=10455, lr=9.99244e-05, gnorm=2.155, loss_scale=4, train_wall=11, gb_free=2.8, wall=120081
2021-06-20 04:00:18 | INFO | train_inner | epoch 004: 1513 / 3002 loss=2.569, ppl=5.94, wps=5846.1, ups=0.09, wpb=64874, bsz=128, num_updates=10456, lr=9.99243e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=120092
2021-06-20 04:00:29 | INFO | train_inner | epoch 004: 1514 / 3002 loss=2.466, ppl=5.52, wps=5880, ups=0.09, wpb=64829, bsz=128, num_updates=10457, lr=9.99243e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=120103
2021-06-20 04:00:40 | INFO | train_inner | epoch 004: 1515 / 3002 loss=2.676, ppl=6.39, wps=5828.8, ups=0.09, wpb=64782, bsz=128, num_updates=10458, lr=9.99243e-05, gnorm=2.02, loss_scale=4, train_wall=11, gb_free=2.8, wall=120114
2021-06-20 04:00:51 | INFO | train_inner | epoch 004: 1516 / 3002 loss=2.607, ppl=6.09, wps=5814.8, ups=0.09, wpb=64839, bsz=128, num_updates=10459, lr=9.99243e-05, gnorm=2.028, loss_scale=4, train_wall=11, gb_free=2.8, wall=120125
2021-06-20 04:01:02 | INFO | train_inner | epoch 004: 1517 / 3002 loss=2.628, ppl=6.18, wps=5882, ups=0.09, wpb=64770, bsz=128, num_updates=10460, lr=9.99243e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=120136
2021-06-20 04:01:13 | INFO | train_inner | epoch 004: 1518 / 3002 loss=2.504, ppl=5.67, wps=5868.4, ups=0.09, wpb=64786, bsz=128, num_updates=10461, lr=9.99243e-05, gnorm=1.981, loss_scale=4, train_wall=11, gb_free=2.8, wall=120147
2021-06-20 04:01:24 | INFO | train_inner | epoch 004: 1519 / 3002 loss=2.666, ppl=6.35, wps=5840.5, ups=0.09, wpb=64829, bsz=128, num_updates=10462, lr=9.99243e-05, gnorm=2.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=120158
2021-06-20 04:01:35 | INFO | train_inner | epoch 004: 1520 / 3002 loss=2.61, ppl=6.11, wps=5831.4, ups=0.09, wpb=64837, bsz=128, num_updates=10463, lr=9.99243e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=120169
2021-06-20 04:01:46 | INFO | train_inner | epoch 004: 1521 / 3002 loss=2.53, ppl=5.78, wps=5949.4, ups=0.09, wpb=64909, bsz=128, num_updates=10464, lr=9.99243e-05, gnorm=2.327, loss_scale=4, train_wall=10, gb_free=2.8, wall=120180
2021-06-20 04:01:57 | INFO | train_inner | epoch 004: 1522 / 3002 loss=2.604, ppl=6.08, wps=5808.1, ups=0.09, wpb=64871, bsz=128, num_updates=10465, lr=9.99243e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=120192
2021-06-20 04:02:08 | INFO | train_inner | epoch 004: 1523 / 3002 loss=2.528, ppl=5.77, wps=5789.9, ups=0.09, wpb=64906, bsz=128, num_updates=10466, lr=9.99243e-05, gnorm=2.175, loss_scale=4, train_wall=11, gb_free=2.8, wall=120203
2021-06-20 04:02:20 | INFO | train_inner | epoch 004: 1524 / 3002 loss=2.578, ppl=5.97, wps=5752.7, ups=0.09, wpb=64798, bsz=128, num_updates=10467, lr=9.99243e-05, gnorm=2.077, loss_scale=4, train_wall=11, gb_free=2.8, wall=120214
2021-06-20 04:02:31 | INFO | train_inner | epoch 004: 1525 / 3002 loss=2.631, ppl=6.19, wps=5846.3, ups=0.09, wpb=64798, bsz=128, num_updates=10468, lr=9.99242e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=120225
2021-06-20 04:02:42 | INFO | train_inner | epoch 004: 1526 / 3002 loss=2.428, ppl=5.38, wps=5997.5, ups=0.09, wpb=64931, bsz=128, num_updates=10469, lr=9.99242e-05, gnorm=2.04, loss_scale=4, train_wall=10, gb_free=2.8, wall=120236
2021-06-20 04:02:53 | INFO | train_inner | epoch 004: 1527 / 3002 loss=2.602, ppl=6.07, wps=5865.6, ups=0.09, wpb=64845, bsz=128, num_updates=10470, lr=9.99242e-05, gnorm=1.997, loss_scale=4, train_wall=11, gb_free=2.8, wall=120247
2021-06-20 04:03:04 | INFO | train_inner | epoch 004: 1528 / 3002 loss=2.575, ppl=5.96, wps=5914.7, ups=0.09, wpb=64787, bsz=128, num_updates=10471, lr=9.99242e-05, gnorm=2.015, loss_scale=4, train_wall=10, gb_free=2.8, wall=120258
2021-06-20 04:03:15 | INFO | train_inner | epoch 004: 1529 / 3002 loss=2.551, ppl=5.86, wps=5895.2, ups=0.09, wpb=64828, bsz=128, num_updates=10472, lr=9.99242e-05, gnorm=2.022, loss_scale=4, train_wall=11, gb_free=2.8, wall=120269
2021-06-20 04:03:26 | INFO | train_inner | epoch 004: 1530 / 3002 loss=2.52, ppl=5.74, wps=5806.5, ups=0.09, wpb=64759, bsz=128, num_updates=10473, lr=9.99242e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=120280
2021-06-20 04:03:37 | INFO | train_inner | epoch 004: 1531 / 3002 loss=2.638, ppl=6.22, wps=5874.8, ups=0.09, wpb=64765, bsz=128, num_updates=10474, lr=9.99242e-05, gnorm=2.39, loss_scale=4, train_wall=11, gb_free=2.8, wall=120291
2021-06-20 04:03:48 | INFO | train_inner | epoch 004: 1532 / 3002 loss=2.602, ppl=6.07, wps=5859.2, ups=0.09, wpb=64827, bsz=128, num_updates=10475, lr=9.99242e-05, gnorm=2.086, loss_scale=4, train_wall=11, gb_free=2.8, wall=120302
2021-06-20 04:03:59 | INFO | train_inner | epoch 004: 1533 / 3002 loss=2.516, ppl=5.72, wps=5818.3, ups=0.09, wpb=64840, bsz=128, num_updates=10476, lr=9.99242e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=120313
2021-06-20 04:04:10 | INFO | train_inner | epoch 004: 1534 / 3002 loss=2.675, ppl=6.39, wps=5863.2, ups=0.09, wpb=64831, bsz=128, num_updates=10477, lr=9.99242e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=120324
2021-06-20 04:04:21 | INFO | train_inner | epoch 004: 1535 / 3002 loss=2.532, ppl=5.78, wps=5956.3, ups=0.09, wpb=64882, bsz=128, num_updates=10478, lr=9.99242e-05, gnorm=2.424, loss_scale=4, train_wall=10, gb_free=2.8, wall=120335
2021-06-20 04:04:32 | INFO | train_inner | epoch 004: 1536 / 3002 loss=2.466, ppl=5.53, wps=5898.7, ups=0.09, wpb=64924, bsz=128, num_updates=10479, lr=9.99242e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=120346
2021-06-20 04:04:43 | INFO | train_inner | epoch 004: 1537 / 3002 loss=2.716, ppl=6.57, wps=5796.3, ups=0.09, wpb=64737, bsz=128, num_updates=10480, lr=9.99242e-05, gnorm=2.094, loss_scale=4, train_wall=11, gb_free=2.8, wall=120357
2021-06-20 04:04:54 | INFO | train_inner | epoch 004: 1538 / 3002 loss=2.472, ppl=5.55, wps=5985.9, ups=0.09, wpb=64910, bsz=128, num_updates=10481, lr=9.99241e-05, gnorm=2.031, loss_scale=4, train_wall=10, gb_free=2.8, wall=120368
2021-06-20 04:05:05 | INFO | train_inner | epoch 004: 1539 / 3002 loss=2.523, ppl=5.75, wps=5791.8, ups=0.09, wpb=64789, bsz=128, num_updates=10482, lr=9.99241e-05, gnorm=2.733, loss_scale=4, train_wall=11, gb_free=2.8, wall=120379
2021-06-20 04:05:16 | INFO | train_inner | epoch 004: 1540 / 3002 loss=2.556, ppl=5.88, wps=5815.9, ups=0.09, wpb=64872, bsz=128, num_updates=10483, lr=9.99241e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=120391
2021-06-20 04:05:27 | INFO | train_inner | epoch 004: 1541 / 3002 loss=2.501, ppl=5.66, wps=5903.2, ups=0.09, wpb=64799, bsz=128, num_updates=10484, lr=9.99241e-05, gnorm=2.018, loss_scale=4, train_wall=10, gb_free=2.8, wall=120402
2021-06-20 04:05:39 | INFO | train_inner | epoch 004: 1542 / 3002 loss=2.427, ppl=5.38, wps=5734.5, ups=0.09, wpb=64831, bsz=128, num_updates=10485, lr=9.99241e-05, gnorm=2.12, loss_scale=4, train_wall=11, gb_free=2.8, wall=120413
2021-06-20 04:05:50 | INFO | train_inner | epoch 004: 1543 / 3002 loss=2.496, ppl=5.64, wps=5757, ups=0.09, wpb=64797, bsz=128, num_updates=10486, lr=9.99241e-05, gnorm=3.631, loss_scale=4, train_wall=11, gb_free=2.8, wall=120424
2021-06-20 04:06:01 | INFO | train_inner | epoch 004: 1544 / 3002 loss=2.467, ppl=5.53, wps=5827.2, ups=0.09, wpb=64775, bsz=128, num_updates=10487, lr=9.99241e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=120435
2021-06-20 04:06:12 | INFO | train_inner | epoch 004: 1545 / 3002 loss=2.522, ppl=5.74, wps=5874.7, ups=0.09, wpb=64862, bsz=128, num_updates=10488, lr=9.99241e-05, gnorm=2.105, loss_scale=4, train_wall=11, gb_free=2.8, wall=120446
2021-06-20 04:06:23 | INFO | train_inner | epoch 004: 1546 / 3002 loss=2.548, ppl=5.85, wps=5922.8, ups=0.09, wpb=64820, bsz=128, num_updates=10489, lr=9.99241e-05, gnorm=1.967, loss_scale=4, train_wall=10, gb_free=2.8, wall=120457
2021-06-20 04:06:34 | INFO | train_inner | epoch 004: 1547 / 3002 loss=2.484, ppl=5.6, wps=6017.3, ups=0.09, wpb=64879, bsz=128, num_updates=10490, lr=9.99241e-05, gnorm=1.99, loss_scale=4, train_wall=10, gb_free=2.8, wall=120468
2021-06-20 04:06:45 | INFO | train_inner | epoch 004: 1548 / 3002 loss=2.524, ppl=5.75, wps=5901.3, ups=0.09, wpb=64796, bsz=128, num_updates=10491, lr=9.99241e-05, gnorm=2.15, loss_scale=4, train_wall=10, gb_free=2.8, wall=120479
2021-06-20 04:06:56 | INFO | train_inner | epoch 004: 1549 / 3002 loss=2.484, ppl=5.6, wps=5894.3, ups=0.09, wpb=64789, bsz=128, num_updates=10492, lr=9.99241e-05, gnorm=1.926, loss_scale=4, train_wall=11, gb_free=2.8, wall=120490
2021-06-20 04:07:07 | INFO | train_inner | epoch 004: 1550 / 3002 loss=2.54, ppl=5.82, wps=5826.6, ups=0.09, wpb=64773, bsz=128, num_updates=10493, lr=9.9924e-05, gnorm=2.45, loss_scale=4, train_wall=11, gb_free=2.8, wall=120501
2021-06-20 04:07:18 | INFO | train_inner | epoch 004: 1551 / 3002 loss=2.678, ppl=6.4, wps=5756, ups=0.09, wpb=64857, bsz=128, num_updates=10494, lr=9.9924e-05, gnorm=2.258, loss_scale=4, train_wall=11, gb_free=2.8, wall=120512
2021-06-20 04:07:29 | INFO | train_inner | epoch 004: 1552 / 3002 loss=2.622, ppl=6.16, wps=5818.2, ups=0.09, wpb=64830, bsz=128, num_updates=10495, lr=9.9924e-05, gnorm=2.124, loss_scale=4, train_wall=11, gb_free=2.8, wall=120524
2021-06-20 04:07:40 | INFO | train_inner | epoch 004: 1553 / 3002 loss=2.563, ppl=5.91, wps=5773.1, ups=0.09, wpb=64809, bsz=128, num_updates=10496, lr=9.9924e-05, gnorm=2.07, loss_scale=4, train_wall=11, gb_free=2.8, wall=120535
2021-06-20 04:07:51 | INFO | train_inner | epoch 004: 1554 / 3002 loss=2.554, ppl=5.87, wps=5872, ups=0.09, wpb=64919, bsz=128, num_updates=10497, lr=9.9924e-05, gnorm=3.438, loss_scale=4, train_wall=11, gb_free=2.8, wall=120546
2021-06-20 04:08:02 | INFO | train_inner | epoch 004: 1555 / 3002 loss=2.505, ppl=5.68, wps=5901, ups=0.09, wpb=64837, bsz=128, num_updates=10498, lr=9.9924e-05, gnorm=2.078, loss_scale=4, train_wall=11, gb_free=2.8, wall=120557
2021-06-20 04:08:13 | INFO | train_inner | epoch 004: 1556 / 3002 loss=2.534, ppl=5.79, wps=5894.3, ups=0.09, wpb=64890, bsz=128, num_updates=10499, lr=9.9924e-05, gnorm=2.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=120568
2021-06-20 04:08:24 | INFO | train_inner | epoch 004: 1557 / 3002 loss=2.646, ppl=6.26, wps=5887.7, ups=0.09, wpb=64820, bsz=128, num_updates=10500, lr=9.9924e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=120579
2021-06-20 04:08:36 | INFO | train_inner | epoch 004: 1558 / 3002 loss=2.771, ppl=6.83, wps=5813, ups=0.09, wpb=64862, bsz=128, num_updates=10501, lr=9.9924e-05, gnorm=2.333, loss_scale=4, train_wall=11, gb_free=2.8, wall=120590
2021-06-20 04:08:47 | INFO | train_inner | epoch 004: 1559 / 3002 loss=2.665, ppl=6.34, wps=5769.5, ups=0.09, wpb=64853, bsz=128, num_updates=10502, lr=9.9924e-05, gnorm=2.012, loss_scale=4, train_wall=11, gb_free=2.8, wall=120601
2021-06-20 04:08:58 | INFO | train_inner | epoch 004: 1560 / 3002 loss=2.467, ppl=5.53, wps=5844, ups=0.09, wpb=64820, bsz=128, num_updates=10503, lr=9.9924e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=120612
2021-06-20 04:09:09 | INFO | train_inner | epoch 004: 1561 / 3002 loss=2.722, ppl=6.6, wps=5924.6, ups=0.09, wpb=64821, bsz=128, num_updates=10504, lr=9.9924e-05, gnorm=2, loss_scale=4, train_wall=11, gb_free=2.8, wall=120623
2021-06-20 04:09:20 | INFO | train_inner | epoch 004: 1562 / 3002 loss=2.337, ppl=5.05, wps=5780.5, ups=0.09, wpb=64871, bsz=128, num_updates=10505, lr=9.9924e-05, gnorm=1.997, loss_scale=4, train_wall=11, gb_free=2.8, wall=120634
2021-06-20 04:09:31 | INFO | train_inner | epoch 004: 1563 / 3002 loss=2.56, ppl=5.9, wps=5876.6, ups=0.09, wpb=64828, bsz=128, num_updates=10506, lr=9.99239e-05, gnorm=2.101, loss_scale=4, train_wall=11, gb_free=2.8, wall=120645
2021-06-20 04:09:42 | INFO | train_inner | epoch 004: 1564 / 3002 loss=2.541, ppl=5.82, wps=5886.6, ups=0.09, wpb=64865, bsz=128, num_updates=10507, lr=9.99239e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=120657
2021-06-20 04:09:53 | INFO | train_inner | epoch 004: 1565 / 3002 loss=2.66, ppl=6.32, wps=5743.7, ups=0.09, wpb=64808, bsz=128, num_updates=10508, lr=9.99239e-05, gnorm=2.093, loss_scale=4, train_wall=11, gb_free=2.8, wall=120668
2021-06-20 04:10:05 | INFO | train_inner | epoch 004: 1566 / 3002 loss=2.734, ppl=6.65, wps=5767.1, ups=0.09, wpb=64840, bsz=128, num_updates=10509, lr=9.99239e-05, gnorm=2.401, loss_scale=4, train_wall=11, gb_free=2.8, wall=120679
2021-06-20 04:10:16 | INFO | train_inner | epoch 004: 1567 / 3002 loss=2.682, ppl=6.42, wps=5876.7, ups=0.09, wpb=64902, bsz=128, num_updates=10510, lr=9.99239e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=120690
2021-06-20 04:10:27 | INFO | train_inner | epoch 004: 1568 / 3002 loss=2.573, ppl=5.95, wps=5774, ups=0.09, wpb=64814, bsz=128, num_updates=10511, lr=9.99239e-05, gnorm=2.117, loss_scale=4, train_wall=11, gb_free=2.8, wall=120701
2021-06-20 04:10:38 | INFO | train_inner | epoch 004: 1569 / 3002 loss=2.425, ppl=5.37, wps=5855.9, ups=0.09, wpb=64828, bsz=128, num_updates=10512, lr=9.99239e-05, gnorm=2.219, loss_scale=4, train_wall=11, gb_free=2.8, wall=120712
2021-06-20 04:10:49 | INFO | train_inner | epoch 004: 1570 / 3002 loss=2.707, ppl=6.53, wps=5846.7, ups=0.09, wpb=64729, bsz=128, num_updates=10513, lr=9.99239e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=120723
2021-06-20 04:11:00 | INFO | train_inner | epoch 004: 1571 / 3002 loss=2.564, ppl=5.91, wps=5920, ups=0.09, wpb=64889, bsz=128, num_updates=10514, lr=9.99239e-05, gnorm=2.023, loss_scale=4, train_wall=11, gb_free=2.8, wall=120734
2021-06-20 04:11:11 | INFO | train_inner | epoch 004: 1572 / 3002 loss=2.521, ppl=5.74, wps=5774, ups=0.09, wpb=64872, bsz=128, num_updates=10515, lr=9.99239e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=120746
2021-06-20 04:11:22 | INFO | train_inner | epoch 004: 1573 / 3002 loss=2.632, ppl=6.2, wps=5832.3, ups=0.09, wpb=64835, bsz=128, num_updates=10516, lr=9.99239e-05, gnorm=2.174, loss_scale=4, train_wall=11, gb_free=2.8, wall=120757
2021-06-20 04:11:33 | INFO | train_inner | epoch 004: 1574 / 3002 loss=2.466, ppl=5.52, wps=5932.5, ups=0.09, wpb=64782, bsz=128, num_updates=10517, lr=9.99239e-05, gnorm=1.996, loss_scale=4, train_wall=10, gb_free=2.8, wall=120768
2021-06-20 04:11:45 | INFO | train_inner | epoch 004: 1575 / 3002 loss=2.621, ppl=6.15, wps=5775.8, ups=0.09, wpb=64858, bsz=128, num_updates=10518, lr=9.99238e-05, gnorm=2.323, loss_scale=4, train_wall=11, gb_free=2.8, wall=120779
2021-06-20 04:11:56 | INFO | train_inner | epoch 004: 1576 / 3002 loss=2.796, ppl=6.95, wps=5719.9, ups=0.09, wpb=64768, bsz=128, num_updates=10519, lr=9.99238e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=120790
2021-06-20 04:12:07 | INFO | train_inner | epoch 004: 1577 / 3002 loss=2.725, ppl=6.61, wps=5804.7, ups=0.09, wpb=64845, bsz=128, num_updates=10520, lr=9.99238e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=120801
2021-06-20 04:12:18 | INFO | train_inner | epoch 004: 1578 / 3002 loss=2.561, ppl=5.9, wps=5781.8, ups=0.09, wpb=64824, bsz=128, num_updates=10521, lr=9.99238e-05, gnorm=2.243, loss_scale=4, train_wall=11, gb_free=2.8, wall=120813
2021-06-20 04:12:29 | INFO | train_inner | epoch 004: 1579 / 3002 loss=2.542, ppl=5.82, wps=5822.2, ups=0.09, wpb=64826, bsz=128, num_updates=10522, lr=9.99238e-05, gnorm=2.001, loss_scale=4, train_wall=11, gb_free=2.8, wall=120824
2021-06-20 04:12:41 | INFO | train_inner | epoch 004: 1580 / 3002 loss=2.431, ppl=5.39, wps=5823.4, ups=0.09, wpb=64889, bsz=128, num_updates=10523, lr=9.99238e-05, gnorm=2.058, loss_scale=4, train_wall=11, gb_free=2.8, wall=120835
2021-06-20 04:12:52 | INFO | train_inner | epoch 004: 1581 / 3002 loss=2.404, ppl=5.29, wps=5829.3, ups=0.09, wpb=64831, bsz=128, num_updates=10524, lr=9.99238e-05, gnorm=1.991, loss_scale=4, train_wall=11, gb_free=2.8, wall=120846
2021-06-20 04:13:03 | INFO | train_inner | epoch 004: 1582 / 3002 loss=2.5, ppl=5.66, wps=5835.4, ups=0.09, wpb=64831, bsz=128, num_updates=10525, lr=9.99238e-05, gnorm=2.293, loss_scale=4, train_wall=11, gb_free=2.8, wall=120857
2021-06-20 04:13:14 | INFO | train_inner | epoch 004: 1583 / 3002 loss=2.48, ppl=5.58, wps=6012.7, ups=0.09, wpb=64877, bsz=128, num_updates=10526, lr=9.99238e-05, gnorm=2.046, loss_scale=4, train_wall=10, gb_free=2.8, wall=120868
2021-06-20 04:13:25 | INFO | train_inner | epoch 004: 1584 / 3002 loss=2.635, ppl=6.21, wps=5884.4, ups=0.09, wpb=64829, bsz=128, num_updates=10527, lr=9.99238e-05, gnorm=2.086, loss_scale=4, train_wall=11, gb_free=2.8, wall=120879
2021-06-20 04:13:36 | INFO | train_inner | epoch 004: 1585 / 3002 loss=2.621, ppl=6.15, wps=5852.4, ups=0.09, wpb=64856, bsz=128, num_updates=10528, lr=9.99238e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=120890
2021-06-20 04:13:47 | INFO | train_inner | epoch 004: 1586 / 3002 loss=2.807, ppl=7, wps=5812, ups=0.09, wpb=64846, bsz=128, num_updates=10529, lr=9.99238e-05, gnorm=2.167, loss_scale=4, train_wall=11, gb_free=2.8, wall=120901
2021-06-20 04:13:58 | INFO | train_inner | epoch 004: 1587 / 3002 loss=2.551, ppl=5.86, wps=5850.9, ups=0.09, wpb=64847, bsz=128, num_updates=10530, lr=9.99238e-05, gnorm=4.521, loss_scale=4, train_wall=11, gb_free=2.8, wall=120912
2021-06-20 04:14:09 | INFO | train_inner | epoch 004: 1588 / 3002 loss=2.619, ppl=6.15, wps=5860.9, ups=0.09, wpb=64861, bsz=128, num_updates=10531, lr=9.99237e-05, gnorm=2.674, loss_scale=4, train_wall=11, gb_free=2.8, wall=120923
2021-06-20 04:14:20 | INFO | train_inner | epoch 004: 1589 / 3002 loss=2.542, ppl=5.82, wps=5882.5, ups=0.09, wpb=64843, bsz=128, num_updates=10532, lr=9.99237e-05, gnorm=2.074, loss_scale=4, train_wall=11, gb_free=2.8, wall=120934
2021-06-20 04:14:31 | INFO | train_inner | epoch 004: 1590 / 3002 loss=2.484, ppl=5.6, wps=5838, ups=0.09, wpb=64838, bsz=128, num_updates=10533, lr=9.99237e-05, gnorm=2.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=120945
2021-06-20 04:14:42 | INFO | train_inner | epoch 004: 1591 / 3002 loss=2.629, ppl=6.19, wps=5952.2, ups=0.09, wpb=64804, bsz=128, num_updates=10534, lr=9.99237e-05, gnorm=2.074, loss_scale=4, train_wall=10, gb_free=2.8, wall=120956
2021-06-20 04:14:53 | INFO | train_inner | epoch 004: 1592 / 3002 loss=2.56, ppl=5.9, wps=5902.3, ups=0.09, wpb=64949, bsz=128, num_updates=10535, lr=9.99237e-05, gnorm=2.068, loss_scale=4, train_wall=11, gb_free=2.8, wall=120967
2021-06-20 04:15:04 | INFO | train_inner | epoch 004: 1593 / 3002 loss=2.794, ppl=6.94, wps=5873.9, ups=0.09, wpb=64773, bsz=128, num_updates=10536, lr=9.99237e-05, gnorm=2.106, loss_scale=4, train_wall=11, gb_free=2.8, wall=120978
2021-06-20 04:15:15 | INFO | train_inner | epoch 004: 1594 / 3002 loss=2.713, ppl=6.56, wps=5749.2, ups=0.09, wpb=64853, bsz=128, num_updates=10537, lr=9.99237e-05, gnorm=2.092, loss_scale=4, train_wall=11, gb_free=2.8, wall=120990
2021-06-20 04:15:27 | INFO | train_inner | epoch 004: 1595 / 3002 loss=2.446, ppl=5.45, wps=5798.5, ups=0.09, wpb=64839, bsz=128, num_updates=10538, lr=9.99237e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=121001
2021-06-20 04:15:37 | INFO | train_inner | epoch 004: 1596 / 3002 loss=2.573, ppl=5.95, wps=5912.8, ups=0.09, wpb=64801, bsz=128, num_updates=10539, lr=9.99237e-05, gnorm=2.247, loss_scale=4, train_wall=10, gb_free=2.8, wall=121012
2021-06-20 04:15:49 | INFO | train_inner | epoch 004: 1597 / 3002 loss=2.56, ppl=5.9, wps=5861.5, ups=0.09, wpb=64871, bsz=128, num_updates=10540, lr=9.99237e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=121023
2021-06-20 04:15:59 | INFO | train_inner | epoch 004: 1598 / 3002 loss=2.594, ppl=6.04, wps=5972.4, ups=0.09, wpb=64955, bsz=128, num_updates=10541, lr=9.99237e-05, gnorm=1.998, loss_scale=4, train_wall=10, gb_free=2.8, wall=121034
2021-06-20 04:16:10 | INFO | train_inner | epoch 004: 1599 / 3002 loss=2.55, ppl=5.86, wps=5928.8, ups=0.09, wpb=64647, bsz=128, num_updates=10542, lr=9.99237e-05, gnorm=2.206, loss_scale=4, train_wall=10, gb_free=2.8, wall=121045
2021-06-20 04:16:22 | INFO | train_inner | epoch 004: 1600 / 3002 loss=2.489, ppl=5.61, wps=5788, ups=0.09, wpb=64861, bsz=128, num_updates=10543, lr=9.99236e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=121056
2021-06-20 04:16:32 | INFO | train_inner | epoch 004: 1601 / 3002 loss=2.525, ppl=5.76, wps=5971.4, ups=0.09, wpb=64896, bsz=128, num_updates=10544, lr=9.99236e-05, gnorm=2.101, loss_scale=4, train_wall=10, gb_free=2.8, wall=121067
2021-06-20 04:16:43 | INFO | train_inner | epoch 004: 1602 / 3002 loss=2.669, ppl=6.36, wps=5913, ups=0.09, wpb=64838, bsz=128, num_updates=10545, lr=9.99236e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=121078
2021-06-20 04:16:55 | INFO | train_inner | epoch 004: 1603 / 3002 loss=2.497, ppl=5.65, wps=5815.7, ups=0.09, wpb=64843, bsz=128, num_updates=10546, lr=9.99236e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=121089
2021-06-20 04:17:06 | INFO | train_inner | epoch 004: 1604 / 3002 loss=2.62, ppl=6.15, wps=5817.5, ups=0.09, wpb=64776, bsz=128, num_updates=10547, lr=9.99236e-05, gnorm=2.042, loss_scale=4, train_wall=11, gb_free=2.8, wall=121100
2021-06-20 04:17:17 | INFO | train_inner | epoch 004: 1605 / 3002 loss=2.496, ppl=5.64, wps=5842.3, ups=0.09, wpb=64890, bsz=128, num_updates=10548, lr=9.99236e-05, gnorm=2.561, loss_scale=4, train_wall=11, gb_free=2.8, wall=121111
2021-06-20 04:17:28 | INFO | train_inner | epoch 004: 1606 / 3002 loss=2.429, ppl=5.38, wps=5991.3, ups=0.09, wpb=64796, bsz=128, num_updates=10549, lr=9.99236e-05, gnorm=2.033, loss_scale=4, train_wall=10, gb_free=2.8, wall=121122
2021-06-20 04:17:39 | INFO | train_inner | epoch 004: 1607 / 3002 loss=2.396, ppl=5.26, wps=5894.8, ups=0.09, wpb=64912, bsz=128, num_updates=10550, lr=9.99236e-05, gnorm=2.208, loss_scale=4, train_wall=11, gb_free=2.8, wall=121133
2021-06-20 04:17:50 | INFO | train_inner | epoch 004: 1608 / 3002 loss=2.651, ppl=6.28, wps=5872.6, ups=0.09, wpb=64783, bsz=128, num_updates=10551, lr=9.99236e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=121144
2021-06-20 04:18:01 | INFO | train_inner | epoch 004: 1609 / 3002 loss=2.543, ppl=5.83, wps=5919.6, ups=0.09, wpb=64883, bsz=128, num_updates=10552, lr=9.99236e-05, gnorm=2.098, loss_scale=4, train_wall=11, gb_free=2.8, wall=121155
2021-06-20 04:18:12 | INFO | train_inner | epoch 004: 1610 / 3002 loss=2.49, ppl=5.62, wps=5910.6, ups=0.09, wpb=64856, bsz=128, num_updates=10553, lr=9.99236e-05, gnorm=2.011, loss_scale=4, train_wall=10, gb_free=2.8, wall=121166
2021-06-20 04:18:22 | INFO | train_inner | epoch 004: 1611 / 3002 loss=2.562, ppl=5.9, wps=5963.2, ups=0.09, wpb=64879, bsz=128, num_updates=10554, lr=9.99236e-05, gnorm=2.048, loss_scale=4, train_wall=10, gb_free=2.8, wall=121177
2021-06-20 04:18:34 | INFO | train_inner | epoch 004: 1612 / 3002 loss=2.554, ppl=5.87, wps=5792.1, ups=0.09, wpb=64772, bsz=128, num_updates=10555, lr=9.99236e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=121188
2021-06-20 04:18:45 | INFO | train_inner | epoch 004: 1613 / 3002 loss=2.558, ppl=5.89, wps=5843.9, ups=0.09, wpb=64806, bsz=128, num_updates=10556, lr=9.99235e-05, gnorm=2.077, loss_scale=4, train_wall=11, gb_free=2.8, wall=121199
2021-06-20 04:18:56 | INFO | train_inner | epoch 004: 1614 / 3002 loss=2.524, ppl=5.75, wps=5823.5, ups=0.09, wpb=64818, bsz=128, num_updates=10557, lr=9.99235e-05, gnorm=1.981, loss_scale=4, train_wall=11, gb_free=2.8, wall=121210
2021-06-20 04:19:07 | INFO | train_inner | epoch 004: 1615 / 3002 loss=2.593, ppl=6.03, wps=5841.9, ups=0.09, wpb=64792, bsz=128, num_updates=10558, lr=9.99235e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=121221
2021-06-20 04:19:18 | INFO | train_inner | epoch 004: 1616 / 3002 loss=2.502, ppl=5.66, wps=5750.7, ups=0.09, wpb=64829, bsz=128, num_updates=10559, lr=9.99235e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=121233
2021-06-20 04:19:29 | INFO | train_inner | epoch 004: 1617 / 3002 loss=2.712, ppl=6.55, wps=5849, ups=0.09, wpb=64890, bsz=128, num_updates=10560, lr=9.99235e-05, gnorm=2.106, loss_scale=4, train_wall=11, gb_free=2.8, wall=121244
2021-06-20 04:19:40 | INFO | train_inner | epoch 004: 1618 / 3002 loss=2.615, ppl=6.13, wps=5796.5, ups=0.09, wpb=64823, bsz=128, num_updates=10561, lr=9.99235e-05, gnorm=2.043, loss_scale=4, train_wall=11, gb_free=2.8, wall=121255
2021-06-20 04:19:52 | INFO | train_inner | epoch 004: 1619 / 3002 loss=2.439, ppl=5.42, wps=5717.7, ups=0.09, wpb=64814, bsz=128, num_updates=10562, lr=9.99235e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=121266
2021-06-20 04:20:03 | INFO | train_inner | epoch 004: 1620 / 3002 loss=2.54, ppl=5.82, wps=5949, ups=0.09, wpb=64883, bsz=128, num_updates=10563, lr=9.99235e-05, gnorm=2.091, loss_scale=4, train_wall=10, gb_free=2.8, wall=121277
2021-06-20 04:20:14 | INFO | train_inner | epoch 004: 1621 / 3002 loss=2.465, ppl=5.52, wps=5789.9, ups=0.09, wpb=64818, bsz=128, num_updates=10564, lr=9.99235e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=121288
2021-06-20 04:20:25 | INFO | train_inner | epoch 004: 1622 / 3002 loss=2.499, ppl=5.65, wps=5933, ups=0.09, wpb=64832, bsz=128, num_updates=10565, lr=9.99235e-05, gnorm=1.986, loss_scale=4, train_wall=10, gb_free=2.8, wall=121299
2021-06-20 04:20:36 | INFO | train_inner | epoch 004: 1623 / 3002 loss=2.636, ppl=6.21, wps=5744.5, ups=0.09, wpb=64783, bsz=128, num_updates=10566, lr=9.99235e-05, gnorm=2.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=121310
2021-06-20 04:20:47 | INFO | train_inner | epoch 004: 1624 / 3002 loss=2.664, ppl=6.34, wps=5776.3, ups=0.09, wpb=64853, bsz=128, num_updates=10567, lr=9.99235e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=121322
2021-06-20 04:20:58 | INFO | train_inner | epoch 004: 1625 / 3002 loss=2.528, ppl=5.77, wps=5838.2, ups=0.09, wpb=64817, bsz=128, num_updates=10568, lr=9.99234e-05, gnorm=2.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=121333
2021-06-20 04:21:10 | INFO | train_inner | epoch 004: 1626 / 3002 loss=2.484, ppl=5.59, wps=5679.6, ups=0.09, wpb=64763, bsz=128, num_updates=10569, lr=9.99234e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=121344
2021-06-20 04:21:21 | INFO | train_inner | epoch 004: 1627 / 3002 loss=2.544, ppl=5.83, wps=5867.7, ups=0.09, wpb=64859, bsz=128, num_updates=10570, lr=9.99234e-05, gnorm=2.112, loss_scale=4, train_wall=11, gb_free=2.8, wall=121355
2021-06-20 04:21:32 | INFO | train_inner | epoch 004: 1628 / 3002 loss=2.777, ppl=6.85, wps=5742.4, ups=0.09, wpb=64819, bsz=128, num_updates=10571, lr=9.99234e-05, gnorm=2.056, loss_scale=4, train_wall=11, gb_free=2.8, wall=121367
2021-06-20 04:21:43 | INFO | train_inner | epoch 004: 1629 / 3002 loss=2.563, ppl=5.91, wps=5914.2, ups=0.09, wpb=64848, bsz=128, num_updates=10572, lr=9.99234e-05, gnorm=2.079, loss_scale=4, train_wall=11, gb_free=2.8, wall=121377
2021-06-20 04:21:54 | INFO | train_inner | epoch 004: 1630 / 3002 loss=2.456, ppl=5.49, wps=5920.7, ups=0.09, wpb=64804, bsz=128, num_updates=10573, lr=9.99234e-05, gnorm=1.949, loss_scale=4, train_wall=11, gb_free=2.8, wall=121388
2021-06-20 04:22:05 | INFO | train_inner | epoch 004: 1631 / 3002 loss=2.485, ppl=5.6, wps=5806.8, ups=0.09, wpb=64837, bsz=128, num_updates=10574, lr=9.99234e-05, gnorm=1.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=121400
2021-06-20 04:22:16 | INFO | train_inner | epoch 004: 1632 / 3002 loss=2.397, ppl=5.27, wps=5865, ups=0.09, wpb=64882, bsz=128, num_updates=10575, lr=9.99234e-05, gnorm=2.382, loss_scale=4, train_wall=11, gb_free=2.8, wall=121411
2021-06-20 04:22:28 | INFO | train_inner | epoch 004: 1633 / 3002 loss=2.594, ppl=6.04, wps=5687.8, ups=0.09, wpb=64740, bsz=128, num_updates=10576, lr=9.99234e-05, gnorm=2.176, loss_scale=4, train_wall=11, gb_free=2.8, wall=121422
2021-06-20 04:22:39 | INFO | train_inner | epoch 004: 1634 / 3002 loss=2.657, ppl=6.31, wps=5903, ups=0.09, wpb=64852, bsz=128, num_updates=10577, lr=9.99234e-05, gnorm=2.079, loss_scale=4, train_wall=11, gb_free=2.8, wall=121433
2021-06-20 04:22:50 | INFO | train_inner | epoch 004: 1635 / 3002 loss=2.61, ppl=6.11, wps=5930.2, ups=0.09, wpb=64926, bsz=128, num_updates=10578, lr=9.99234e-05, gnorm=2.049, loss_scale=4, train_wall=10, gb_free=2.8, wall=121444
2021-06-20 04:23:01 | INFO | train_inner | epoch 004: 1636 / 3002 loss=2.674, ppl=6.38, wps=5926, ups=0.09, wpb=64887, bsz=128, num_updates=10579, lr=9.99234e-05, gnorm=2.351, loss_scale=4, train_wall=11, gb_free=2.8, wall=121455
2021-06-20 04:23:12 | INFO | train_inner | epoch 004: 1637 / 3002 loss=2.674, ppl=6.38, wps=5664.9, ups=0.09, wpb=64768, bsz=128, num_updates=10580, lr=9.99234e-05, gnorm=2.048, loss_scale=4, train_wall=11, gb_free=2.8, wall=121466
2021-06-20 04:23:23 | INFO | train_inner | epoch 004: 1638 / 3002 loss=2.537, ppl=5.8, wps=5987, ups=0.09, wpb=64807, bsz=128, num_updates=10581, lr=9.99233e-05, gnorm=2.037, loss_scale=8, train_wall=10, gb_free=2.8, wall=121477
2021-06-20 04:23:34 | INFO | train_inner | epoch 004: 1639 / 3002 loss=2.585, ppl=6, wps=5818.2, ups=0.09, wpb=64764, bsz=128, num_updates=10582, lr=9.99233e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=121488
2021-06-20 04:23:45 | INFO | train_inner | epoch 004: 1640 / 3002 loss=2.437, ppl=5.42, wps=5913.9, ups=0.09, wpb=64814, bsz=128, num_updates=10583, lr=9.99233e-05, gnorm=2.013, loss_scale=8, train_wall=10, gb_free=2.8, wall=121499
2021-06-20 04:23:56 | INFO | train_inner | epoch 004: 1641 / 3002 loss=2.458, ppl=5.49, wps=5874.4, ups=0.09, wpb=64806, bsz=128, num_updates=10584, lr=9.99233e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=121510
2021-06-20 04:24:07 | INFO | train_inner | epoch 004: 1642 / 3002 loss=2.558, ppl=5.89, wps=5786.6, ups=0.09, wpb=64823, bsz=128, num_updates=10585, lr=9.99233e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=121522
2021-06-20 04:24:18 | INFO | train_inner | epoch 004: 1643 / 3002 loss=2.361, ppl=5.14, wps=5894.3, ups=0.09, wpb=64888, bsz=128, num_updates=10586, lr=9.99233e-05, gnorm=2.08, loss_scale=8, train_wall=11, gb_free=2.8, wall=121533
2021-06-20 04:24:29 | INFO | train_inner | epoch 004: 1644 / 3002 loss=2.584, ppl=6, wps=5844.3, ups=0.09, wpb=64786, bsz=128, num_updates=10587, lr=9.99233e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=121544
2021-06-20 04:24:40 | INFO | train_inner | epoch 004: 1645 / 3002 loss=2.676, ppl=6.39, wps=5809.5, ups=0.09, wpb=64824, bsz=128, num_updates=10588, lr=9.99233e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=121555
2021-06-20 04:24:52 | INFO | train_inner | epoch 004: 1646 / 3002 loss=2.568, ppl=5.93, wps=5837.5, ups=0.09, wpb=64912, bsz=128, num_updates=10589, lr=9.99233e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=121566
2021-06-20 04:25:03 | INFO | train_inner | epoch 004: 1647 / 3002 loss=2.571, ppl=5.94, wps=5875.5, ups=0.09, wpb=64923, bsz=128, num_updates=10590, lr=9.99233e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=121577
2021-06-20 04:25:14 | INFO | train_inner | epoch 004: 1648 / 3002 loss=2.617, ppl=6.13, wps=5925, ups=0.09, wpb=64816, bsz=128, num_updates=10591, lr=9.99233e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=121588
2021-06-20 04:25:25 | INFO | train_inner | epoch 004: 1649 / 3002 loss=2.476, ppl=5.56, wps=5904, ups=0.09, wpb=64860, bsz=128, num_updates=10592, lr=9.99233e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=121599
2021-06-20 04:25:35 | INFO | train_inner | epoch 004: 1650 / 3002 loss=2.603, ppl=6.08, wps=5927.7, ups=0.09, wpb=64810, bsz=128, num_updates=10593, lr=9.99232e-05, gnorm=2.047, loss_scale=8, train_wall=10, gb_free=2.8, wall=121610
2021-06-20 04:25:47 | INFO | train_inner | epoch 004: 1651 / 3002 loss=2.549, ppl=5.85, wps=5851.1, ups=0.09, wpb=64736, bsz=128, num_updates=10594, lr=9.99232e-05, gnorm=2.1, loss_scale=8, train_wall=11, gb_free=2.8, wall=121621
2021-06-20 04:25:58 | INFO | train_inner | epoch 004: 1652 / 3002 loss=2.441, ppl=5.43, wps=5822, ups=0.09, wpb=64814, bsz=128, num_updates=10595, lr=9.99232e-05, gnorm=2.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=121632
2021-06-20 04:26:09 | INFO | train_inner | epoch 004: 1653 / 3002 loss=2.539, ppl=5.81, wps=5928.2, ups=0.09, wpb=64854, bsz=128, num_updates=10596, lr=9.99232e-05, gnorm=2.059, loss_scale=8, train_wall=10, gb_free=2.8, wall=121643
2021-06-20 04:26:19 | INFO | train_inner | epoch 004: 1654 / 3002 loss=2.503, ppl=5.67, wps=5978.6, ups=0.09, wpb=64824, bsz=128, num_updates=10597, lr=9.99232e-05, gnorm=2.048, loss_scale=8, train_wall=10, gb_free=2.8, wall=121654
2021-06-20 04:26:31 | INFO | train_inner | epoch 004: 1655 / 3002 loss=2.499, ppl=5.65, wps=5780.5, ups=0.09, wpb=64798, bsz=128, num_updates=10598, lr=9.99232e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=121665
2021-06-20 04:26:42 | INFO | train_inner | epoch 004: 1656 / 3002 loss=2.446, ppl=5.45, wps=5879, ups=0.09, wpb=64865, bsz=128, num_updates=10599, lr=9.99232e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=121676
2021-06-20 04:26:53 | INFO | train_inner | epoch 004: 1657 / 3002 loss=2.565, ppl=5.92, wps=5926.8, ups=0.09, wpb=64804, bsz=128, num_updates=10600, lr=9.99232e-05, gnorm=2.09, loss_scale=8, train_wall=10, gb_free=2.8, wall=121687
2021-06-20 04:27:04 | INFO | train_inner | epoch 004: 1658 / 3002 loss=2.459, ppl=5.5, wps=5854.2, ups=0.09, wpb=64843, bsz=128, num_updates=10601, lr=9.99232e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=121698
2021-06-20 04:27:15 | INFO | train_inner | epoch 004: 1659 / 3002 loss=2.609, ppl=6.1, wps=5742.1, ups=0.09, wpb=64758, bsz=128, num_updates=10602, lr=9.99232e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=121709
2021-06-20 04:27:26 | INFO | train_inner | epoch 004: 1660 / 3002 loss=2.527, ppl=5.76, wps=5830.3, ups=0.09, wpb=64796, bsz=128, num_updates=10603, lr=9.99232e-05, gnorm=2.079, loss_scale=8, train_wall=11, gb_free=2.8, wall=121720
2021-06-20 04:27:37 | INFO | train_inner | epoch 004: 1661 / 3002 loss=2.502, ppl=5.66, wps=5910.2, ups=0.09, wpb=64844, bsz=128, num_updates=10604, lr=9.99232e-05, gnorm=2.055, loss_scale=8, train_wall=10, gb_free=2.8, wall=121731
2021-06-20 04:27:48 | INFO | train_inner | epoch 004: 1662 / 3002 loss=2.481, ppl=5.58, wps=5891.8, ups=0.09, wpb=64829, bsz=128, num_updates=10605, lr=9.99232e-05, gnorm=2.066, loss_scale=8, train_wall=11, gb_free=2.8, wall=121742
2021-06-20 04:27:59 | INFO | train_inner | epoch 004: 1663 / 3002 loss=2.775, ppl=6.84, wps=5897.4, ups=0.09, wpb=64903, bsz=128, num_updates=10606, lr=9.99231e-05, gnorm=2.761, loss_scale=8, train_wall=11, gb_free=2.8, wall=121753
2021-06-20 04:28:10 | INFO | train_inner | epoch 004: 1664 / 3002 loss=2.408, ppl=5.31, wps=5973.9, ups=0.09, wpb=64770, bsz=128, num_updates=10607, lr=9.99231e-05, gnorm=2.139, loss_scale=8, train_wall=10, gb_free=2.8, wall=121764
2021-06-20 04:28:21 | INFO | train_inner | epoch 004: 1665 / 3002 loss=2.587, ppl=6.01, wps=5813.5, ups=0.09, wpb=64820, bsz=128, num_updates=10608, lr=9.99231e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=121775
2021-06-20 04:28:32 | INFO | train_inner | epoch 004: 1666 / 3002 loss=2.454, ppl=5.48, wps=5776.8, ups=0.09, wpb=64832, bsz=128, num_updates=10609, lr=9.99231e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=121787
2021-06-20 04:28:43 | INFO | train_inner | epoch 004: 1667 / 3002 loss=2.526, ppl=5.76, wps=5908.7, ups=0.09, wpb=64836, bsz=128, num_updates=10610, lr=9.99231e-05, gnorm=2.008, loss_scale=8, train_wall=10, gb_free=2.8, wall=121798
2021-06-20 04:28:54 | INFO | train_inner | epoch 004: 1668 / 3002 loss=2.573, ppl=5.95, wps=5771.1, ups=0.09, wpb=64784, bsz=128, num_updates=10611, lr=9.99231e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=121809
2021-06-20 04:29:06 | INFO | train_inner | epoch 004: 1669 / 3002 loss=2.579, ppl=5.98, wps=5847.9, ups=0.09, wpb=64835, bsz=128, num_updates=10612, lr=9.99231e-05, gnorm=2.086, loss_scale=8, train_wall=11, gb_free=2.8, wall=121820
2021-06-20 04:29:17 | INFO | train_inner | epoch 004: 1670 / 3002 loss=2.568, ppl=5.93, wps=5813.1, ups=0.09, wpb=64833, bsz=128, num_updates=10613, lr=9.99231e-05, gnorm=2.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=121831
2021-06-20 04:29:28 | INFO | train_inner | epoch 004: 1671 / 3002 loss=2.528, ppl=5.77, wps=5882.5, ups=0.09, wpb=64804, bsz=128, num_updates=10614, lr=9.99231e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=121842
2021-06-20 04:29:39 | INFO | train_inner | epoch 004: 1672 / 3002 loss=2.504, ppl=5.67, wps=5725.3, ups=0.09, wpb=64797, bsz=128, num_updates=10615, lr=9.99231e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=121853
2021-06-20 04:29:50 | INFO | train_inner | epoch 004: 1673 / 3002 loss=2.632, ppl=6.2, wps=5826.3, ups=0.09, wpb=64863, bsz=128, num_updates=10616, lr=9.99231e-05, gnorm=2.059, loss_scale=8, train_wall=11, gb_free=2.8, wall=121865
2021-06-20 04:30:01 | INFO | train_inner | epoch 004: 1674 / 3002 loss=2.657, ppl=6.31, wps=5894.2, ups=0.09, wpb=64858, bsz=128, num_updates=10617, lr=9.99231e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=121876
2021-06-20 04:30:12 | INFO | train_inner | epoch 004: 1675 / 3002 loss=2.583, ppl=5.99, wps=5830.1, ups=0.09, wpb=64745, bsz=128, num_updates=10618, lr=9.9923e-05, gnorm=4.538, loss_scale=8, train_wall=11, gb_free=2.8, wall=121887
2021-06-20 04:30:23 | INFO | train_inner | epoch 004: 1676 / 3002 loss=2.592, ppl=6.03, wps=5943.9, ups=0.09, wpb=64854, bsz=128, num_updates=10619, lr=9.9923e-05, gnorm=2.145, loss_scale=8, train_wall=10, gb_free=2.8, wall=121898
2021-06-20 04:30:34 | INFO | train_inner | epoch 004: 1677 / 3002 loss=2.437, ppl=5.42, wps=5850.3, ups=0.09, wpb=64818, bsz=128, num_updates=10620, lr=9.9923e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=121909
2021-06-20 04:30:45 | INFO | train_inner | epoch 004: 1678 / 3002 loss=2.582, ppl=5.99, wps=5884, ups=0.09, wpb=64867, bsz=128, num_updates=10621, lr=9.9923e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=121920
2021-06-20 04:30:56 | INFO | train_inner | epoch 004: 1679 / 3002 loss=2.664, ppl=6.34, wps=5940.3, ups=0.09, wpb=64842, bsz=128, num_updates=10622, lr=9.9923e-05, gnorm=3.402, loss_scale=8, train_wall=10, gb_free=2.8, wall=121931
2021-06-20 04:31:07 | INFO | train_inner | epoch 004: 1680 / 3002 loss=2.68, ppl=6.41, wps=5775.5, ups=0.09, wpb=64757, bsz=128, num_updates=10623, lr=9.9923e-05, gnorm=2.159, loss_scale=8, train_wall=11, gb_free=2.8, wall=121942
2021-06-20 04:31:18 | INFO | train_inner | epoch 004: 1681 / 3002 loss=2.632, ppl=6.2, wps=5912.2, ups=0.09, wpb=64880, bsz=128, num_updates=10624, lr=9.9923e-05, gnorm=2.111, loss_scale=8, train_wall=11, gb_free=2.8, wall=121953
2021-06-20 04:31:30 | INFO | train_inner | epoch 004: 1682 / 3002 loss=2.74, ppl=6.68, wps=5828.8, ups=0.09, wpb=64898, bsz=128, num_updates=10625, lr=9.9923e-05, gnorm=2.144, loss_scale=8, train_wall=11, gb_free=2.8, wall=121964
2021-06-20 04:31:41 | INFO | train_inner | epoch 004: 1683 / 3002 loss=2.507, ppl=5.69, wps=5808.4, ups=0.09, wpb=64885, bsz=128, num_updates=10626, lr=9.9923e-05, gnorm=2.076, loss_scale=8, train_wall=11, gb_free=2.8, wall=121975
2021-06-20 04:31:52 | INFO | train_inner | epoch 004: 1684 / 3002 loss=2.567, ppl=5.93, wps=5851.3, ups=0.09, wpb=64750, bsz=128, num_updates=10627, lr=9.9923e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=121986
2021-06-20 04:32:03 | INFO | train_inner | epoch 004: 1685 / 3002 loss=2.581, ppl=5.98, wps=5905.6, ups=0.09, wpb=64919, bsz=128, num_updates=10628, lr=9.9923e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=121997
2021-06-20 04:32:14 | INFO | train_inner | epoch 004: 1686 / 3002 loss=2.478, ppl=5.57, wps=5870.6, ups=0.09, wpb=64791, bsz=128, num_updates=10629, lr=9.9923e-05, gnorm=2.066, loss_scale=8, train_wall=11, gb_free=2.8, wall=122008
2021-06-20 04:32:25 | INFO | train_inner | epoch 004: 1687 / 3002 loss=2.633, ppl=6.2, wps=5841.9, ups=0.09, wpb=64768, bsz=128, num_updates=10630, lr=9.9923e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=122019
2021-06-20 04:32:36 | INFO | train_inner | epoch 004: 1688 / 3002 loss=2.627, ppl=6.18, wps=5957.1, ups=0.09, wpb=64842, bsz=128, num_updates=10631, lr=9.99229e-05, gnorm=1.999, loss_scale=8, train_wall=10, gb_free=2.8, wall=122030
2021-06-20 04:32:47 | INFO | train_inner | epoch 004: 1689 / 3002 loss=2.569, ppl=5.93, wps=5862.2, ups=0.09, wpb=64801, bsz=128, num_updates=10632, lr=9.99229e-05, gnorm=2.135, loss_scale=8, train_wall=11, gb_free=2.8, wall=122041
2021-06-20 04:32:58 | INFO | train_inner | epoch 004: 1690 / 3002 loss=2.44, ppl=5.43, wps=5879, ups=0.09, wpb=64774, bsz=128, num_updates=10633, lr=9.99229e-05, gnorm=2.108, loss_scale=8, train_wall=11, gb_free=2.8, wall=122052
2021-06-20 04:33:09 | INFO | train_inner | epoch 004: 1691 / 3002 loss=2.601, ppl=6.07, wps=5729.9, ups=0.09, wpb=64791, bsz=128, num_updates=10634, lr=9.99229e-05, gnorm=2.133, loss_scale=8, train_wall=11, gb_free=2.8, wall=122064
2021-06-20 04:33:20 | INFO | train_inner | epoch 004: 1692 / 3002 loss=2.502, ppl=5.66, wps=5936.6, ups=0.09, wpb=64844, bsz=128, num_updates=10635, lr=9.99229e-05, gnorm=2.156, loss_scale=8, train_wall=10, gb_free=2.8, wall=122074
2021-06-20 04:33:31 | INFO | train_inner | epoch 004: 1693 / 3002 loss=2.606, ppl=6.09, wps=5824.1, ups=0.09, wpb=64797, bsz=128, num_updates=10636, lr=9.99229e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=122086
2021-06-20 04:33:42 | INFO | train_inner | epoch 004: 1694 / 3002 loss=2.531, ppl=5.78, wps=5863, ups=0.09, wpb=64838, bsz=128, num_updates=10637, lr=9.99229e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=122097
2021-06-20 04:33:53 | INFO | train_inner | epoch 004: 1695 / 3002 loss=2.737, ppl=6.67, wps=5806.4, ups=0.09, wpb=64808, bsz=128, num_updates=10638, lr=9.99229e-05, gnorm=2.09, loss_scale=8, train_wall=11, gb_free=2.8, wall=122108
2021-06-20 04:34:05 | INFO | train_inner | epoch 004: 1696 / 3002 loss=2.684, ppl=6.42, wps=5818.1, ups=0.09, wpb=64874, bsz=128, num_updates=10639, lr=9.99229e-05, gnorm=2.057, loss_scale=8, train_wall=11, gb_free=2.8, wall=122119
2021-06-20 04:34:16 | INFO | train_inner | epoch 004: 1697 / 3002 loss=2.61, ppl=6.11, wps=5672.3, ups=0.09, wpb=64756, bsz=128, num_updates=10640, lr=9.99229e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=122130
2021-06-20 04:34:27 | INFO | train_inner | epoch 004: 1698 / 3002 loss=2.404, ppl=5.29, wps=5771.3, ups=0.09, wpb=64808, bsz=128, num_updates=10641, lr=9.99229e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=122142
2021-06-20 04:34:38 | INFO | train_inner | epoch 004: 1699 / 3002 loss=2.508, ppl=5.69, wps=5834, ups=0.09, wpb=64801, bsz=128, num_updates=10642, lr=9.99229e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=122153
2021-06-20 04:34:49 | INFO | train_inner | epoch 004: 1700 / 3002 loss=2.631, ppl=6.19, wps=5827.4, ups=0.09, wpb=64733, bsz=128, num_updates=10643, lr=9.99228e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=122164
2021-06-20 04:35:00 | INFO | train_inner | epoch 004: 1701 / 3002 loss=2.513, ppl=5.71, wps=5929.4, ups=0.09, wpb=64875, bsz=128, num_updates=10644, lr=9.99228e-05, gnorm=2.039, loss_scale=8, train_wall=10, gb_free=2.8, wall=122175
2021-06-20 04:35:11 | INFO | train_inner | epoch 004: 1702 / 3002 loss=2.65, ppl=6.28, wps=5864.2, ups=0.09, wpb=64834, bsz=128, num_updates=10645, lr=9.99228e-05, gnorm=2.094, loss_scale=8, train_wall=11, gb_free=2.8, wall=122186
2021-06-20 04:35:23 | INFO | train_inner | epoch 004: 1703 / 3002 loss=2.592, ppl=6.03, wps=5716.4, ups=0.09, wpb=64806, bsz=128, num_updates=10646, lr=9.99228e-05, gnorm=2.069, loss_scale=8, train_wall=11, gb_free=2.8, wall=122197
2021-06-20 04:35:34 | INFO | train_inner | epoch 004: 1704 / 3002 loss=2.642, ppl=6.24, wps=5770.1, ups=0.09, wpb=64893, bsz=128, num_updates=10647, lr=9.99228e-05, gnorm=3.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=122208
2021-06-20 04:35:45 | INFO | train_inner | epoch 004: 1705 / 3002 loss=2.535, ppl=5.8, wps=5849.7, ups=0.09, wpb=64847, bsz=128, num_updates=10648, lr=9.99228e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=122219
2021-06-20 04:35:56 | INFO | train_inner | epoch 004: 1706 / 3002 loss=2.593, ppl=6.03, wps=5832.7, ups=0.09, wpb=64787, bsz=128, num_updates=10649, lr=9.99228e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=122231
2021-06-20 04:36:07 | INFO | train_inner | epoch 004: 1707 / 3002 loss=2.421, ppl=5.36, wps=5933.5, ups=0.09, wpb=64884, bsz=128, num_updates=10650, lr=9.99228e-05, gnorm=2.052, loss_scale=8, train_wall=10, gb_free=2.8, wall=122242
2021-06-20 04:36:18 | INFO | train_inner | epoch 004: 1708 / 3002 loss=2.624, ppl=6.16, wps=5930.6, ups=0.09, wpb=64788, bsz=128, num_updates=10651, lr=9.99228e-05, gnorm=2.128, loss_scale=8, train_wall=10, gb_free=2.8, wall=122252
2021-06-20 04:36:29 | INFO | train_inner | epoch 004: 1709 / 3002 loss=2.653, ppl=6.29, wps=5779.8, ups=0.09, wpb=64819, bsz=128, num_updates=10652, lr=9.99228e-05, gnorm=2.043, loss_scale=8, train_wall=11, gb_free=2.8, wall=122264
2021-06-20 04:36:40 | INFO | train_inner | epoch 004: 1710 / 3002 loss=2.457, ppl=5.49, wps=5918.8, ups=0.09, wpb=64802, bsz=128, num_updates=10653, lr=9.99228e-05, gnorm=1.958, loss_scale=8, train_wall=10, gb_free=2.8, wall=122275
2021-06-20 04:36:51 | INFO | train_inner | epoch 004: 1711 / 3002 loss=2.63, ppl=6.19, wps=5803, ups=0.09, wpb=64800, bsz=128, num_updates=10654, lr=9.99228e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=122286
2021-06-20 04:37:02 | INFO | train_inner | epoch 004: 1712 / 3002 loss=2.553, ppl=5.87, wps=5962.3, ups=0.09, wpb=64854, bsz=128, num_updates=10655, lr=9.99228e-05, gnorm=2.008, loss_scale=8, train_wall=10, gb_free=2.8, wall=122297
2021-06-20 04:37:13 | INFO | train_inner | epoch 004: 1713 / 3002 loss=2.692, ppl=6.46, wps=5797.8, ups=0.09, wpb=64827, bsz=128, num_updates=10656, lr=9.99227e-05, gnorm=2.742, loss_scale=8, train_wall=11, gb_free=2.8, wall=122308
2021-06-20 04:37:24 | INFO | train_inner | epoch 004: 1714 / 3002 loss=2.642, ppl=6.24, wps=5887.6, ups=0.09, wpb=64777, bsz=128, num_updates=10657, lr=9.99227e-05, gnorm=2.14, loss_scale=8, train_wall=11, gb_free=2.8, wall=122319
2021-06-20 04:37:36 | INFO | train_inner | epoch 004: 1715 / 3002 loss=2.386, ppl=5.23, wps=5812.7, ups=0.09, wpb=64831, bsz=128, num_updates=10658, lr=9.99227e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=122330
2021-06-20 04:37:47 | INFO | train_inner | epoch 004: 1716 / 3002 loss=2.704, ppl=6.52, wps=5903.6, ups=0.09, wpb=64808, bsz=128, num_updates=10659, lr=9.99227e-05, gnorm=2.117, loss_scale=8, train_wall=10, gb_free=2.8, wall=122341
2021-06-20 04:37:58 | INFO | train_inner | epoch 004: 1717 / 3002 loss=2.569, ppl=5.93, wps=5856.7, ups=0.09, wpb=64778, bsz=128, num_updates=10660, lr=9.99227e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=122352
2021-06-20 04:38:09 | INFO | train_inner | epoch 004: 1718 / 3002 loss=2.636, ppl=6.22, wps=5924.1, ups=0.09, wpb=64817, bsz=128, num_updates=10661, lr=9.99227e-05, gnorm=2.069, loss_scale=8, train_wall=10, gb_free=2.8, wall=122363
2021-06-20 04:38:20 | INFO | train_inner | epoch 004: 1719 / 3002 loss=2.658, ppl=6.31, wps=5889, ups=0.09, wpb=64841, bsz=128, num_updates=10662, lr=9.99227e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=122374
2021-06-20 04:38:31 | INFO | train_inner | epoch 004: 1720 / 3002 loss=2.615, ppl=6.13, wps=5849.7, ups=0.09, wpb=64866, bsz=128, num_updates=10663, lr=9.99227e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=122385
2021-06-20 04:38:42 | INFO | train_inner | epoch 004: 1721 / 3002 loss=2.532, ppl=5.78, wps=5944.8, ups=0.09, wpb=64808, bsz=128, num_updates=10664, lr=9.99227e-05, gnorm=2.221, loss_scale=8, train_wall=10, gb_free=2.8, wall=122396
2021-06-20 04:38:53 | INFO | train_inner | epoch 004: 1722 / 3002 loss=2.689, ppl=6.45, wps=5873.7, ups=0.09, wpb=64894, bsz=128, num_updates=10665, lr=9.99227e-05, gnorm=2.166, loss_scale=8, train_wall=11, gb_free=2.8, wall=122407
2021-06-20 04:39:04 | INFO | train_inner | epoch 004: 1723 / 3002 loss=2.4, ppl=5.28, wps=5870, ups=0.09, wpb=64859, bsz=128, num_updates=10666, lr=9.99227e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=122418
2021-06-20 04:39:14 | INFO | train_inner | epoch 004: 1724 / 3002 loss=2.481, ppl=5.58, wps=6037, ups=0.09, wpb=64920, bsz=128, num_updates=10667, lr=9.99227e-05, gnorm=2.091, loss_scale=8, train_wall=10, gb_free=2.8, wall=122429
2021-06-20 04:39:25 | INFO | train_inner | epoch 004: 1725 / 3002 loss=2.606, ppl=6.09, wps=5945.6, ups=0.09, wpb=64844, bsz=128, num_updates=10668, lr=9.99226e-05, gnorm=3, loss_scale=8, train_wall=10, gb_free=2.8, wall=122440
2021-06-20 04:39:37 | INFO | train_inner | epoch 004: 1726 / 3002 loss=2.527, ppl=5.76, wps=5828.1, ups=0.09, wpb=64842, bsz=128, num_updates=10669, lr=9.99226e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=122451
2021-06-20 04:39:48 | INFO | train_inner | epoch 004: 1727 / 3002 loss=2.451, ppl=5.47, wps=5847.6, ups=0.09, wpb=64805, bsz=128, num_updates=10670, lr=9.99226e-05, gnorm=2.057, loss_scale=8, train_wall=11, gb_free=2.8, wall=122462
2021-06-20 04:39:59 | INFO | train_inner | epoch 004: 1728 / 3002 loss=2.444, ppl=5.44, wps=5864.4, ups=0.09, wpb=64793, bsz=128, num_updates=10671, lr=9.99226e-05, gnorm=9.521, loss_scale=8, train_wall=11, gb_free=2.8, wall=122473
2021-06-20 04:40:10 | INFO | train_inner | epoch 004: 1729 / 3002 loss=2.616, ppl=6.13, wps=5857.6, ups=0.09, wpb=64846, bsz=128, num_updates=10672, lr=9.99226e-05, gnorm=6.097, loss_scale=8, train_wall=11, gb_free=2.8, wall=122484
2021-06-20 04:40:21 | INFO | train_inner | epoch 004: 1730 / 3002 loss=2.494, ppl=5.63, wps=5712.4, ups=0.09, wpb=64755, bsz=128, num_updates=10673, lr=9.99226e-05, gnorm=2.698, loss_scale=8, train_wall=11, gb_free=2.8, wall=122495
2021-06-20 04:40:32 | INFO | train_inner | epoch 004: 1731 / 3002 loss=2.531, ppl=5.78, wps=5760.5, ups=0.09, wpb=64842, bsz=128, num_updates=10674, lr=9.99226e-05, gnorm=2.285, loss_scale=8, train_wall=11, gb_free=2.8, wall=122507
2021-06-20 04:40:44 | INFO | train_inner | epoch 004: 1732 / 3002 loss=2.499, ppl=5.65, wps=5752.2, ups=0.09, wpb=64817, bsz=128, num_updates=10675, lr=9.99226e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=122518
2021-06-20 04:40:54 | INFO | train_inner | epoch 004: 1733 / 3002 loss=2.633, ppl=6.2, wps=5996.3, ups=0.09, wpb=64866, bsz=128, num_updates=10676, lr=9.99226e-05, gnorm=2.144, loss_scale=8, train_wall=10, gb_free=2.8, wall=122529
2021-06-20 04:41:05 | INFO | train_inner | epoch 004: 1734 / 3002 loss=2.665, ppl=6.34, wps=5882.7, ups=0.09, wpb=64805, bsz=128, num_updates=10677, lr=9.99226e-05, gnorm=2.374, loss_scale=8, train_wall=11, gb_free=2.8, wall=122540
2021-06-20 04:41:16 | INFO | train_inner | epoch 004: 1735 / 3002 loss=2.609, ppl=6.1, wps=5892.7, ups=0.09, wpb=64790, bsz=128, num_updates=10678, lr=9.99226e-05, gnorm=2.206, loss_scale=8, train_wall=11, gb_free=2.8, wall=122551
2021-06-20 04:41:27 | INFO | train_inner | epoch 004: 1736 / 3002 loss=2.537, ppl=5.8, wps=5866.8, ups=0.09, wpb=64748, bsz=128, num_updates=10679, lr=9.99226e-05, gnorm=2.324, loss_scale=8, train_wall=11, gb_free=2.8, wall=122562
2021-06-20 04:41:39 | INFO | train_inner | epoch 004: 1737 / 3002 loss=2.634, ppl=6.21, wps=5809.6, ups=0.09, wpb=64802, bsz=128, num_updates=10680, lr=9.99226e-05, gnorm=2.09, loss_scale=8, train_wall=11, gb_free=2.8, wall=122573
2021-06-20 04:41:50 | INFO | train_inner | epoch 004: 1738 / 3002 loss=2.657, ppl=6.31, wps=5908.5, ups=0.09, wpb=64832, bsz=128, num_updates=10681, lr=9.99225e-05, gnorm=2.292, loss_scale=8, train_wall=11, gb_free=2.8, wall=122584
2021-06-20 04:42:01 | INFO | train_inner | epoch 004: 1739 / 3002 loss=2.562, ppl=5.9, wps=5732.9, ups=0.09, wpb=64842, bsz=128, num_updates=10682, lr=9.99225e-05, gnorm=2.312, loss_scale=8, train_wall=11, gb_free=2.8, wall=122595
2021-06-20 04:42:12 | INFO | train_inner | epoch 004: 1740 / 3002 loss=2.55, ppl=5.85, wps=5732.2, ups=0.09, wpb=64749, bsz=128, num_updates=10683, lr=9.99225e-05, gnorm=2.163, loss_scale=8, train_wall=11, gb_free=2.8, wall=122607
2021-06-20 04:42:23 | INFO | train_inner | epoch 004: 1741 / 3002 loss=2.727, ppl=6.62, wps=5881.8, ups=0.09, wpb=64789, bsz=128, num_updates=10684, lr=9.99225e-05, gnorm=2.128, loss_scale=8, train_wall=11, gb_free=2.8, wall=122618
2021-06-20 04:42:34 | INFO | train_inner | epoch 004: 1742 / 3002 loss=2.576, ppl=5.96, wps=5881.5, ups=0.09, wpb=64810, bsz=128, num_updates=10685, lr=9.99225e-05, gnorm=2.101, loss_scale=8, train_wall=11, gb_free=2.8, wall=122629
2021-06-20 04:42:45 | INFO | train_inner | epoch 004: 1743 / 3002 loss=2.698, ppl=6.49, wps=5821, ups=0.09, wpb=64797, bsz=128, num_updates=10686, lr=9.99225e-05, gnorm=2.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=122640
2021-06-20 04:42:56 | INFO | train_inner | epoch 004: 1744 / 3002 loss=2.487, ppl=5.61, wps=5907.5, ups=0.09, wpb=64855, bsz=128, num_updates=10687, lr=9.99225e-05, gnorm=2.118, loss_scale=8, train_wall=10, gb_free=2.8, wall=122651
2021-06-20 04:43:08 | INFO | train_inner | epoch 004: 1745 / 3002 loss=2.652, ppl=6.29, wps=5699.1, ups=0.09, wpb=64853, bsz=128, num_updates=10688, lr=9.99225e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=122662
2021-06-20 04:43:19 | INFO | train_inner | epoch 004: 1746 / 3002 loss=2.642, ppl=6.24, wps=5752, ups=0.09, wpb=64832, bsz=128, num_updates=10689, lr=9.99225e-05, gnorm=2.167, loss_scale=8, train_wall=11, gb_free=2.8, wall=122673
2021-06-20 04:43:30 | INFO | train_inner | epoch 004: 1747 / 3002 loss=2.628, ppl=6.18, wps=5861.5, ups=0.09, wpb=64802, bsz=128, num_updates=10690, lr=9.99225e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=122684
2021-06-20 04:43:41 | INFO | train_inner | epoch 004: 1748 / 3002 loss=2.6, ppl=6.06, wps=5784, ups=0.09, wpb=64857, bsz=128, num_updates=10691, lr=9.99225e-05, gnorm=2.398, loss_scale=8, train_wall=11, gb_free=2.8, wall=122696
2021-06-20 04:43:52 | INFO | train_inner | epoch 004: 1749 / 3002 loss=2.521, ppl=5.74, wps=6137.5, ups=0.09, wpb=64906, bsz=128, num_updates=10692, lr=9.99225e-05, gnorm=2.322, loss_scale=8, train_wall=10, gb_free=2.8, wall=122706
2021-06-20 04:44:03 | INFO | train_inner | epoch 004: 1750 / 3002 loss=2.683, ppl=6.42, wps=5805, ups=0.09, wpb=64827, bsz=128, num_updates=10693, lr=9.99224e-05, gnorm=2.146, loss_scale=8, train_wall=11, gb_free=2.8, wall=122717
2021-06-20 04:44:14 | INFO | train_inner | epoch 004: 1751 / 3002 loss=2.663, ppl=6.33, wps=5801, ups=0.09, wpb=64820, bsz=128, num_updates=10694, lr=9.99224e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=122728
2021-06-20 04:44:25 | INFO | train_inner | epoch 004: 1752 / 3002 loss=2.631, ppl=6.19, wps=5830.4, ups=0.09, wpb=64771, bsz=128, num_updates=10695, lr=9.99224e-05, gnorm=2.101, loss_scale=8, train_wall=11, gb_free=2.8, wall=122740
2021-06-20 04:44:36 | INFO | train_inner | epoch 004: 1753 / 3002 loss=2.446, ppl=5.45, wps=5828.5, ups=0.09, wpb=64814, bsz=128, num_updates=10696, lr=9.99224e-05, gnorm=2.133, loss_scale=8, train_wall=11, gb_free=2.8, wall=122751
2021-06-20 04:44:47 | INFO | train_inner | epoch 004: 1754 / 3002 loss=2.588, ppl=6.01, wps=5899.5, ups=0.09, wpb=64871, bsz=128, num_updates=10697, lr=9.99224e-05, gnorm=2.118, loss_scale=8, train_wall=11, gb_free=2.8, wall=122762
2021-06-20 04:44:59 | INFO | train_inner | epoch 004: 1755 / 3002 loss=2.397, ppl=5.27, wps=5818.3, ups=0.09, wpb=64879, bsz=128, num_updates=10698, lr=9.99224e-05, gnorm=3.268, loss_scale=8, train_wall=11, gb_free=2.8, wall=122773
2021-06-20 04:45:10 | INFO | train_inner | epoch 004: 1756 / 3002 loss=2.569, ppl=5.93, wps=5817.6, ups=0.09, wpb=64879, bsz=128, num_updates=10699, lr=9.99224e-05, gnorm=2.012, loss_scale=8, train_wall=11, gb_free=2.8, wall=122784
2021-06-20 04:45:21 | INFO | train_inner | epoch 004: 1757 / 3002 loss=2.559, ppl=5.89, wps=5895.2, ups=0.09, wpb=64846, bsz=128, num_updates=10700, lr=9.99224e-05, gnorm=2.112, loss_scale=8, train_wall=11, gb_free=2.8, wall=122795
2021-06-20 04:45:32 | INFO | train_inner | epoch 004: 1758 / 3002 loss=2.44, ppl=5.43, wps=5884.5, ups=0.09, wpb=64881, bsz=128, num_updates=10701, lr=9.99224e-05, gnorm=2.142, loss_scale=8, train_wall=11, gb_free=2.8, wall=122806
2021-06-20 04:45:43 | INFO | train_inner | epoch 004: 1759 / 3002 loss=2.515, ppl=5.72, wps=5821.1, ups=0.09, wpb=64827, bsz=128, num_updates=10702, lr=9.99224e-05, gnorm=2.11, loss_scale=8, train_wall=11, gb_free=2.8, wall=122817
2021-06-20 04:45:54 | INFO | train_inner | epoch 004: 1760 / 3002 loss=2.382, ppl=5.21, wps=5965, ups=0.09, wpb=64857, bsz=128, num_updates=10703, lr=9.99224e-05, gnorm=2.002, loss_scale=8, train_wall=10, gb_free=2.8, wall=122828
2021-06-20 04:46:05 | INFO | train_inner | epoch 004: 1761 / 3002 loss=2.651, ppl=6.28, wps=5873.6, ups=0.09, wpb=64773, bsz=128, num_updates=10704, lr=9.99224e-05, gnorm=2.135, loss_scale=8, train_wall=11, gb_free=2.8, wall=122839
2021-06-20 04:46:16 | INFO | train_inner | epoch 004: 1762 / 3002 loss=2.515, ppl=5.71, wps=5973.1, ups=0.09, wpb=64848, bsz=128, num_updates=10705, lr=9.99224e-05, gnorm=2.272, loss_scale=8, train_wall=10, gb_free=2.8, wall=122850
2021-06-20 04:46:27 | INFO | train_inner | epoch 004: 1763 / 3002 loss=2.576, ppl=5.96, wps=5910.8, ups=0.09, wpb=64887, bsz=128, num_updates=10706, lr=9.99223e-05, gnorm=2.481, loss_scale=8, train_wall=11, gb_free=2.8, wall=122861
2021-06-20 04:46:38 | INFO | train_inner | epoch 004: 1764 / 3002 loss=2.662, ppl=6.33, wps=5897.5, ups=0.09, wpb=64857, bsz=128, num_updates=10707, lr=9.99223e-05, gnorm=2.125, loss_scale=8, train_wall=11, gb_free=2.8, wall=122872
2021-06-20 04:46:49 | INFO | train_inner | epoch 004: 1765 / 3002 loss=2.616, ppl=6.13, wps=5873.7, ups=0.09, wpb=64846, bsz=128, num_updates=10708, lr=9.99223e-05, gnorm=2.051, loss_scale=8, train_wall=11, gb_free=2.8, wall=122883
2021-06-20 04:47:00 | INFO | train_inner | epoch 004: 1766 / 3002 loss=2.596, ppl=6.05, wps=5960.9, ups=0.09, wpb=64872, bsz=128, num_updates=10709, lr=9.99223e-05, gnorm=2.038, loss_scale=16, train_wall=10, gb_free=2.8, wall=122894
2021-06-20 04:47:11 | INFO | train_inner | epoch 004: 1767 / 3002 loss=2.533, ppl=5.79, wps=5861.7, ups=0.09, wpb=64872, bsz=128, num_updates=10710, lr=9.99223e-05, gnorm=2.085, loss_scale=16, train_wall=11, gb_free=2.8, wall=122905
2021-06-20 04:47:22 | INFO | train_inner | epoch 004: 1768 / 3002 loss=2.615, ppl=6.12, wps=5913.7, ups=0.09, wpb=64896, bsz=128, num_updates=10711, lr=9.99223e-05, gnorm=2.121, loss_scale=16, train_wall=11, gb_free=2.8, wall=122916
2021-06-20 04:47:33 | INFO | train_inner | epoch 004: 1769 / 3002 loss=2.451, ppl=5.47, wps=5830.6, ups=0.09, wpb=64774, bsz=128, num_updates=10712, lr=9.99223e-05, gnorm=2.021, loss_scale=16, train_wall=11, gb_free=2.8, wall=122927
2021-06-20 04:47:44 | INFO | train_inner | epoch 004: 1770 / 3002 loss=2.602, ppl=6.07, wps=5755.7, ups=0.09, wpb=64811, bsz=128, num_updates=10713, lr=9.99223e-05, gnorm=2.018, loss_scale=16, train_wall=11, gb_free=2.8, wall=122938
2021-06-20 04:47:55 | INFO | train_inner | epoch 004: 1771 / 3002 loss=2.497, ppl=5.64, wps=5770.5, ups=0.09, wpb=64852, bsz=128, num_updates=10714, lr=9.99223e-05, gnorm=2.043, loss_scale=16, train_wall=11, gb_free=2.8, wall=122949
2021-06-20 04:48:06 | INFO | train_inner | epoch 004: 1772 / 3002 loss=2.71, ppl=6.54, wps=5799.3, ups=0.09, wpb=64775, bsz=128, num_updates=10715, lr=9.99223e-05, gnorm=2.278, loss_scale=16, train_wall=11, gb_free=2.8, wall=122961
2021-06-20 04:48:17 | INFO | train_inner | epoch 004: 1773 / 3002 loss=2.561, ppl=5.9, wps=5806.4, ups=0.09, wpb=64846, bsz=128, num_updates=10716, lr=9.99223e-05, gnorm=3.744, loss_scale=16, train_wall=11, gb_free=2.8, wall=122972
2021-06-20 04:48:29 | INFO | train_inner | epoch 004: 1774 / 3002 loss=2.657, ppl=6.31, wps=5745.4, ups=0.09, wpb=64802, bsz=128, num_updates=10717, lr=9.99223e-05, gnorm=2.205, loss_scale=16, train_wall=11, gb_free=2.8, wall=122983
2021-06-20 04:48:40 | INFO | train_inner | epoch 004: 1775 / 3002 loss=2.608, ppl=6.1, wps=5858.8, ups=0.09, wpb=64826, bsz=128, num_updates=10718, lr=9.99222e-05, gnorm=2.138, loss_scale=16, train_wall=11, gb_free=2.8, wall=122994
2021-06-20 04:48:51 | INFO | train_inner | epoch 004: 1776 / 3002 loss=2.564, ppl=5.91, wps=5894.1, ups=0.09, wpb=64897, bsz=128, num_updates=10719, lr=9.99222e-05, gnorm=2.078, loss_scale=16, train_wall=11, gb_free=2.8, wall=123005
2021-06-20 04:49:02 | INFO | train_inner | epoch 004: 1777 / 3002 loss=2.435, ppl=5.41, wps=5872.8, ups=0.09, wpb=64802, bsz=128, num_updates=10720, lr=9.99222e-05, gnorm=2.012, loss_scale=16, train_wall=11, gb_free=2.8, wall=123016
2021-06-20 04:49:13 | INFO | train_inner | epoch 004: 1778 / 3002 loss=2.608, ppl=6.1, wps=5916.1, ups=0.09, wpb=64887, bsz=128, num_updates=10721, lr=9.99222e-05, gnorm=2.039, loss_scale=16, train_wall=11, gb_free=2.8, wall=123027
2021-06-20 04:49:24 | INFO | train_inner | epoch 004: 1779 / 3002 loss=2.489, ppl=5.61, wps=5917, ups=0.09, wpb=64814, bsz=128, num_updates=10722, lr=9.99222e-05, gnorm=1.947, loss_scale=16, train_wall=10, gb_free=2.8, wall=123038
2021-06-20 04:49:35 | INFO | train_inner | epoch 004: 1780 / 3002 loss=2.578, ppl=5.97, wps=5936.4, ups=0.09, wpb=64914, bsz=128, num_updates=10723, lr=9.99222e-05, gnorm=1.994, loss_scale=16, train_wall=11, gb_free=2.8, wall=123049
2021-06-20 04:49:46 | INFO | train_inner | epoch 004: 1781 / 3002 loss=2.605, ppl=6.08, wps=5845.2, ups=0.09, wpb=64799, bsz=128, num_updates=10724, lr=9.99222e-05, gnorm=2.082, loss_scale=16, train_wall=11, gb_free=2.8, wall=123060
2021-06-20 04:49:57 | INFO | train_inner | epoch 004: 1782 / 3002 loss=2.628, ppl=6.18, wps=6016.6, ups=0.09, wpb=64787, bsz=128, num_updates=10725, lr=9.99222e-05, gnorm=2.004, loss_scale=16, train_wall=10, gb_free=2.8, wall=123071
2021-06-20 04:50:08 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 04:50:19 | INFO | train_inner | epoch 004: 1784 / 3002 loss=2.495, ppl=5.64, wps=2941.9, ups=0.05, wpb=64766, bsz=128, num_updates=10726, lr=9.99222e-05, gnorm=2.084, loss_scale=8, train_wall=21, gb_free=2.8, wall=123093
2021-06-20 04:50:30 | INFO | train_inner | epoch 004: 1785 / 3002 loss=2.479, ppl=5.57, wps=5884, ups=0.09, wpb=64842, bsz=128, num_updates=10727, lr=9.99222e-05, gnorm=2.503, loss_scale=8, train_wall=11, gb_free=2.8, wall=123104
2021-06-20 04:50:41 | INFO | train_inner | epoch 004: 1786 / 3002 loss=2.541, ppl=5.82, wps=5773.5, ups=0.09, wpb=64902, bsz=128, num_updates=10728, lr=9.99222e-05, gnorm=2.261, loss_scale=8, train_wall=11, gb_free=2.8, wall=123115
2021-06-20 04:50:52 | INFO | train_inner | epoch 004: 1787 / 3002 loss=2.593, ppl=6.03, wps=5805.5, ups=0.09, wpb=64871, bsz=128, num_updates=10729, lr=9.99222e-05, gnorm=2.091, loss_scale=8, train_wall=11, gb_free=2.8, wall=123126
2021-06-20 04:51:03 | INFO | train_inner | epoch 004: 1788 / 3002 loss=2.573, ppl=5.95, wps=5908.2, ups=0.09, wpb=64855, bsz=128, num_updates=10730, lr=9.99222e-05, gnorm=2.107, loss_scale=8, train_wall=11, gb_free=2.8, wall=123137
2021-06-20 04:51:14 | INFO | train_inner | epoch 004: 1789 / 3002 loss=2.524, ppl=5.75, wps=5880.3, ups=0.09, wpb=64823, bsz=128, num_updates=10731, lr=9.99221e-05, gnorm=2.118, loss_scale=8, train_wall=11, gb_free=2.8, wall=123148
2021-06-20 04:51:25 | INFO | train_inner | epoch 004: 1790 / 3002 loss=2.531, ppl=5.78, wps=5872.3, ups=0.09, wpb=64764, bsz=128, num_updates=10732, lr=9.99221e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=123159
2021-06-20 04:51:36 | INFO | train_inner | epoch 004: 1791 / 3002 loss=2.528, ppl=5.77, wps=5861, ups=0.09, wpb=64810, bsz=128, num_updates=10733, lr=9.99221e-05, gnorm=2.186, loss_scale=8, train_wall=11, gb_free=2.8, wall=123170
2021-06-20 04:51:47 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-20 04:51:58 | INFO | train_inner | epoch 004: 1793 / 3002 loss=2.55, ppl=5.86, wps=2949.6, ups=0.05, wpb=64794, bsz=128, num_updates=10734, lr=9.99221e-05, gnorm=2.061, loss_scale=4, train_wall=21, gb_free=2.8, wall=123192
2021-06-20 04:52:09 | INFO | train_inner | epoch 004: 1794 / 3002 loss=2.612, ppl=6.11, wps=5861.2, ups=0.09, wpb=64832, bsz=128, num_updates=10735, lr=9.99221e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=123203
2021-06-20 04:52:20 | INFO | train_inner | epoch 004: 1795 / 3002 loss=2.509, ppl=5.69, wps=5827.2, ups=0.09, wpb=64805, bsz=128, num_updates=10736, lr=9.99221e-05, gnorm=2.522, loss_scale=4, train_wall=11, gb_free=2.8, wall=123215
2021-06-20 04:52:32 | INFO | train_inner | epoch 004: 1796 / 3002 loss=2.482, ppl=5.59, wps=5746.2, ups=0.09, wpb=64817, bsz=128, num_updates=10737, lr=9.99221e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=123226
2021-06-20 04:52:43 | INFO | train_inner | epoch 004: 1797 / 3002 loss=2.425, ppl=5.37, wps=5801.5, ups=0.09, wpb=64841, bsz=128, num_updates=10738, lr=9.99221e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=123237
2021-06-20 04:52:54 | INFO | train_inner | epoch 004: 1798 / 3002 loss=2.643, ppl=6.25, wps=5868.7, ups=0.09, wpb=64755, bsz=128, num_updates=10739, lr=9.99221e-05, gnorm=5.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=123248
2021-06-20 04:53:05 | INFO | train_inner | epoch 004: 1799 / 3002 loss=2.568, ppl=5.93, wps=5828.8, ups=0.09, wpb=64735, bsz=128, num_updates=10740, lr=9.99221e-05, gnorm=2.09, loss_scale=4, train_wall=11, gb_free=2.8, wall=123259
2021-06-20 04:53:16 | INFO | train_inner | epoch 004: 1800 / 3002 loss=2.547, ppl=5.84, wps=5796.8, ups=0.09, wpb=64839, bsz=128, num_updates=10741, lr=9.99221e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=123270
2021-06-20 04:53:27 | INFO | train_inner | epoch 004: 1801 / 3002 loss=2.542, ppl=5.82, wps=5800.3, ups=0.09, wpb=64912, bsz=128, num_updates=10742, lr=9.99221e-05, gnorm=2.535, loss_scale=4, train_wall=11, gb_free=2.8, wall=123282
2021-06-20 04:53:38 | INFO | train_inner | epoch 004: 1802 / 3002 loss=2.565, ppl=5.92, wps=5862.3, ups=0.09, wpb=64773, bsz=128, num_updates=10743, lr=9.9922e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=123293
2021-06-20 04:53:49 | INFO | train_inner | epoch 004: 1803 / 3002 loss=2.429, ppl=5.38, wps=5827.5, ups=0.09, wpb=64775, bsz=128, num_updates=10744, lr=9.9922e-05, gnorm=2.102, loss_scale=4, train_wall=11, gb_free=2.8, wall=123304
2021-06-20 04:54:00 | INFO | train_inner | epoch 004: 1804 / 3002 loss=2.607, ppl=6.09, wps=5864, ups=0.09, wpb=64822, bsz=128, num_updates=10745, lr=9.9922e-05, gnorm=2.462, loss_scale=4, train_wall=11, gb_free=2.8, wall=123315
2021-06-20 04:54:12 | INFO | train_inner | epoch 004: 1805 / 3002 loss=2.499, ppl=5.65, wps=5790.3, ups=0.09, wpb=64781, bsz=128, num_updates=10746, lr=9.9922e-05, gnorm=2.065, loss_scale=4, train_wall=11, gb_free=2.8, wall=123326
2021-06-20 04:54:23 | INFO | train_inner | epoch 004: 1806 / 3002 loss=2.513, ppl=5.71, wps=5788, ups=0.09, wpb=64828, bsz=128, num_updates=10747, lr=9.9922e-05, gnorm=2.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=123337
2021-06-20 04:54:34 | INFO | train_inner | epoch 004: 1807 / 3002 loss=2.614, ppl=6.12, wps=5920.6, ups=0.09, wpb=64865, bsz=128, num_updates=10748, lr=9.9922e-05, gnorm=2.083, loss_scale=4, train_wall=11, gb_free=2.8, wall=123348
2021-06-20 04:54:45 | INFO | train_inner | epoch 004: 1808 / 3002 loss=2.529, ppl=5.77, wps=5870, ups=0.09, wpb=64882, bsz=128, num_updates=10749, lr=9.9922e-05, gnorm=2.103, loss_scale=4, train_wall=11, gb_free=2.8, wall=123359
2021-06-20 04:54:56 | INFO | train_inner | epoch 004: 1809 / 3002 loss=2.463, ppl=5.51, wps=5925.9, ups=0.09, wpb=64881, bsz=128, num_updates=10750, lr=9.9922e-05, gnorm=2.044, loss_scale=4, train_wall=10, gb_free=2.8, wall=123370
2021-06-20 04:55:07 | INFO | train_inner | epoch 004: 1810 / 3002 loss=2.739, ppl=6.68, wps=5884.3, ups=0.09, wpb=64797, bsz=128, num_updates=10751, lr=9.9922e-05, gnorm=2.052, loss_scale=4, train_wall=11, gb_free=2.8, wall=123381
2021-06-20 04:55:18 | INFO | train_inner | epoch 004: 1811 / 3002 loss=2.569, ppl=5.93, wps=5832.4, ups=0.09, wpb=64827, bsz=128, num_updates=10752, lr=9.9922e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=123392
2021-06-20 04:55:29 | INFO | train_inner | epoch 004: 1812 / 3002 loss=2.577, ppl=5.97, wps=5771.7, ups=0.09, wpb=64836, bsz=128, num_updates=10753, lr=9.9922e-05, gnorm=8.358, loss_scale=4, train_wall=11, gb_free=2.8, wall=123404
2021-06-20 04:55:40 | INFO | train_inner | epoch 004: 1813 / 3002 loss=2.657, ppl=6.31, wps=5864.7, ups=0.09, wpb=64881, bsz=128, num_updates=10754, lr=9.9922e-05, gnorm=1.997, loss_scale=4, train_wall=11, gb_free=2.8, wall=123415
2021-06-20 04:55:51 | INFO | train_inner | epoch 004: 1814 / 3002 loss=2.661, ppl=6.33, wps=5884.2, ups=0.09, wpb=64859, bsz=128, num_updates=10755, lr=9.9922e-05, gnorm=2.113, loss_scale=4, train_wall=11, gb_free=2.8, wall=123426
2021-06-20 04:56:03 | INFO | train_inner | epoch 004: 1815 / 3002 loss=2.677, ppl=6.4, wps=5732.1, ups=0.09, wpb=64795, bsz=128, num_updates=10756, lr=9.99219e-05, gnorm=2.089, loss_scale=4, train_wall=11, gb_free=2.8, wall=123437
2021-06-20 04:56:14 | INFO | train_inner | epoch 004: 1816 / 3002 loss=2.607, ppl=6.09, wps=5832.2, ups=0.09, wpb=64789, bsz=128, num_updates=10757, lr=9.99219e-05, gnorm=2.141, loss_scale=4, train_wall=11, gb_free=2.8, wall=123448
2021-06-20 04:56:25 | INFO | train_inner | epoch 004: 1817 / 3002 loss=2.718, ppl=6.58, wps=5825.9, ups=0.09, wpb=64830, bsz=128, num_updates=10758, lr=9.99219e-05, gnorm=2.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=123459
2021-06-20 04:56:36 | INFO | train_inner | epoch 004: 1818 / 3002 loss=2.691, ppl=6.46, wps=5815.8, ups=0.09, wpb=64851, bsz=128, num_updates=10759, lr=9.99219e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=123470
2021-06-20 04:56:47 | INFO | train_inner | epoch 004: 1819 / 3002 loss=2.469, ppl=5.53, wps=5820.1, ups=0.09, wpb=64822, bsz=128, num_updates=10760, lr=9.99219e-05, gnorm=2.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=123481
2021-06-20 04:56:58 | INFO | train_inner | epoch 004: 1820 / 3002 loss=2.463, ppl=5.51, wps=5897.1, ups=0.09, wpb=64867, bsz=128, num_updates=10761, lr=9.99219e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=123492
2021-06-20 04:57:09 | INFO | train_inner | epoch 004: 1821 / 3002 loss=2.42, ppl=5.35, wps=5900.2, ups=0.09, wpb=64787, bsz=128, num_updates=10762, lr=9.99219e-05, gnorm=2.008, loss_scale=4, train_wall=10, gb_free=2.8, wall=123503
2021-06-20 04:57:20 | INFO | train_inner | epoch 004: 1822 / 3002 loss=2.529, ppl=5.77, wps=5853.3, ups=0.09, wpb=64832, bsz=128, num_updates=10763, lr=9.99219e-05, gnorm=2.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=123514
2021-06-20 04:57:31 | INFO | train_inner | epoch 004: 1823 / 3002 loss=2.442, ppl=5.43, wps=5765.5, ups=0.09, wpb=64826, bsz=128, num_updates=10764, lr=9.99219e-05, gnorm=2.072, loss_scale=4, train_wall=11, gb_free=2.8, wall=123526
2021-06-20 04:57:43 | INFO | train_inner | epoch 004: 1824 / 3002 loss=2.719, ppl=6.58, wps=5763, ups=0.09, wpb=64845, bsz=128, num_updates=10765, lr=9.99219e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=123537
2021-06-20 04:57:54 | INFO | train_inner | epoch 004: 1825 / 3002 loss=2.511, ppl=5.7, wps=5942.6, ups=0.09, wpb=64884, bsz=128, num_updates=10766, lr=9.99219e-05, gnorm=2.087, loss_scale=4, train_wall=10, gb_free=2.8, wall=123548
2021-06-20 04:58:05 | INFO | train_inner | epoch 004: 1826 / 3002 loss=2.551, ppl=5.86, wps=5728.7, ups=0.09, wpb=64824, bsz=128, num_updates=10767, lr=9.99219e-05, gnorm=2.109, loss_scale=4, train_wall=11, gb_free=2.8, wall=123559
2021-06-20 04:58:16 | INFO | train_inner | epoch 004: 1827 / 3002 loss=2.565, ppl=5.92, wps=5876.6, ups=0.09, wpb=64819, bsz=128, num_updates=10768, lr=9.99218e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=123570
2021-06-20 04:58:27 | INFO | train_inner | epoch 004: 1828 / 3002 loss=2.478, ppl=5.57, wps=5887.6, ups=0.09, wpb=64782, bsz=128, num_updates=10769, lr=9.99218e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=123581
2021-06-20 04:58:38 | INFO | train_inner | epoch 004: 1829 / 3002 loss=2.527, ppl=5.76, wps=5795.4, ups=0.09, wpb=64895, bsz=128, num_updates=10770, lr=9.99218e-05, gnorm=2.085, loss_scale=4, train_wall=11, gb_free=2.8, wall=123592
2021-06-20 04:58:49 | INFO | train_inner | epoch 004: 1830 / 3002 loss=2.543, ppl=5.83, wps=5848.1, ups=0.09, wpb=64807, bsz=128, num_updates=10771, lr=9.99218e-05, gnorm=2.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=123604
2021-06-20 04:59:00 | INFO | train_inner | epoch 004: 1831 / 3002 loss=2.587, ppl=6.01, wps=5873.4, ups=0.09, wpb=64892, bsz=128, num_updates=10772, lr=9.99218e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=123615
2021-06-20 04:59:11 | INFO | train_inner | epoch 004: 1832 / 3002 loss=2.812, ppl=7.02, wps=5759.5, ups=0.09, wpb=64756, bsz=128, num_updates=10773, lr=9.99218e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=123626
2021-06-20 04:59:22 | INFO | train_inner | epoch 004: 1833 / 3002 loss=2.481, ppl=5.58, wps=5894.5, ups=0.09, wpb=64795, bsz=128, num_updates=10774, lr=9.99218e-05, gnorm=2.107, loss_scale=4, train_wall=11, gb_free=2.8, wall=123637
2021-06-20 04:59:34 | INFO | train_inner | epoch 004: 1834 / 3002 loss=2.594, ppl=6.04, wps=5781.9, ups=0.09, wpb=64904, bsz=128, num_updates=10775, lr=9.99218e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=123648
2021-06-20 04:59:45 | INFO | train_inner | epoch 004: 1835 / 3002 loss=2.546, ppl=5.84, wps=5801.4, ups=0.09, wpb=64779, bsz=128, num_updates=10776, lr=9.99218e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=123659
2021-06-20 04:59:56 | INFO | train_inner | epoch 004: 1836 / 3002 loss=2.565, ppl=5.92, wps=5834.4, ups=0.09, wpb=64783, bsz=128, num_updates=10777, lr=9.99218e-05, gnorm=2.084, loss_scale=4, train_wall=11, gb_free=2.8, wall=123670
2021-06-20 05:00:07 | INFO | train_inner | epoch 004: 1837 / 3002 loss=2.565, ppl=5.92, wps=6062.8, ups=0.09, wpb=64818, bsz=128, num_updates=10778, lr=9.99218e-05, gnorm=1.939, loss_scale=4, train_wall=10, gb_free=2.8, wall=123681
2021-06-20 05:00:18 | INFO | train_inner | epoch 004: 1838 / 3002 loss=2.457, ppl=5.49, wps=5963.4, ups=0.09, wpb=64914, bsz=128, num_updates=10779, lr=9.99218e-05, gnorm=1.989, loss_scale=4, train_wall=10, gb_free=2.8, wall=123692
2021-06-20 05:00:29 | INFO | train_inner | epoch 004: 1839 / 3002 loss=2.446, ppl=5.45, wps=5834.6, ups=0.09, wpb=64899, bsz=128, num_updates=10780, lr=9.99218e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=123703
2021-06-20 05:00:39 | INFO | train_inner | epoch 004: 1840 / 3002 loss=2.65, ppl=6.28, wps=6009.2, ups=0.09, wpb=64746, bsz=128, num_updates=10781, lr=9.99217e-05, gnorm=2.009, loss_scale=4, train_wall=10, gb_free=2.8, wall=123714
2021-06-20 05:00:51 | INFO | train_inner | epoch 004: 1841 / 3002 loss=2.474, ppl=5.56, wps=5768.3, ups=0.09, wpb=64853, bsz=128, num_updates=10782, lr=9.99217e-05, gnorm=2.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=123725
2021-06-20 05:01:02 | INFO | train_inner | epoch 004: 1842 / 3002 loss=2.622, ppl=6.15, wps=5855.9, ups=0.09, wpb=64821, bsz=128, num_updates=10783, lr=9.99217e-05, gnorm=2.076, loss_scale=4, train_wall=11, gb_free=2.8, wall=123736
2021-06-20 05:01:13 | INFO | train_inner | epoch 004: 1843 / 3002 loss=2.523, ppl=5.75, wps=5971.4, ups=0.09, wpb=64825, bsz=128, num_updates=10784, lr=9.99217e-05, gnorm=2.066, loss_scale=4, train_wall=10, gb_free=2.8, wall=123747
2021-06-20 05:01:24 | INFO | train_inner | epoch 004: 1844 / 3002 loss=2.555, ppl=5.88, wps=5725.5, ups=0.09, wpb=64758, bsz=128, num_updates=10785, lr=9.99217e-05, gnorm=2.094, loss_scale=4, train_wall=11, gb_free=2.8, wall=123758
2021-06-20 05:01:35 | INFO | train_inner | epoch 004: 1845 / 3002 loss=2.519, ppl=5.73, wps=5921.3, ups=0.09, wpb=64879, bsz=128, num_updates=10786, lr=9.99217e-05, gnorm=1.951, loss_scale=4, train_wall=11, gb_free=2.8, wall=123769
2021-06-20 05:01:46 | INFO | train_inner | epoch 004: 1846 / 3002 loss=2.672, ppl=6.37, wps=5777.4, ups=0.09, wpb=64823, bsz=128, num_updates=10787, lr=9.99217e-05, gnorm=1.943, loss_scale=4, train_wall=11, gb_free=2.8, wall=123780
2021-06-20 05:01:57 | INFO | train_inner | epoch 004: 1847 / 3002 loss=2.538, ppl=5.81, wps=5794.3, ups=0.09, wpb=64899, bsz=128, num_updates=10788, lr=9.99217e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=123792
2021-06-20 05:02:08 | INFO | train_inner | epoch 004: 1848 / 3002 loss=2.507, ppl=5.68, wps=5883.7, ups=0.09, wpb=64838, bsz=128, num_updates=10789, lr=9.99217e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=123803
2021-06-20 05:02:19 | INFO | train_inner | epoch 004: 1849 / 3002 loss=2.505, ppl=5.67, wps=5866.3, ups=0.09, wpb=64883, bsz=128, num_updates=10790, lr=9.99217e-05, gnorm=2.014, loss_scale=4, train_wall=11, gb_free=2.8, wall=123814
2021-06-20 05:02:30 | INFO | train_inner | epoch 004: 1850 / 3002 loss=2.701, ppl=6.5, wps=5881.7, ups=0.09, wpb=64835, bsz=128, num_updates=10791, lr=9.99217e-05, gnorm=2.193, loss_scale=4, train_wall=11, gb_free=2.8, wall=123825
2021-06-20 05:02:41 | INFO | train_inner | epoch 004: 1851 / 3002 loss=2.524, ppl=5.75, wps=5859, ups=0.09, wpb=64795, bsz=128, num_updates=10792, lr=9.99217e-05, gnorm=2.195, loss_scale=4, train_wall=11, gb_free=2.8, wall=123836
2021-06-20 05:02:53 | INFO | train_inner | epoch 004: 1852 / 3002 loss=2.649, ppl=6.27, wps=5881.6, ups=0.09, wpb=64852, bsz=128, num_updates=10793, lr=9.99216e-05, gnorm=2.096, loss_scale=4, train_wall=11, gb_free=2.8, wall=123847
2021-06-20 05:03:03 | INFO | train_inner | epoch 004: 1853 / 3002 loss=2.598, ppl=6.05, wps=5967, ups=0.09, wpb=64809, bsz=128, num_updates=10794, lr=9.99216e-05, gnorm=2.39, loss_scale=4, train_wall=10, gb_free=2.8, wall=123858
2021-06-20 05:03:14 | INFO | train_inner | epoch 004: 1854 / 3002 loss=2.675, ppl=6.39, wps=5969, ups=0.09, wpb=64767, bsz=128, num_updates=10795, lr=9.99216e-05, gnorm=1.999, loss_scale=4, train_wall=10, gb_free=2.8, wall=123869
2021-06-20 05:03:25 | INFO | train_inner | epoch 004: 1855 / 3002 loss=2.492, ppl=5.62, wps=5878.9, ups=0.09, wpb=64913, bsz=128, num_updates=10796, lr=9.99216e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=123880
2021-06-20 05:03:36 | INFO | train_inner | epoch 004: 1856 / 3002 loss=2.574, ppl=5.95, wps=5962.5, ups=0.09, wpb=64913, bsz=128, num_updates=10797, lr=9.99216e-05, gnorm=2.032, loss_scale=4, train_wall=10, gb_free=2.8, wall=123890
2021-06-20 05:03:47 | INFO | train_inner | epoch 004: 1857 / 3002 loss=2.622, ppl=6.16, wps=5830.1, ups=0.09, wpb=64859, bsz=128, num_updates=10798, lr=9.99216e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=123902
2021-06-20 05:03:58 | INFO | train_inner | epoch 004: 1858 / 3002 loss=2.431, ppl=5.39, wps=5902.3, ups=0.09, wpb=64884, bsz=128, num_updates=10799, lr=9.99216e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=123913
2021-06-20 05:04:09 | INFO | train_inner | epoch 004: 1859 / 3002 loss=2.486, ppl=5.6, wps=5807.9, ups=0.09, wpb=64907, bsz=128, num_updates=10800, lr=9.99216e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=123924
2021-06-20 05:04:20 | INFO | train_inner | epoch 004: 1860 / 3002 loss=2.7, ppl=6.5, wps=5907.1, ups=0.09, wpb=64882, bsz=128, num_updates=10801, lr=9.99216e-05, gnorm=2.173, loss_scale=4, train_wall=11, gb_free=2.8, wall=123935
2021-06-20 05:04:31 | INFO | train_inner | epoch 004: 1861 / 3002 loss=2.475, ppl=5.56, wps=5901.3, ups=0.09, wpb=64837, bsz=128, num_updates=10802, lr=9.99216e-05, gnorm=2.119, loss_scale=4, train_wall=11, gb_free=2.8, wall=123946
2021-06-20 05:04:42 | INFO | train_inner | epoch 004: 1862 / 3002 loss=2.563, ppl=5.91, wps=5985.5, ups=0.09, wpb=64888, bsz=128, num_updates=10803, lr=9.99216e-05, gnorm=2.133, loss_scale=4, train_wall=10, gb_free=2.8, wall=123957
2021-06-20 05:04:53 | INFO | train_inner | epoch 004: 1863 / 3002 loss=2.488, ppl=5.61, wps=5801.7, ups=0.09, wpb=64749, bsz=128, num_updates=10804, lr=9.99216e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=123968
2021-06-20 05:05:04 | INFO | train_inner | epoch 004: 1864 / 3002 loss=2.603, ppl=6.07, wps=5922.3, ups=0.09, wpb=64810, bsz=128, num_updates=10805, lr=9.99216e-05, gnorm=2.044, loss_scale=4, train_wall=10, gb_free=2.8, wall=123979
2021-06-20 05:05:15 | INFO | train_inner | epoch 004: 1865 / 3002 loss=2.681, ppl=6.41, wps=5902.7, ups=0.09, wpb=64834, bsz=128, num_updates=10806, lr=9.99215e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=123990
2021-06-20 05:05:26 | INFO | train_inner | epoch 004: 1866 / 3002 loss=2.602, ppl=6.07, wps=5916.6, ups=0.09, wpb=64816, bsz=128, num_updates=10807, lr=9.99215e-05, gnorm=2.072, loss_scale=4, train_wall=11, gb_free=2.8, wall=124001
2021-06-20 05:05:37 | INFO | train_inner | epoch 004: 1867 / 3002 loss=2.562, ppl=5.91, wps=5852.4, ups=0.09, wpb=64852, bsz=128, num_updates=10808, lr=9.99215e-05, gnorm=2.031, loss_scale=4, train_wall=11, gb_free=2.8, wall=124012
2021-06-20 05:05:48 | INFO | train_inner | epoch 004: 1868 / 3002 loss=2.538, ppl=5.81, wps=5857.2, ups=0.09, wpb=64805, bsz=128, num_updates=10809, lr=9.99215e-05, gnorm=7.37, loss_scale=4, train_wall=11, gb_free=2.8, wall=124023
2021-06-20 05:06:00 | INFO | train_inner | epoch 004: 1869 / 3002 loss=2.597, ppl=6.05, wps=5778.7, ups=0.09, wpb=64760, bsz=128, num_updates=10810, lr=9.99215e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=124034
2021-06-20 05:06:11 | INFO | train_inner | epoch 004: 1870 / 3002 loss=2.692, ppl=6.46, wps=5879, ups=0.09, wpb=64820, bsz=128, num_updates=10811, lr=9.99215e-05, gnorm=1.962, loss_scale=4, train_wall=11, gb_free=2.8, wall=124045
2021-06-20 05:06:22 | INFO | train_inner | epoch 004: 1871 / 3002 loss=2.514, ppl=5.71, wps=5903.3, ups=0.09, wpb=64752, bsz=128, num_updates=10812, lr=9.99215e-05, gnorm=2.166, loss_scale=4, train_wall=11, gb_free=2.8, wall=124056
2021-06-20 05:06:33 | INFO | train_inner | epoch 004: 1872 / 3002 loss=2.623, ppl=6.16, wps=5943.4, ups=0.09, wpb=64901, bsz=128, num_updates=10813, lr=9.99215e-05, gnorm=2.139, loss_scale=4, train_wall=10, gb_free=2.8, wall=124067
2021-06-20 05:06:44 | INFO | train_inner | epoch 004: 1873 / 3002 loss=2.497, ppl=5.64, wps=5800.9, ups=0.09, wpb=64750, bsz=128, num_updates=10814, lr=9.99215e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=124078
2021-06-20 05:06:55 | INFO | train_inner | epoch 004: 1874 / 3002 loss=2.515, ppl=5.72, wps=5843.6, ups=0.09, wpb=64858, bsz=128, num_updates=10815, lr=9.99215e-05, gnorm=1.959, loss_scale=4, train_wall=11, gb_free=2.8, wall=124089
2021-06-20 05:07:06 | INFO | train_inner | epoch 004: 1875 / 3002 loss=2.488, ppl=5.61, wps=5821.9, ups=0.09, wpb=64767, bsz=128, num_updates=10816, lr=9.99215e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=124100
2021-06-20 05:07:17 | INFO | train_inner | epoch 004: 1876 / 3002 loss=2.624, ppl=6.16, wps=5867.9, ups=0.09, wpb=64807, bsz=128, num_updates=10817, lr=9.99215e-05, gnorm=2.212, loss_scale=4, train_wall=11, gb_free=2.8, wall=124111
2021-06-20 05:07:28 | INFO | train_inner | epoch 004: 1877 / 3002 loss=2.435, ppl=5.41, wps=5975.9, ups=0.09, wpb=64872, bsz=128, num_updates=10818, lr=9.99214e-05, gnorm=2.053, loss_scale=4, train_wall=10, gb_free=2.8, wall=124122
2021-06-20 05:07:39 | INFO | train_inner | epoch 004: 1878 / 3002 loss=2.709, ppl=6.54, wps=5994.3, ups=0.09, wpb=64855, bsz=128, num_updates=10819, lr=9.99214e-05, gnorm=2.109, loss_scale=4, train_wall=10, gb_free=2.8, wall=124133
2021-06-20 05:07:50 | INFO | train_inner | epoch 004: 1879 / 3002 loss=2.545, ppl=5.84, wps=5856.1, ups=0.09, wpb=64842, bsz=128, num_updates=10820, lr=9.99214e-05, gnorm=2.06, loss_scale=4, train_wall=11, gb_free=2.8, wall=124144
2021-06-20 05:08:01 | INFO | train_inner | epoch 004: 1880 / 3002 loss=2.541, ppl=5.82, wps=5882.5, ups=0.09, wpb=64740, bsz=128, num_updates=10821, lr=9.99214e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=124155
2021-06-20 05:08:12 | INFO | train_inner | epoch 004: 1881 / 3002 loss=2.603, ppl=6.08, wps=5706.3, ups=0.09, wpb=64840, bsz=128, num_updates=10822, lr=9.99214e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=124166
2021-06-20 05:08:23 | INFO | train_inner | epoch 004: 1882 / 3002 loss=2.689, ppl=6.45, wps=5817.1, ups=0.09, wpb=64865, bsz=128, num_updates=10823, lr=9.99214e-05, gnorm=2.127, loss_scale=4, train_wall=11, gb_free=2.8, wall=124178
2021-06-20 05:08:34 | INFO | train_inner | epoch 004: 1883 / 3002 loss=2.558, ppl=5.89, wps=5946, ups=0.09, wpb=64932, bsz=128, num_updates=10824, lr=9.99214e-05, gnorm=2.054, loss_scale=4, train_wall=10, gb_free=2.8, wall=124189
2021-06-20 05:08:45 | INFO | train_inner | epoch 004: 1884 / 3002 loss=2.562, ppl=5.9, wps=5869.9, ups=0.09, wpb=64855, bsz=128, num_updates=10825, lr=9.99214e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=124200
2021-06-20 05:08:56 | INFO | train_inner | epoch 004: 1885 / 3002 loss=2.574, ppl=5.95, wps=5931.1, ups=0.09, wpb=64860, bsz=128, num_updates=10826, lr=9.99214e-05, gnorm=2.147, loss_scale=4, train_wall=10, gb_free=2.8, wall=124211
2021-06-20 05:09:07 | INFO | train_inner | epoch 004: 1886 / 3002 loss=2.56, ppl=5.9, wps=5886.4, ups=0.09, wpb=64871, bsz=128, num_updates=10827, lr=9.99214e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=124222
2021-06-20 05:09:18 | INFO | train_inner | epoch 004: 1887 / 3002 loss=2.671, ppl=6.37, wps=5903.3, ups=0.09, wpb=64892, bsz=128, num_updates=10828, lr=9.99214e-05, gnorm=2.143, loss_scale=4, train_wall=11, gb_free=2.8, wall=124233
2021-06-20 05:09:29 | INFO | train_inner | epoch 004: 1888 / 3002 loss=2.652, ppl=6.29, wps=5926.8, ups=0.09, wpb=64917, bsz=128, num_updates=10829, lr=9.99214e-05, gnorm=2.21, loss_scale=4, train_wall=10, gb_free=2.8, wall=124243
2021-06-20 05:09:40 | INFO | train_inner | epoch 004: 1889 / 3002 loss=2.527, ppl=5.76, wps=5872.2, ups=0.09, wpb=64815, bsz=128, num_updates=10830, lr=9.99214e-05, gnorm=2.484, loss_scale=4, train_wall=11, gb_free=2.8, wall=124255
2021-06-20 05:09:51 | INFO | train_inner | epoch 004: 1890 / 3002 loss=2.537, ppl=5.81, wps=5841.3, ups=0.09, wpb=64757, bsz=128, num_updates=10831, lr=9.99213e-05, gnorm=2.135, loss_scale=4, train_wall=11, gb_free=2.8, wall=124266
2021-06-20 05:10:02 | INFO | train_inner | epoch 004: 1891 / 3002 loss=2.658, ppl=6.31, wps=5912.2, ups=0.09, wpb=64889, bsz=128, num_updates=10832, lr=9.99213e-05, gnorm=2.053, loss_scale=4, train_wall=11, gb_free=2.8, wall=124277
2021-06-20 05:10:13 | INFO | train_inner | epoch 004: 1892 / 3002 loss=2.653, ppl=6.29, wps=5785.1, ups=0.09, wpb=64848, bsz=128, num_updates=10833, lr=9.99213e-05, gnorm=2.815, loss_scale=4, train_wall=11, gb_free=2.8, wall=124288
2021-06-20 05:10:24 | INFO | train_inner | epoch 004: 1893 / 3002 loss=2.727, ppl=6.62, wps=5941.4, ups=0.09, wpb=64865, bsz=128, num_updates=10834, lr=9.99213e-05, gnorm=3.259, loss_scale=4, train_wall=10, gb_free=2.8, wall=124299
2021-06-20 05:10:35 | INFO | train_inner | epoch 004: 1894 / 3002 loss=2.563, ppl=5.91, wps=5841.6, ups=0.09, wpb=64763, bsz=128, num_updates=10835, lr=9.99213e-05, gnorm=2.869, loss_scale=4, train_wall=11, gb_free=2.8, wall=124310
2021-06-20 05:10:46 | INFO | train_inner | epoch 004: 1895 / 3002 loss=2.336, ppl=5.05, wps=5896.6, ups=0.09, wpb=64859, bsz=128, num_updates=10836, lr=9.99213e-05, gnorm=2.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=124321
2021-06-20 05:10:57 | INFO | train_inner | epoch 004: 1896 / 3002 loss=2.474, ppl=5.56, wps=5977.9, ups=0.09, wpb=64854, bsz=128, num_updates=10837, lr=9.99213e-05, gnorm=5.839, loss_scale=4, train_wall=10, gb_free=2.8, wall=124332
2021-06-20 05:11:08 | INFO | train_inner | epoch 004: 1897 / 3002 loss=2.358, ppl=5.13, wps=5855.1, ups=0.09, wpb=64846, bsz=128, num_updates=10838, lr=9.99213e-05, gnorm=2.044, loss_scale=4, train_wall=11, gb_free=2.8, wall=124343
2021-06-20 05:11:19 | INFO | train_inner | epoch 004: 1898 / 3002 loss=2.427, ppl=5.38, wps=5850.1, ups=0.09, wpb=64873, bsz=128, num_updates=10839, lr=9.99213e-05, gnorm=5.15, loss_scale=4, train_wall=11, gb_free=2.8, wall=124354
2021-06-20 05:11:31 | INFO | train_inner | epoch 004: 1899 / 3002 loss=2.525, ppl=5.76, wps=5844.8, ups=0.09, wpb=64888, bsz=128, num_updates=10840, lr=9.99213e-05, gnorm=2.088, loss_scale=4, train_wall=11, gb_free=2.8, wall=124365
2021-06-20 05:11:41 | INFO | train_inner | epoch 004: 1900 / 3002 loss=2.688, ppl=6.44, wps=5992, ups=0.09, wpb=64769, bsz=128, num_updates=10841, lr=9.99213e-05, gnorm=1.99, loss_scale=4, train_wall=10, gb_free=2.8, wall=124376
2021-06-20 05:11:52 | INFO | train_inner | epoch 004: 1901 / 3002 loss=2.515, ppl=5.72, wps=5856.8, ups=0.09, wpb=64842, bsz=128, num_updates=10842, lr=9.99213e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=124387
2021-06-20 05:12:04 | INFO | train_inner | epoch 004: 1902 / 3002 loss=2.474, ppl=5.55, wps=5797, ups=0.09, wpb=64821, bsz=128, num_updates=10843, lr=9.99212e-05, gnorm=2.416, loss_scale=4, train_wall=11, gb_free=2.8, wall=124398
2021-06-20 05:12:15 | INFO | train_inner | epoch 004: 1903 / 3002 loss=2.397, ppl=5.27, wps=5839.5, ups=0.09, wpb=64853, bsz=128, num_updates=10844, lr=9.99212e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=124409
2021-06-20 05:12:26 | INFO | train_inner | epoch 004: 1904 / 3002 loss=2.548, ppl=5.85, wps=5732.9, ups=0.09, wpb=64814, bsz=128, num_updates=10845, lr=9.99212e-05, gnorm=2.151, loss_scale=4, train_wall=11, gb_free=2.8, wall=124420
2021-06-20 05:12:37 | INFO | train_inner | epoch 004: 1905 / 3002 loss=2.442, ppl=5.43, wps=5835.8, ups=0.09, wpb=64855, bsz=128, num_updates=10846, lr=9.99212e-05, gnorm=1.975, loss_scale=4, train_wall=11, gb_free=2.8, wall=124431
2021-06-20 05:12:48 | INFO | train_inner | epoch 004: 1906 / 3002 loss=2.66, ppl=6.32, wps=5811.8, ups=0.09, wpb=64870, bsz=128, num_updates=10847, lr=9.99212e-05, gnorm=2.083, loss_scale=4, train_wall=11, gb_free=2.8, wall=124443
2021-06-20 05:12:59 | INFO | train_inner | epoch 004: 1907 / 3002 loss=2.499, ppl=5.65, wps=5897.9, ups=0.09, wpb=64872, bsz=128, num_updates=10848, lr=9.99212e-05, gnorm=2.02, loss_scale=4, train_wall=11, gb_free=2.8, wall=124454
2021-06-20 05:13:10 | INFO | train_inner | epoch 004: 1908 / 3002 loss=2.596, ppl=6.04, wps=5959.5, ups=0.09, wpb=64901, bsz=128, num_updates=10849, lr=9.99212e-05, gnorm=2.173, loss_scale=4, train_wall=10, gb_free=2.8, wall=124465
2021-06-20 05:13:21 | INFO | train_inner | epoch 004: 1909 / 3002 loss=2.688, ppl=6.44, wps=5917.6, ups=0.09, wpb=64853, bsz=128, num_updates=10850, lr=9.99212e-05, gnorm=2.396, loss_scale=4, train_wall=10, gb_free=2.8, wall=124476
2021-06-20 05:13:32 | INFO | train_inner | epoch 004: 1910 / 3002 loss=2.619, ppl=6.14, wps=5811.2, ups=0.09, wpb=64854, bsz=128, num_updates=10851, lr=9.99212e-05, gnorm=2.201, loss_scale=4, train_wall=11, gb_free=2.8, wall=124487
2021-06-20 05:13:43 | INFO | train_inner | epoch 004: 1911 / 3002 loss=2.461, ppl=5.51, wps=5853, ups=0.09, wpb=64775, bsz=128, num_updates=10852, lr=9.99212e-05, gnorm=2.311, loss_scale=4, train_wall=11, gb_free=2.8, wall=124498
2021-06-20 05:13:54 | INFO | train_inner | epoch 004: 1912 / 3002 loss=2.644, ppl=6.25, wps=5872.1, ups=0.09, wpb=64781, bsz=128, num_updates=10853, lr=9.99212e-05, gnorm=2.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=124509
2021-06-20 05:14:06 | INFO | train_inner | epoch 004: 1913 / 3002 loss=2.539, ppl=5.81, wps=5789.8, ups=0.09, wpb=64836, bsz=128, num_updates=10854, lr=9.99212e-05, gnorm=2.167, loss_scale=4, train_wall=11, gb_free=2.8, wall=124520
2021-06-20 05:14:17 | INFO | train_inner | epoch 004: 1914 / 3002 loss=2.742, ppl=6.69, wps=5797.9, ups=0.09, wpb=64829, bsz=128, num_updates=10855, lr=9.99212e-05, gnorm=2.109, loss_scale=4, train_wall=11, gb_free=2.8, wall=124531
2021-06-20 05:14:28 | INFO | train_inner | epoch 004: 1915 / 3002 loss=2.576, ppl=5.96, wps=5867.1, ups=0.09, wpb=64790, bsz=128, num_updates=10856, lr=9.99211e-05, gnorm=2.083, loss_scale=4, train_wall=11, gb_free=2.8, wall=124542
2021-06-20 05:14:39 | INFO | train_inner | epoch 004: 1916 / 3002 loss=2.507, ppl=5.68, wps=5925.6, ups=0.09, wpb=64863, bsz=128, num_updates=10857, lr=9.99211e-05, gnorm=1.995, loss_scale=4, train_wall=10, gb_free=2.8, wall=124553
2021-06-20 05:14:50 | INFO | train_inner | epoch 004: 1917 / 3002 loss=2.593, ppl=6.03, wps=5874.3, ups=0.09, wpb=64717, bsz=128, num_updates=10858, lr=9.99211e-05, gnorm=2.06, loss_scale=4, train_wall=11, gb_free=2.8, wall=124564
2021-06-20 05:15:01 | INFO | train_inner | epoch 004: 1918 / 3002 loss=2.551, ppl=5.86, wps=5814.4, ups=0.09, wpb=64805, bsz=128, num_updates=10859, lr=9.99211e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=124575
2021-06-20 05:15:12 | INFO | train_inner | epoch 004: 1919 / 3002 loss=2.648, ppl=6.27, wps=5897.4, ups=0.09, wpb=64833, bsz=128, num_updates=10860, lr=9.99211e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=124586
2021-06-20 05:15:23 | INFO | train_inner | epoch 004: 1920 / 3002 loss=2.504, ppl=5.67, wps=5907.5, ups=0.09, wpb=64804, bsz=128, num_updates=10861, lr=9.99211e-05, gnorm=2.763, loss_scale=8, train_wall=11, gb_free=2.8, wall=124597
2021-06-20 05:15:34 | INFO | train_inner | epoch 004: 1921 / 3002 loss=2.599, ppl=6.06, wps=5871, ups=0.09, wpb=64848, bsz=128, num_updates=10862, lr=9.99211e-05, gnorm=2.063, loss_scale=8, train_wall=11, gb_free=2.8, wall=124608
2021-06-20 05:15:45 | INFO | train_inner | epoch 004: 1922 / 3002 loss=2.519, ppl=5.73, wps=5874.6, ups=0.09, wpb=64835, bsz=128, num_updates=10863, lr=9.99211e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=124619
2021-06-20 05:15:56 | INFO | train_inner | epoch 004: 1923 / 3002 loss=2.566, ppl=5.92, wps=5894.3, ups=0.09, wpb=64934, bsz=128, num_updates=10864, lr=9.99211e-05, gnorm=2.046, loss_scale=8, train_wall=11, gb_free=2.8, wall=124630
2021-06-20 05:16:07 | INFO | train_inner | epoch 004: 1924 / 3002 loss=2.546, ppl=5.84, wps=5836.2, ups=0.09, wpb=64800, bsz=128, num_updates=10865, lr=9.99211e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=124641
2021-06-20 05:16:18 | INFO | train_inner | epoch 004: 1925 / 3002 loss=2.597, ppl=6.05, wps=5768.6, ups=0.09, wpb=64844, bsz=128, num_updates=10866, lr=9.99211e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=124653
2021-06-20 05:16:29 | INFO | train_inner | epoch 004: 1926 / 3002 loss=2.762, ppl=6.79, wps=5854.9, ups=0.09, wpb=64801, bsz=128, num_updates=10867, lr=9.99211e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=124664
2021-06-20 05:16:41 | INFO | train_inner | epoch 004: 1927 / 3002 loss=2.544, ppl=5.83, wps=5773.4, ups=0.09, wpb=64806, bsz=128, num_updates=10868, lr=9.9921e-05, gnorm=1.922, loss_scale=8, train_wall=11, gb_free=2.8, wall=124675
2021-06-20 05:16:52 | INFO | train_inner | epoch 004: 1928 / 3002 loss=2.525, ppl=5.75, wps=5879.9, ups=0.09, wpb=64788, bsz=128, num_updates=10869, lr=9.9921e-05, gnorm=2.099, loss_scale=8, train_wall=11, gb_free=2.8, wall=124686
2021-06-20 05:17:03 | INFO | train_inner | epoch 004: 1929 / 3002 loss=2.706, ppl=6.52, wps=5855.4, ups=0.09, wpb=64839, bsz=128, num_updates=10870, lr=9.9921e-05, gnorm=2.051, loss_scale=8, train_wall=11, gb_free=2.8, wall=124697
2021-06-20 05:17:14 | INFO | train_inner | epoch 004: 1930 / 3002 loss=2.599, ppl=6.06, wps=5885.8, ups=0.09, wpb=64848, bsz=128, num_updates=10871, lr=9.9921e-05, gnorm=2.106, loss_scale=8, train_wall=11, gb_free=2.8, wall=124708
2021-06-20 05:17:25 | INFO | train_inner | epoch 004: 1931 / 3002 loss=2.522, ppl=5.74, wps=5873, ups=0.09, wpb=64833, bsz=128, num_updates=10872, lr=9.9921e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=124719
2021-06-20 05:17:36 | INFO | train_inner | epoch 004: 1932 / 3002 loss=2.588, ppl=6.01, wps=5772.5, ups=0.09, wpb=64741, bsz=128, num_updates=10873, lr=9.9921e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=124730
2021-06-20 05:17:47 | INFO | train_inner | epoch 004: 1933 / 3002 loss=2.58, ppl=5.98, wps=5907.7, ups=0.09, wpb=64818, bsz=128, num_updates=10874, lr=9.9921e-05, gnorm=2.083, loss_scale=8, train_wall=11, gb_free=2.8, wall=124741
2021-06-20 05:17:58 | INFO | train_inner | epoch 004: 1934 / 3002 loss=2.542, ppl=5.82, wps=5782.7, ups=0.09, wpb=64805, bsz=128, num_updates=10875, lr=9.9921e-05, gnorm=2.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=124753
2021-06-20 05:18:09 | INFO | train_inner | epoch 004: 1935 / 3002 loss=2.569, ppl=5.93, wps=5794.8, ups=0.09, wpb=64860, bsz=128, num_updates=10876, lr=9.9921e-05, gnorm=2.076, loss_scale=8, train_wall=11, gb_free=2.8, wall=124764
2021-06-20 05:18:21 | INFO | train_inner | epoch 004: 1936 / 3002 loss=2.432, ppl=5.4, wps=5711.8, ups=0.09, wpb=64908, bsz=128, num_updates=10877, lr=9.9921e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=124775
2021-06-20 05:18:32 | INFO | train_inner | epoch 004: 1937 / 3002 loss=2.761, ppl=6.78, wps=5807.7, ups=0.09, wpb=64876, bsz=128, num_updates=10878, lr=9.9921e-05, gnorm=2.308, loss_scale=8, train_wall=11, gb_free=2.8, wall=124786
2021-06-20 05:18:43 | INFO | train_inner | epoch 004: 1938 / 3002 loss=2.577, ppl=5.97, wps=5926.3, ups=0.09, wpb=64858, bsz=128, num_updates=10879, lr=9.9921e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=124797
2021-06-20 05:18:54 | INFO | train_inner | epoch 004: 1939 / 3002 loss=2.564, ppl=5.91, wps=5871.3, ups=0.09, wpb=64718, bsz=128, num_updates=10880, lr=9.9921e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=124808
2021-06-20 05:19:05 | INFO | train_inner | epoch 004: 1940 / 3002 loss=2.778, ppl=6.86, wps=5799.5, ups=0.09, wpb=64791, bsz=128, num_updates=10881, lr=9.99209e-05, gnorm=2.076, loss_scale=8, train_wall=11, gb_free=2.8, wall=124819
2021-06-20 05:19:16 | INFO | train_inner | epoch 004: 1941 / 3002 loss=2.497, ppl=5.64, wps=5767.3, ups=0.09, wpb=64834, bsz=128, num_updates=10882, lr=9.99209e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=124831
2021-06-20 05:19:28 | INFO | train_inner | epoch 004: 1942 / 3002 loss=2.48, ppl=5.58, wps=5746.9, ups=0.09, wpb=64928, bsz=128, num_updates=10883, lr=9.99209e-05, gnorm=2.101, loss_scale=8, train_wall=11, gb_free=2.8, wall=124842
2021-06-20 05:19:39 | INFO | train_inner | epoch 004: 1943 / 3002 loss=2.432, ppl=5.4, wps=5885, ups=0.09, wpb=64801, bsz=128, num_updates=10884, lr=9.99209e-05, gnorm=2.058, loss_scale=8, train_wall=11, gb_free=2.8, wall=124853
2021-06-20 05:19:50 | INFO | train_inner | epoch 004: 1944 / 3002 loss=2.568, ppl=5.93, wps=5940.8, ups=0.09, wpb=64799, bsz=128, num_updates=10885, lr=9.99209e-05, gnorm=2.894, loss_scale=8, train_wall=10, gb_free=2.8, wall=124864
2021-06-20 05:20:00 | INFO | train_inner | epoch 004: 1945 / 3002 loss=2.683, ppl=6.42, wps=5965, ups=0.09, wpb=64813, bsz=128, num_updates=10886, lr=9.99209e-05, gnorm=2.145, loss_scale=8, train_wall=10, gb_free=2.8, wall=124875
2021-06-20 05:20:11 | INFO | train_inner | epoch 004: 1946 / 3002 loss=2.859, ppl=7.26, wps=5853.8, ups=0.09, wpb=64886, bsz=128, num_updates=10887, lr=9.99209e-05, gnorm=2.131, loss_scale=8, train_wall=11, gb_free=2.8, wall=124886
2021-06-20 05:20:23 | INFO | train_inner | epoch 004: 1947 / 3002 loss=2.582, ppl=5.99, wps=5815.2, ups=0.09, wpb=64771, bsz=128, num_updates=10888, lr=9.99209e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=124897
2021-06-20 05:20:34 | INFO | train_inner | epoch 004: 1948 / 3002 loss=2.562, ppl=5.9, wps=5796.5, ups=0.09, wpb=64848, bsz=128, num_updates=10889, lr=9.99209e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=124908
2021-06-20 05:20:45 | INFO | train_inner | epoch 004: 1949 / 3002 loss=2.519, ppl=5.73, wps=5791.1, ups=0.09, wpb=64809, bsz=128, num_updates=10890, lr=9.99209e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=124919
2021-06-20 05:20:56 | INFO | train_inner | epoch 004: 1950 / 3002 loss=2.655, ppl=6.3, wps=5863.9, ups=0.09, wpb=64832, bsz=128, num_updates=10891, lr=9.99209e-05, gnorm=2.089, loss_scale=8, train_wall=11, gb_free=2.8, wall=124930
2021-06-20 05:21:07 | INFO | train_inner | epoch 004: 1951 / 3002 loss=2.462, ppl=5.51, wps=5740.9, ups=0.09, wpb=64858, bsz=128, num_updates=10892, lr=9.99209e-05, gnorm=2.225, loss_scale=8, train_wall=11, gb_free=2.8, wall=124942
2021-06-20 05:21:18 | INFO | train_inner | epoch 004: 1952 / 3002 loss=2.652, ppl=6.29, wps=5839.2, ups=0.09, wpb=64833, bsz=128, num_updates=10893, lr=9.99208e-05, gnorm=2.051, loss_scale=8, train_wall=11, gb_free=2.8, wall=124953
2021-06-20 05:21:30 | INFO | train_inner | epoch 004: 1953 / 3002 loss=2.541, ppl=5.82, wps=5828, ups=0.09, wpb=64817, bsz=128, num_updates=10894, lr=9.99208e-05, gnorm=2.111, loss_scale=8, train_wall=11, gb_free=2.8, wall=124964
2021-06-20 05:21:41 | INFO | train_inner | epoch 004: 1954 / 3002 loss=2.405, ppl=5.3, wps=5797.8, ups=0.09, wpb=64819, bsz=128, num_updates=10895, lr=9.99208e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=124975
2021-06-20 05:21:52 | INFO | train_inner | epoch 004: 1955 / 3002 loss=2.562, ppl=5.91, wps=5838.4, ups=0.09, wpb=64773, bsz=128, num_updates=10896, lr=9.99208e-05, gnorm=2.169, loss_scale=8, train_wall=11, gb_free=2.8, wall=124986
2021-06-20 05:22:03 | INFO | train_inner | epoch 004: 1956 / 3002 loss=2.464, ppl=5.52, wps=5945.8, ups=0.09, wpb=64772, bsz=128, num_updates=10897, lr=9.99208e-05, gnorm=3.661, loss_scale=8, train_wall=10, gb_free=2.8, wall=124997
2021-06-20 05:22:14 | INFO | train_inner | epoch 004: 1957 / 3002 loss=2.683, ppl=6.42, wps=5757.8, ups=0.09, wpb=64830, bsz=128, num_updates=10898, lr=9.99208e-05, gnorm=2.056, loss_scale=8, train_wall=11, gb_free=2.8, wall=125008
2021-06-20 05:22:25 | INFO | train_inner | epoch 004: 1958 / 3002 loss=2.682, ppl=6.42, wps=5864.4, ups=0.09, wpb=64813, bsz=128, num_updates=10899, lr=9.99208e-05, gnorm=2.075, loss_scale=8, train_wall=11, gb_free=2.8, wall=125019
2021-06-20 05:22:36 | INFO | train_inner | epoch 004: 1959 / 3002 loss=2.495, ppl=5.64, wps=5821.9, ups=0.09, wpb=64835, bsz=128, num_updates=10900, lr=9.99208e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=125031
2021-06-20 05:22:47 | INFO | train_inner | epoch 004: 1960 / 3002 loss=2.522, ppl=5.75, wps=5901.1, ups=0.09, wpb=64891, bsz=128, num_updates=10901, lr=9.99208e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=125042
2021-06-20 05:22:58 | INFO | train_inner | epoch 004: 1961 / 3002 loss=2.519, ppl=5.73, wps=5830.6, ups=0.09, wpb=64853, bsz=128, num_updates=10902, lr=9.99208e-05, gnorm=2.266, loss_scale=8, train_wall=11, gb_free=2.8, wall=125053
2021-06-20 05:23:09 | INFO | train_inner | epoch 004: 1962 / 3002 loss=2.68, ppl=6.41, wps=5937.9, ups=0.09, wpb=64835, bsz=128, num_updates=10903, lr=9.99208e-05, gnorm=2.038, loss_scale=8, train_wall=10, gb_free=2.8, wall=125064
2021-06-20 05:23:20 | INFO | train_inner | epoch 004: 1963 / 3002 loss=2.81, ppl=7.01, wps=5886, ups=0.09, wpb=64868, bsz=128, num_updates=10904, lr=9.99208e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=125075
2021-06-20 05:23:32 | INFO | train_inner | epoch 004: 1964 / 3002 loss=2.419, ppl=5.35, wps=5766, ups=0.09, wpb=64865, bsz=128, num_updates=10905, lr=9.99208e-05, gnorm=3.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=125086
2021-06-20 05:23:43 | INFO | train_inner | epoch 004: 1965 / 3002 loss=2.534, ppl=5.79, wps=5788.1, ups=0.09, wpb=64813, bsz=128, num_updates=10906, lr=9.99207e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=125097
2021-06-20 05:23:54 | INFO | train_inner | epoch 004: 1966 / 3002 loss=2.515, ppl=5.72, wps=5856.2, ups=0.09, wpb=64817, bsz=128, num_updates=10907, lr=9.99207e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=125108
2021-06-20 05:24:05 | INFO | train_inner | epoch 004: 1967 / 3002 loss=2.563, ppl=5.91, wps=5875.4, ups=0.09, wpb=64788, bsz=128, num_updates=10908, lr=9.99207e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=125119
2021-06-20 05:24:16 | INFO | train_inner | epoch 004: 1968 / 3002 loss=2.481, ppl=5.58, wps=5821, ups=0.09, wpb=64941, bsz=128, num_updates=10909, lr=9.99207e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=125130
2021-06-20 05:24:27 | INFO | train_inner | epoch 004: 1969 / 3002 loss=2.602, ppl=6.07, wps=5893.7, ups=0.09, wpb=64771, bsz=128, num_updates=10910, lr=9.99207e-05, gnorm=2.89, loss_scale=8, train_wall=11, gb_free=2.8, wall=125141
2021-06-20 05:24:38 | INFO | train_inner | epoch 004: 1970 / 3002 loss=2.452, ppl=5.47, wps=5745.7, ups=0.09, wpb=64877, bsz=128, num_updates=10911, lr=9.99207e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=125153
2021-06-20 05:24:49 | INFO | train_inner | epoch 004: 1971 / 3002 loss=2.614, ppl=6.12, wps=5886.9, ups=0.09, wpb=64870, bsz=128, num_updates=10912, lr=9.99207e-05, gnorm=2.053, loss_scale=8, train_wall=11, gb_free=2.8, wall=125164
2021-06-20 05:25:00 | INFO | train_inner | epoch 004: 1972 / 3002 loss=2.599, ppl=6.06, wps=5785.2, ups=0.09, wpb=64814, bsz=128, num_updates=10913, lr=9.99207e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=125175
2021-06-20 05:25:11 | INFO | train_inner | epoch 004: 1973 / 3002 loss=2.55, ppl=5.86, wps=5881.7, ups=0.09, wpb=64857, bsz=128, num_updates=10914, lr=9.99207e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=125186
2021-06-20 05:25:23 | INFO | train_inner | epoch 004: 1974 / 3002 loss=2.522, ppl=5.74, wps=5817.1, ups=0.09, wpb=64727, bsz=128, num_updates=10915, lr=9.99207e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=125197
2021-06-20 05:25:34 | INFO | train_inner | epoch 004: 1975 / 3002 loss=2.56, ppl=5.9, wps=5797.1, ups=0.09, wpb=64768, bsz=128, num_updates=10916, lr=9.99207e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=125208
2021-06-20 05:25:45 | INFO | train_inner | epoch 004: 1976 / 3002 loss=2.628, ppl=6.18, wps=5886.3, ups=0.09, wpb=64816, bsz=128, num_updates=10917, lr=9.99207e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=125219
2021-06-20 05:25:56 | INFO | train_inner | epoch 004: 1977 / 3002 loss=2.551, ppl=5.86, wps=5926.6, ups=0.09, wpb=64863, bsz=128, num_updates=10918, lr=9.99206e-05, gnorm=2.125, loss_scale=8, train_wall=10, gb_free=2.8, wall=125230
2021-06-20 05:26:07 | INFO | train_inner | epoch 004: 1978 / 3002 loss=2.587, ppl=6.01, wps=5786.5, ups=0.09, wpb=64826, bsz=128, num_updates=10919, lr=9.99206e-05, gnorm=2.2, loss_scale=8, train_wall=11, gb_free=2.8, wall=125241
2021-06-20 05:26:18 | INFO | train_inner | epoch 004: 1979 / 3002 loss=2.662, ppl=6.33, wps=5895.2, ups=0.09, wpb=64830, bsz=128, num_updates=10920, lr=9.99206e-05, gnorm=2.011, loss_scale=8, train_wall=11, gb_free=2.8, wall=125252
2021-06-20 05:26:29 | INFO | train_inner | epoch 004: 1980 / 3002 loss=2.614, ppl=6.12, wps=5764.9, ups=0.09, wpb=64864, bsz=128, num_updates=10921, lr=9.99206e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=125264
2021-06-20 05:26:40 | INFO | train_inner | epoch 004: 1981 / 3002 loss=2.691, ppl=6.46, wps=5971.5, ups=0.09, wpb=64932, bsz=128, num_updates=10922, lr=9.99206e-05, gnorm=2.045, loss_scale=8, train_wall=10, gb_free=2.8, wall=125274
2021-06-20 05:26:51 | INFO | train_inner | epoch 004: 1982 / 3002 loss=2.482, ppl=5.59, wps=5862.5, ups=0.09, wpb=64819, bsz=128, num_updates=10923, lr=9.99206e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=125285
2021-06-20 05:27:02 | INFO | train_inner | epoch 004: 1983 / 3002 loss=2.549, ppl=5.85, wps=5865.1, ups=0.09, wpb=64796, bsz=128, num_updates=10924, lr=9.99206e-05, gnorm=2.085, loss_scale=8, train_wall=11, gb_free=2.8, wall=125297
2021-06-20 05:27:13 | INFO | train_inner | epoch 004: 1984 / 3002 loss=2.654, ppl=6.29, wps=5877, ups=0.09, wpb=64856, bsz=128, num_updates=10925, lr=9.99206e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=125308
2021-06-20 05:27:24 | INFO | train_inner | epoch 004: 1985 / 3002 loss=2.528, ppl=5.77, wps=5876.1, ups=0.09, wpb=64895, bsz=128, num_updates=10926, lr=9.99206e-05, gnorm=2.432, loss_scale=8, train_wall=11, gb_free=2.8, wall=125319
2021-06-20 05:27:35 | INFO | train_inner | epoch 004: 1986 / 3002 loss=2.536, ppl=5.8, wps=5914.4, ups=0.09, wpb=64930, bsz=128, num_updates=10927, lr=9.99206e-05, gnorm=2.138, loss_scale=8, train_wall=10, gb_free=2.8, wall=125330
2021-06-20 05:27:46 | INFO | train_inner | epoch 004: 1987 / 3002 loss=2.658, ppl=6.31, wps=5905.3, ups=0.09, wpb=64796, bsz=128, num_updates=10928, lr=9.99206e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=125341
2021-06-20 05:27:57 | INFO | train_inner | epoch 004: 1988 / 3002 loss=2.713, ppl=6.56, wps=5804.1, ups=0.09, wpb=64841, bsz=128, num_updates=10929, lr=9.99206e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=125352
2021-06-20 05:28:09 | INFO | train_inner | epoch 004: 1989 / 3002 loss=2.638, ppl=6.23, wps=5809.6, ups=0.09, wpb=64870, bsz=128, num_updates=10930, lr=9.99206e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=125363
2021-06-20 05:28:20 | INFO | train_inner | epoch 004: 1990 / 3002 loss=2.566, ppl=5.92, wps=5718, ups=0.09, wpb=64771, bsz=128, num_updates=10931, lr=9.99205e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=125374
2021-06-20 05:28:31 | INFO | train_inner | epoch 004: 1991 / 3002 loss=2.528, ppl=5.77, wps=5832.7, ups=0.09, wpb=64844, bsz=128, num_updates=10932, lr=9.99205e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=125385
2021-06-20 05:28:42 | INFO | train_inner | epoch 004: 1992 / 3002 loss=2.56, ppl=5.9, wps=5760, ups=0.09, wpb=64808, bsz=128, num_updates=10933, lr=9.99205e-05, gnorm=2.065, loss_scale=8, train_wall=11, gb_free=2.8, wall=125397
2021-06-20 05:28:53 | INFO | train_inner | epoch 004: 1993 / 3002 loss=2.668, ppl=6.36, wps=5923.1, ups=0.09, wpb=64796, bsz=128, num_updates=10934, lr=9.99205e-05, gnorm=2.006, loss_scale=8, train_wall=10, gb_free=2.8, wall=125408
2021-06-20 05:29:04 | INFO | train_inner | epoch 004: 1994 / 3002 loss=2.708, ppl=6.53, wps=5839.7, ups=0.09, wpb=64771, bsz=128, num_updates=10935, lr=9.99205e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=125419
2021-06-20 05:29:16 | INFO | train_inner | epoch 004: 1995 / 3002 loss=2.602, ppl=6.07, wps=5766.8, ups=0.09, wpb=64852, bsz=128, num_updates=10936, lr=9.99205e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=125430
2021-06-20 05:29:27 | INFO | train_inner | epoch 004: 1996 / 3002 loss=2.553, ppl=5.87, wps=5849.2, ups=0.09, wpb=64906, bsz=128, num_updates=10937, lr=9.99205e-05, gnorm=2.044, loss_scale=8, train_wall=11, gb_free=2.8, wall=125441
2021-06-20 05:29:38 | INFO | train_inner | epoch 004: 1997 / 3002 loss=2.566, ppl=5.92, wps=5861.5, ups=0.09, wpb=64784, bsz=128, num_updates=10938, lr=9.99205e-05, gnorm=2.029, loss_scale=8, train_wall=11, gb_free=2.8, wall=125452
2021-06-20 05:29:49 | INFO | train_inner | epoch 004: 1998 / 3002 loss=2.554, ppl=5.87, wps=5873.3, ups=0.09, wpb=64852, bsz=128, num_updates=10939, lr=9.99205e-05, gnorm=2.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=125463
2021-06-20 05:30:00 | INFO | train_inner | epoch 004: 1999 / 3002 loss=2.532, ppl=5.78, wps=5867.3, ups=0.09, wpb=64761, bsz=128, num_updates=10940, lr=9.99205e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=125474
2021-06-20 05:30:11 | INFO | train_inner | epoch 004: 2000 / 3002 loss=2.688, ppl=6.44, wps=5859.6, ups=0.09, wpb=64774, bsz=128, num_updates=10941, lr=9.99205e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=125485
2021-06-20 05:30:22 | INFO | train_inner | epoch 004: 2001 / 3002 loss=2.517, ppl=5.73, wps=5903.7, ups=0.09, wpb=64787, bsz=128, num_updates=10942, lr=9.99205e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=125496
2021-06-20 05:30:33 | INFO | train_inner | epoch 004: 2002 / 3002 loss=2.599, ppl=6.06, wps=6012.2, ups=0.09, wpb=64761, bsz=128, num_updates=10943, lr=9.99204e-05, gnorm=2.041, loss_scale=8, train_wall=10, gb_free=2.8, wall=125507
2021-06-20 05:30:44 | INFO | train_inner | epoch 004: 2003 / 3002 loss=2.614, ppl=6.12, wps=5867.4, ups=0.09, wpb=64853, bsz=128, num_updates=10944, lr=9.99204e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=125518
2021-06-20 05:30:54 | INFO | train_inner | epoch 004: 2004 / 3002 loss=2.651, ppl=6.28, wps=6009.8, ups=0.09, wpb=64840, bsz=128, num_updates=10945, lr=9.99204e-05, gnorm=2.07, loss_scale=8, train_wall=10, gb_free=2.8, wall=125529
2021-06-20 05:31:06 | INFO | train_inner | epoch 004: 2005 / 3002 loss=2.481, ppl=5.58, wps=5731.7, ups=0.09, wpb=64782, bsz=128, num_updates=10946, lr=9.99204e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=125540
2021-06-20 05:31:17 | INFO | train_inner | epoch 004: 2006 / 3002 loss=2.626, ppl=6.17, wps=5782.6, ups=0.09, wpb=64822, bsz=128, num_updates=10947, lr=9.99204e-05, gnorm=2.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=125551
2021-06-20 05:31:28 | INFO | train_inner | epoch 004: 2007 / 3002 loss=2.544, ppl=5.83, wps=5791.5, ups=0.09, wpb=64830, bsz=128, num_updates=10948, lr=9.99204e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=125562
2021-06-20 05:31:39 | INFO | train_inner | epoch 004: 2008 / 3002 loss=2.523, ppl=5.75, wps=5930.3, ups=0.09, wpb=64843, bsz=128, num_updates=10949, lr=9.99204e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=125573
2021-06-20 05:31:50 | INFO | train_inner | epoch 004: 2009 / 3002 loss=2.57, ppl=5.94, wps=5895.7, ups=0.09, wpb=64801, bsz=128, num_updates=10950, lr=9.99204e-05, gnorm=2.138, loss_scale=8, train_wall=11, gb_free=2.8, wall=125584
2021-06-20 05:32:01 | INFO | train_inner | epoch 004: 2010 / 3002 loss=2.642, ppl=6.24, wps=5899.2, ups=0.09, wpb=64823, bsz=128, num_updates=10951, lr=9.99204e-05, gnorm=2.097, loss_scale=8, train_wall=11, gb_free=2.8, wall=125595
2021-06-20 05:32:12 | INFO | train_inner | epoch 004: 2011 / 3002 loss=2.517, ppl=5.72, wps=5831.6, ups=0.09, wpb=64756, bsz=128, num_updates=10952, lr=9.99204e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=125606
2021-06-20 05:32:23 | INFO | train_inner | epoch 004: 2012 / 3002 loss=2.493, ppl=5.63, wps=5868.3, ups=0.09, wpb=64816, bsz=128, num_updates=10953, lr=9.99204e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=125618
2021-06-20 05:32:34 | INFO | train_inner | epoch 004: 2013 / 3002 loss=2.641, ppl=6.24, wps=5903.1, ups=0.09, wpb=64840, bsz=128, num_updates=10954, lr=9.99204e-05, gnorm=2.029, loss_scale=8, train_wall=11, gb_free=2.8, wall=125628
2021-06-20 05:32:45 | INFO | train_inner | epoch 004: 2014 / 3002 loss=2.519, ppl=5.73, wps=5927.9, ups=0.09, wpb=64855, bsz=128, num_updates=10955, lr=9.99204e-05, gnorm=2.238, loss_scale=8, train_wall=10, gb_free=2.8, wall=125639
2021-06-20 05:32:56 | INFO | train_inner | epoch 004: 2015 / 3002 loss=2.53, ppl=5.78, wps=5845.4, ups=0.09, wpb=64854, bsz=128, num_updates=10956, lr=9.99203e-05, gnorm=2.051, loss_scale=8, train_wall=11, gb_free=2.8, wall=125651
2021-06-20 05:33:07 | INFO | train_inner | epoch 004: 2016 / 3002 loss=2.599, ppl=6.06, wps=5919.6, ups=0.09, wpb=64926, bsz=128, num_updates=10957, lr=9.99203e-05, gnorm=2.105, loss_scale=8, train_wall=11, gb_free=2.8, wall=125662
2021-06-20 05:33:18 | INFO | train_inner | epoch 004: 2017 / 3002 loss=2.574, ppl=5.95, wps=5797.6, ups=0.09, wpb=64809, bsz=128, num_updates=10958, lr=9.99203e-05, gnorm=2.012, loss_scale=8, train_wall=11, gb_free=2.8, wall=125673
2021-06-20 05:33:29 | INFO | train_inner | epoch 004: 2018 / 3002 loss=2.521, ppl=5.74, wps=5858.3, ups=0.09, wpb=64832, bsz=128, num_updates=10959, lr=9.99203e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=125684
2021-06-20 05:33:41 | INFO | train_inner | epoch 004: 2019 / 3002 loss=2.395, ppl=5.26, wps=5758.6, ups=0.09, wpb=64974, bsz=128, num_updates=10960, lr=9.99203e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=125695
2021-06-20 05:33:52 | INFO | train_inner | epoch 004: 2020 / 3002 loss=2.578, ppl=5.97, wps=5922.6, ups=0.09, wpb=64857, bsz=128, num_updates=10961, lr=9.99203e-05, gnorm=2.104, loss_scale=8, train_wall=10, gb_free=2.8, wall=125706
2021-06-20 05:34:03 | INFO | train_inner | epoch 004: 2021 / 3002 loss=2.534, ppl=5.79, wps=5808.9, ups=0.09, wpb=64872, bsz=128, num_updates=10962, lr=9.99203e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=125717
2021-06-20 05:34:14 | INFO | train_inner | epoch 004: 2022 / 3002 loss=2.608, ppl=6.1, wps=5928, ups=0.09, wpb=64926, bsz=128, num_updates=10963, lr=9.99203e-05, gnorm=2.086, loss_scale=8, train_wall=11, gb_free=2.8, wall=125728
2021-06-20 05:34:25 | INFO | train_inner | epoch 004: 2023 / 3002 loss=2.555, ppl=5.88, wps=5851.4, ups=0.09, wpb=64799, bsz=128, num_updates=10964, lr=9.99203e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=125739
2021-06-20 05:34:36 | INFO | train_inner | epoch 004: 2024 / 3002 loss=2.544, ppl=5.83, wps=5923.9, ups=0.09, wpb=64909, bsz=128, num_updates=10965, lr=9.99203e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=125750
2021-06-20 05:34:47 | INFO | train_inner | epoch 004: 2025 / 3002 loss=2.451, ppl=5.47, wps=5871.5, ups=0.09, wpb=64859, bsz=128, num_updates=10966, lr=9.99203e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=125761
2021-06-20 05:34:58 | INFO | train_inner | epoch 004: 2026 / 3002 loss=2.801, ppl=6.97, wps=5881.9, ups=0.09, wpb=64869, bsz=128, num_updates=10967, lr=9.99203e-05, gnorm=2.119, loss_scale=8, train_wall=11, gb_free=2.8, wall=125772
2021-06-20 05:35:09 | INFO | train_inner | epoch 004: 2027 / 3002 loss=2.485, ppl=5.6, wps=5876.1, ups=0.09, wpb=64791, bsz=128, num_updates=10968, lr=9.99202e-05, gnorm=2.434, loss_scale=8, train_wall=11, gb_free=2.8, wall=125783
2021-06-20 05:35:20 | INFO | train_inner | epoch 004: 2028 / 3002 loss=2.678, ppl=6.4, wps=5771.8, ups=0.09, wpb=64748, bsz=128, num_updates=10969, lr=9.99202e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=125794
2021-06-20 05:35:31 | INFO | train_inner | epoch 004: 2029 / 3002 loss=2.627, ppl=6.18, wps=5865.4, ups=0.09, wpb=64863, bsz=128, num_updates=10970, lr=9.99202e-05, gnorm=2.01, loss_scale=8, train_wall=11, gb_free=2.8, wall=125806
2021-06-20 05:35:42 | INFO | train_inner | epoch 004: 2030 / 3002 loss=2.448, ppl=5.46, wps=5851.7, ups=0.09, wpb=64801, bsz=128, num_updates=10971, lr=9.99202e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=125817
2021-06-20 05:35:54 | INFO | train_inner | epoch 004: 2031 / 3002 loss=2.606, ppl=6.09, wps=5758.5, ups=0.09, wpb=64791, bsz=128, num_updates=10972, lr=9.99202e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=125828
2021-06-20 05:36:04 | INFO | train_inner | epoch 004: 2032 / 3002 loss=2.381, ppl=5.21, wps=5921.3, ups=0.09, wpb=64916, bsz=128, num_updates=10973, lr=9.99202e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=125839
2021-06-20 05:36:15 | INFO | train_inner | epoch 004: 2033 / 3002 loss=2.49, ppl=5.62, wps=5898.9, ups=0.09, wpb=64876, bsz=128, num_updates=10974, lr=9.99202e-05, gnorm=2.071, loss_scale=8, train_wall=11, gb_free=2.8, wall=125850
2021-06-20 05:36:27 | INFO | train_inner | epoch 004: 2034 / 3002 loss=2.491, ppl=5.62, wps=5801.9, ups=0.09, wpb=64827, bsz=128, num_updates=10975, lr=9.99202e-05, gnorm=2.166, loss_scale=8, train_wall=11, gb_free=2.8, wall=125861
2021-06-20 05:36:38 | INFO | train_inner | epoch 004: 2035 / 3002 loss=2.497, ppl=5.65, wps=5886.1, ups=0.09, wpb=64843, bsz=128, num_updates=10976, lr=9.99202e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=125872
2021-06-20 05:36:49 | INFO | train_inner | epoch 004: 2036 / 3002 loss=2.671, ppl=6.37, wps=5910.4, ups=0.09, wpb=64864, bsz=128, num_updates=10977, lr=9.99202e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=125883
2021-06-20 05:37:00 | INFO | train_inner | epoch 004: 2037 / 3002 loss=2.506, ppl=5.68, wps=5789.9, ups=0.09, wpb=64886, bsz=128, num_updates=10978, lr=9.99202e-05, gnorm=2.096, loss_scale=8, train_wall=11, gb_free=2.8, wall=125894
2021-06-20 05:37:11 | INFO | train_inner | epoch 004: 2038 / 3002 loss=2.542, ppl=5.82, wps=5936.8, ups=0.09, wpb=64785, bsz=128, num_updates=10979, lr=9.99202e-05, gnorm=2.219, loss_scale=8, train_wall=10, gb_free=2.8, wall=125905
2021-06-20 05:37:22 | INFO | train_inner | epoch 004: 2039 / 3002 loss=2.646, ppl=6.26, wps=5814.5, ups=0.09, wpb=64820, bsz=128, num_updates=10980, lr=9.99202e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=125916
2021-06-20 05:37:33 | INFO | train_inner | epoch 004: 2040 / 3002 loss=2.658, ppl=6.31, wps=5853.1, ups=0.09, wpb=64784, bsz=128, num_updates=10981, lr=9.99201e-05, gnorm=2.083, loss_scale=8, train_wall=11, gb_free=2.8, wall=125927
2021-06-20 05:37:44 | INFO | train_inner | epoch 004: 2041 / 3002 loss=2.524, ppl=5.75, wps=5842.8, ups=0.09, wpb=64836, bsz=128, num_updates=10982, lr=9.99201e-05, gnorm=2.106, loss_scale=8, train_wall=11, gb_free=2.8, wall=125938
2021-06-20 05:37:55 | INFO | train_inner | epoch 004: 2042 / 3002 loss=2.536, ppl=5.8, wps=5759.2, ups=0.09, wpb=64873, bsz=128, num_updates=10983, lr=9.99201e-05, gnorm=2.126, loss_scale=8, train_wall=11, gb_free=2.8, wall=125950
2021-06-20 05:38:06 | INFO | train_inner | epoch 004: 2043 / 3002 loss=2.472, ppl=5.55, wps=5812.1, ups=0.09, wpb=64793, bsz=128, num_updates=10984, lr=9.99201e-05, gnorm=2.542, loss_scale=8, train_wall=11, gb_free=2.8, wall=125961
2021-06-20 05:38:17 | INFO | train_inner | epoch 004: 2044 / 3002 loss=2.394, ppl=5.26, wps=5993.4, ups=0.09, wpb=64764, bsz=128, num_updates=10985, lr=9.99201e-05, gnorm=2.05, loss_scale=8, train_wall=10, gb_free=2.8, wall=125972
2021-06-20 05:38:28 | INFO | train_inner | epoch 004: 2045 / 3002 loss=2.554, ppl=5.87, wps=5805.5, ups=0.09, wpb=64788, bsz=128, num_updates=10986, lr=9.99201e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=125983
2021-06-20 05:38:39 | INFO | train_inner | epoch 004: 2046 / 3002 loss=2.592, ppl=6.03, wps=5895.7, ups=0.09, wpb=64870, bsz=128, num_updates=10987, lr=9.99201e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=125994
2021-06-20 05:38:51 | INFO | train_inner | epoch 004: 2047 / 3002 loss=2.61, ppl=6.11, wps=5799.3, ups=0.09, wpb=64871, bsz=128, num_updates=10988, lr=9.99201e-05, gnorm=2.342, loss_scale=8, train_wall=11, gb_free=2.8, wall=126005
2021-06-20 05:39:02 | INFO | train_inner | epoch 004: 2048 / 3002 loss=2.405, ppl=5.3, wps=5881.2, ups=0.09, wpb=64791, bsz=128, num_updates=10989, lr=9.99201e-05, gnorm=2.087, loss_scale=16, train_wall=11, gb_free=2.8, wall=126016
2021-06-20 05:39:13 | INFO | train_inner | epoch 004: 2049 / 3002 loss=2.664, ppl=6.34, wps=5856.2, ups=0.09, wpb=64816, bsz=128, num_updates=10990, lr=9.99201e-05, gnorm=2.007, loss_scale=16, train_wall=11, gb_free=2.8, wall=126027
2021-06-20 05:39:24 | INFO | train_inner | epoch 004: 2050 / 3002 loss=2.561, ppl=5.9, wps=5794.8, ups=0.09, wpb=64850, bsz=128, num_updates=10991, lr=9.99201e-05, gnorm=2.019, loss_scale=16, train_wall=11, gb_free=2.8, wall=126038
2021-06-20 05:39:35 | INFO | train_inner | epoch 004: 2051 / 3002 loss=2.555, ppl=5.88, wps=5923, ups=0.09, wpb=64793, bsz=128, num_updates=10992, lr=9.99201e-05, gnorm=2.063, loss_scale=16, train_wall=10, gb_free=2.8, wall=126049
2021-06-20 05:39:46 | INFO | train_inner | epoch 004: 2052 / 3002 loss=2.612, ppl=6.11, wps=5820.4, ups=0.09, wpb=64819, bsz=128, num_updates=10993, lr=9.992e-05, gnorm=2.004, loss_scale=16, train_wall=11, gb_free=2.8, wall=126060
2021-06-20 05:39:57 | INFO | train_inner | epoch 004: 2053 / 3002 loss=2.542, ppl=5.83, wps=5910.7, ups=0.09, wpb=64896, bsz=128, num_updates=10994, lr=9.992e-05, gnorm=2.06, loss_scale=16, train_wall=11, gb_free=2.8, wall=126071
2021-06-20 05:40:08 | INFO | train_inner | epoch 004: 2054 / 3002 loss=2.438, ppl=5.42, wps=5814.6, ups=0.09, wpb=64835, bsz=128, num_updates=10995, lr=9.992e-05, gnorm=2.064, loss_scale=16, train_wall=11, gb_free=2.8, wall=126082
2021-06-20 05:40:19 | INFO | train_inner | epoch 004: 2055 / 3002 loss=2.546, ppl=5.84, wps=5792.7, ups=0.09, wpb=64772, bsz=128, num_updates=10996, lr=9.992e-05, gnorm=2.032, loss_scale=16, train_wall=11, gb_free=2.8, wall=126094
2021-06-20 05:40:30 | INFO | train_inner | epoch 004: 2056 / 3002 loss=2.456, ppl=5.49, wps=5834.1, ups=0.09, wpb=64896, bsz=128, num_updates=10997, lr=9.992e-05, gnorm=2.096, loss_scale=16, train_wall=11, gb_free=2.8, wall=126105
2021-06-20 05:40:42 | INFO | train_inner | epoch 004: 2057 / 3002 loss=2.635, ppl=6.21, wps=5823.9, ups=0.09, wpb=64828, bsz=128, num_updates=10998, lr=9.992e-05, gnorm=2.086, loss_scale=16, train_wall=11, gb_free=2.8, wall=126116
2021-06-20 05:40:53 | INFO | train_inner | epoch 004: 2058 / 3002 loss=2.484, ppl=5.6, wps=5905.6, ups=0.09, wpb=64913, bsz=128, num_updates=10999, lr=9.992e-05, gnorm=2.21, loss_scale=16, train_wall=11, gb_free=2.8, wall=126127
2021-06-20 05:41:04 | INFO | train_inner | epoch 004: 2059 / 3002 loss=2.629, ppl=6.18, wps=5909.3, ups=0.09, wpb=64871, bsz=128, num_updates=11000, lr=9.992e-05, gnorm=2.144, loss_scale=16, train_wall=11, gb_free=2.8, wall=126138
2021-06-20 05:41:15 | INFO | train_inner | epoch 004: 2060 / 3002 loss=2.547, ppl=5.84, wps=5853.6, ups=0.09, wpb=64908, bsz=128, num_updates=11001, lr=9.992e-05, gnorm=2.069, loss_scale=16, train_wall=11, gb_free=2.8, wall=126149
2021-06-20 05:41:26 | INFO | train_inner | epoch 004: 2061 / 3002 loss=2.565, ppl=5.92, wps=5849.6, ups=0.09, wpb=64855, bsz=128, num_updates=11002, lr=9.992e-05, gnorm=2.043, loss_scale=16, train_wall=11, gb_free=2.8, wall=126160
2021-06-20 05:41:37 | INFO | train_inner | epoch 004: 2062 / 3002 loss=2.576, ppl=5.96, wps=5728.6, ups=0.09, wpb=64824, bsz=128, num_updates=11003, lr=9.992e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=126171
2021-06-20 05:41:48 | INFO | train_inner | epoch 004: 2063 / 3002 loss=2.647, ppl=6.26, wps=5867.2, ups=0.09, wpb=64883, bsz=128, num_updates=11004, lr=9.992e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=126182
2021-06-20 05:41:59 | INFO | train_inner | epoch 004: 2064 / 3002 loss=2.533, ppl=5.79, wps=5844.5, ups=0.09, wpb=64851, bsz=128, num_updates=11005, lr=9.992e-05, gnorm=2.055, loss_scale=16, train_wall=11, gb_free=2.8, wall=126194
2021-06-20 05:42:10 | INFO | train_inner | epoch 004: 2065 / 3002 loss=2.472, ppl=5.55, wps=5989.2, ups=0.09, wpb=64829, bsz=128, num_updates=11006, lr=9.99199e-05, gnorm=2.555, loss_scale=16, train_wall=10, gb_free=2.8, wall=126204
2021-06-20 05:42:21 | INFO | train_inner | epoch 004: 2066 / 3002 loss=2.458, ppl=5.5, wps=5854.1, ups=0.09, wpb=64755, bsz=128, num_updates=11007, lr=9.99199e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=126215
2021-06-20 05:42:32 | INFO | train_inner | epoch 004: 2067 / 3002 loss=2.415, ppl=5.33, wps=5933.7, ups=0.09, wpb=64864, bsz=128, num_updates=11008, lr=9.99199e-05, gnorm=2.061, loss_scale=16, train_wall=10, gb_free=2.8, wall=126226
2021-06-20 05:42:43 | INFO | train_inner | epoch 004: 2068 / 3002 loss=2.547, ppl=5.84, wps=5846.9, ups=0.09, wpb=64836, bsz=128, num_updates=11009, lr=9.99199e-05, gnorm=2.048, loss_scale=16, train_wall=11, gb_free=2.8, wall=126237
2021-06-20 05:42:54 | INFO | train_inner | epoch 004: 2069 / 3002 loss=2.51, ppl=5.7, wps=5843.3, ups=0.09, wpb=64857, bsz=128, num_updates=11010, lr=9.99199e-05, gnorm=2.527, loss_scale=16, train_wall=11, gb_free=2.8, wall=126249
2021-06-20 05:43:05 | INFO | train_inner | epoch 004: 2070 / 3002 loss=2.497, ppl=5.65, wps=5959, ups=0.09, wpb=64874, bsz=128, num_updates=11011, lr=9.99199e-05, gnorm=2.075, loss_scale=16, train_wall=10, gb_free=2.8, wall=126259
2021-06-20 05:43:16 | INFO | train_inner | epoch 004: 2071 / 3002 loss=2.476, ppl=5.56, wps=5759, ups=0.09, wpb=64818, bsz=128, num_updates=11012, lr=9.99199e-05, gnorm=2.041, loss_scale=16, train_wall=11, gb_free=2.8, wall=126271
2021-06-20 05:43:27 | INFO | train_inner | epoch 004: 2072 / 3002 loss=2.649, ppl=6.27, wps=5805.6, ups=0.09, wpb=64771, bsz=128, num_updates=11013, lr=9.99199e-05, gnorm=2.053, loss_scale=16, train_wall=11, gb_free=2.8, wall=126282
2021-06-20 05:43:39 | INFO | train_inner | epoch 004: 2073 / 3002 loss=2.479, ppl=5.58, wps=5823.8, ups=0.09, wpb=64826, bsz=128, num_updates=11014, lr=9.99199e-05, gnorm=2.158, loss_scale=16, train_wall=11, gb_free=2.8, wall=126293
2021-06-20 05:43:50 | INFO | train_inner | epoch 004: 2074 / 3002 loss=2.647, ppl=6.26, wps=5741, ups=0.09, wpb=64803, bsz=128, num_updates=11015, lr=9.99199e-05, gnorm=2.09, loss_scale=16, train_wall=11, gb_free=2.8, wall=126304
2021-06-20 05:44:01 | INFO | train_inner | epoch 004: 2075 / 3002 loss=2.664, ppl=6.34, wps=5984.6, ups=0.09, wpb=64807, bsz=128, num_updates=11016, lr=9.99199e-05, gnorm=2.125, loss_scale=16, train_wall=10, gb_free=2.8, wall=126315
2021-06-20 05:44:12 | INFO | train_inner | epoch 004: 2076 / 3002 loss=2.588, ppl=6.01, wps=5976.9, ups=0.09, wpb=64918, bsz=128, num_updates=11017, lr=9.99199e-05, gnorm=2.02, loss_scale=16, train_wall=10, gb_free=2.8, wall=126326
2021-06-20 05:44:23 | INFO | train_inner | epoch 004: 2077 / 3002 loss=2.336, ppl=5.05, wps=5908.3, ups=0.09, wpb=64872, bsz=128, num_updates=11018, lr=9.99198e-05, gnorm=2.056, loss_scale=16, train_wall=10, gb_free=2.8, wall=126337
2021-06-20 05:44:34 | INFO | train_inner | epoch 004: 2078 / 3002 loss=2.567, ppl=5.92, wps=5790.6, ups=0.09, wpb=64856, bsz=128, num_updates=11019, lr=9.99198e-05, gnorm=2.015, loss_scale=16, train_wall=11, gb_free=2.8, wall=126348
2021-06-20 05:44:45 | INFO | train_inner | epoch 004: 2079 / 3002 loss=2.652, ppl=6.28, wps=5762.9, ups=0.09, wpb=64808, bsz=128, num_updates=11020, lr=9.99198e-05, gnorm=2.113, loss_scale=16, train_wall=11, gb_free=2.8, wall=126359
2021-06-20 05:44:56 | INFO | train_inner | epoch 004: 2080 / 3002 loss=2.481, ppl=5.58, wps=5817.4, ups=0.09, wpb=64811, bsz=128, num_updates=11021, lr=9.99198e-05, gnorm=2.024, loss_scale=16, train_wall=11, gb_free=2.8, wall=126370
2021-06-20 05:45:07 | INFO | train_inner | epoch 004: 2081 / 3002 loss=2.481, ppl=5.58, wps=5875, ups=0.09, wpb=64833, bsz=128, num_updates=11022, lr=9.99198e-05, gnorm=2.357, loss_scale=16, train_wall=11, gb_free=2.8, wall=126382
2021-06-20 05:45:18 | INFO | train_inner | epoch 004: 2082 / 3002 loss=2.645, ppl=6.26, wps=5789.9, ups=0.09, wpb=64822, bsz=128, num_updates=11023, lr=9.99198e-05, gnorm=2.157, loss_scale=16, train_wall=11, gb_free=2.8, wall=126393
2021-06-20 05:45:29 | INFO | train_inner | epoch 004: 2083 / 3002 loss=2.472, ppl=5.55, wps=5939.8, ups=0.09, wpb=64874, bsz=128, num_updates=11024, lr=9.99198e-05, gnorm=1.977, loss_scale=16, train_wall=10, gb_free=2.8, wall=126404
2021-06-20 05:45:41 | INFO | train_inner | epoch 004: 2084 / 3002 loss=2.544, ppl=5.83, wps=5777.8, ups=0.09, wpb=64756, bsz=128, num_updates=11025, lr=9.99198e-05, gnorm=2.048, loss_scale=16, train_wall=11, gb_free=2.8, wall=126415
2021-06-20 05:45:52 | INFO | train_inner | epoch 004: 2085 / 3002 loss=2.508, ppl=5.69, wps=5864.4, ups=0.09, wpb=64783, bsz=128, num_updates=11026, lr=9.99198e-05, gnorm=2.142, loss_scale=16, train_wall=11, gb_free=2.8, wall=126426
2021-06-20 05:46:02 | INFO | train_inner | epoch 004: 2086 / 3002 loss=2.626, ppl=6.18, wps=6035.1, ups=0.09, wpb=64900, bsz=128, num_updates=11027, lr=9.99198e-05, gnorm=2.079, loss_scale=16, train_wall=10, gb_free=2.8, wall=126437
2021-06-20 05:46:14 | INFO | train_inner | epoch 004: 2087 / 3002 loss=2.453, ppl=5.48, wps=5784.1, ups=0.09, wpb=64834, bsz=128, num_updates=11028, lr=9.99198e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=126448
2021-06-20 05:46:25 | INFO | train_inner | epoch 004: 2088 / 3002 loss=2.611, ppl=6.11, wps=5892.7, ups=0.09, wpb=64825, bsz=128, num_updates=11029, lr=9.99198e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=126459
2021-06-20 05:46:36 | INFO | train_inner | epoch 004: 2089 / 3002 loss=2.728, ppl=6.63, wps=5803.5, ups=0.09, wpb=64783, bsz=128, num_updates=11030, lr=9.99198e-05, gnorm=2.019, loss_scale=16, train_wall=11, gb_free=2.8, wall=126470
2021-06-20 05:46:47 | INFO | train_inner | epoch 004: 2090 / 3002 loss=2.623, ppl=6.16, wps=5821.7, ups=0.09, wpb=64851, bsz=128, num_updates=11031, lr=9.99197e-05, gnorm=2.07, loss_scale=16, train_wall=11, gb_free=2.8, wall=126481
2021-06-20 05:46:58 | INFO | train_inner | epoch 004: 2091 / 3002 loss=2.554, ppl=5.87, wps=5839.6, ups=0.09, wpb=64806, bsz=128, num_updates=11032, lr=9.99197e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=126492
2021-06-20 05:47:09 | INFO | train_inner | epoch 004: 2092 / 3002 loss=2.691, ppl=6.46, wps=5877.2, ups=0.09, wpb=64778, bsz=128, num_updates=11033, lr=9.99197e-05, gnorm=2.071, loss_scale=16, train_wall=11, gb_free=2.8, wall=126503
2021-06-20 05:47:20 | INFO | train_inner | epoch 004: 2093 / 3002 loss=2.46, ppl=5.5, wps=5818.7, ups=0.09, wpb=64920, bsz=128, num_updates=11034, lr=9.99197e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=126514
2021-06-20 05:47:31 | INFO | train_inner | epoch 004: 2094 / 3002 loss=2.698, ppl=6.49, wps=5780.7, ups=0.09, wpb=64856, bsz=128, num_updates=11035, lr=9.99197e-05, gnorm=2.025, loss_scale=16, train_wall=11, gb_free=2.8, wall=126526
2021-06-20 05:47:42 | INFO | train_inner | epoch 004: 2095 / 3002 loss=2.509, ppl=5.69, wps=5844.3, ups=0.09, wpb=64814, bsz=128, num_updates=11036, lr=9.99197e-05, gnorm=1.973, loss_scale=16, train_wall=11, gb_free=2.8, wall=126537
2021-06-20 05:47:54 | INFO | train_inner | epoch 004: 2096 / 3002 loss=2.599, ppl=6.06, wps=5836, ups=0.09, wpb=64825, bsz=128, num_updates=11037, lr=9.99197e-05, gnorm=2.074, loss_scale=16, train_wall=11, gb_free=2.8, wall=126548
2021-06-20 05:48:05 | INFO | train_inner | epoch 004: 2097 / 3002 loss=2.552, ppl=5.86, wps=5760.5, ups=0.09, wpb=64887, bsz=128, num_updates=11038, lr=9.99197e-05, gnorm=2.395, loss_scale=16, train_wall=11, gb_free=2.8, wall=126559
2021-06-20 05:48:16 | INFO | train_inner | epoch 004: 2098 / 3002 loss=2.391, ppl=5.24, wps=5801.9, ups=0.09, wpb=64732, bsz=128, num_updates=11039, lr=9.99197e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=126570
2021-06-20 05:48:27 | INFO | train_inner | epoch 004: 2099 / 3002 loss=2.579, ppl=5.98, wps=5948.2, ups=0.09, wpb=64874, bsz=128, num_updates=11040, lr=9.99197e-05, gnorm=1.958, loss_scale=16, train_wall=10, gb_free=2.8, wall=126581
2021-06-20 05:48:38 | INFO | train_inner | epoch 004: 2100 / 3002 loss=2.583, ppl=5.99, wps=5696, ups=0.09, wpb=64800, bsz=128, num_updates=11041, lr=9.99197e-05, gnorm=2.195, loss_scale=16, train_wall=11, gb_free=2.8, wall=126593
2021-06-20 05:48:49 | INFO | train_inner | epoch 004: 2101 / 3002 loss=2.69, ppl=6.45, wps=5764.6, ups=0.09, wpb=64756, bsz=128, num_updates=11042, lr=9.99197e-05, gnorm=2.073, loss_scale=16, train_wall=11, gb_free=2.8, wall=126604
2021-06-20 05:49:01 | INFO | train_inner | epoch 004: 2102 / 3002 loss=2.697, ppl=6.49, wps=5815.6, ups=0.09, wpb=64815, bsz=128, num_updates=11043, lr=9.99196e-05, gnorm=2.058, loss_scale=16, train_wall=11, gb_free=2.8, wall=126615
2021-06-20 05:49:12 | INFO | train_inner | epoch 004: 2103 / 3002 loss=2.439, ppl=5.42, wps=5898.5, ups=0.09, wpb=64781, bsz=128, num_updates=11044, lr=9.99196e-05, gnorm=2.067, loss_scale=16, train_wall=11, gb_free=2.8, wall=126626
2021-06-20 05:49:23 | INFO | train_inner | epoch 004: 2104 / 3002 loss=2.534, ppl=5.79, wps=5781.6, ups=0.09, wpb=64797, bsz=128, num_updates=11045, lr=9.99196e-05, gnorm=2.036, loss_scale=16, train_wall=11, gb_free=2.8, wall=126637
2021-06-20 05:49:34 | INFO | train_inner | epoch 004: 2105 / 3002 loss=2.745, ppl=6.7, wps=5700, ups=0.09, wpb=64804, bsz=128, num_updates=11046, lr=9.99196e-05, gnorm=2.118, loss_scale=16, train_wall=11, gb_free=2.8, wall=126649
2021-06-20 05:49:45 | INFO | train_inner | epoch 004: 2106 / 3002 loss=2.549, ppl=5.85, wps=5876.2, ups=0.09, wpb=64834, bsz=128, num_updates=11047, lr=9.99196e-05, gnorm=2.084, loss_scale=16, train_wall=11, gb_free=2.8, wall=126660
2021-06-20 05:49:56 | INFO | train_inner | epoch 004: 2107 / 3002 loss=2.511, ppl=5.7, wps=5871.3, ups=0.09, wpb=64898, bsz=128, num_updates=11048, lr=9.99196e-05, gnorm=2.03, loss_scale=16, train_wall=11, gb_free=2.8, wall=126671
2021-06-20 05:50:07 | INFO | train_inner | epoch 004: 2108 / 3002 loss=2.422, ppl=5.36, wps=5896.6, ups=0.09, wpb=64802, bsz=128, num_updates=11049, lr=9.99196e-05, gnorm=1.936, loss_scale=16, train_wall=11, gb_free=2.8, wall=126682
2021-06-20 05:50:18 | INFO | train_inner | epoch 004: 2109 / 3002 loss=2.561, ppl=5.9, wps=5862.2, ups=0.09, wpb=64792, bsz=128, num_updates=11050, lr=9.99196e-05, gnorm=2.098, loss_scale=16, train_wall=11, gb_free=2.8, wall=126693
2021-06-20 05:50:29 | INFO | train_inner | epoch 004: 2110 / 3002 loss=2.492, ppl=5.62, wps=5854.3, ups=0.09, wpb=64788, bsz=128, num_updates=11051, lr=9.99196e-05, gnorm=2.119, loss_scale=16, train_wall=11, gb_free=2.8, wall=126704
2021-06-20 05:50:40 | INFO | train_inner | epoch 004: 2111 / 3002 loss=2.565, ppl=5.92, wps=5909.8, ups=0.09, wpb=64935, bsz=128, num_updates=11052, lr=9.99196e-05, gnorm=2.059, loss_scale=16, train_wall=11, gb_free=2.8, wall=126715
2021-06-20 05:50:51 | INFO | train_inner | epoch 004: 2112 / 3002 loss=2.719, ppl=6.58, wps=5899.1, ups=0.09, wpb=64898, bsz=128, num_updates=11053, lr=9.99196e-05, gnorm=2.074, loss_scale=16, train_wall=11, gb_free=2.8, wall=126726
2021-06-20 05:51:03 | INFO | train_inner | epoch 004: 2113 / 3002 loss=2.64, ppl=6.23, wps=5817.6, ups=0.09, wpb=64820, bsz=128, num_updates=11054, lr=9.99196e-05, gnorm=2.121, loss_scale=16, train_wall=11, gb_free=2.8, wall=126737
2021-06-20 05:51:14 | INFO | train_inner | epoch 004: 2114 / 3002 loss=2.585, ppl=6, wps=5814.2, ups=0.09, wpb=64837, bsz=128, num_updates=11055, lr=9.99196e-05, gnorm=2.011, loss_scale=16, train_wall=11, gb_free=2.8, wall=126748
2021-06-20 05:51:25 | INFO | train_inner | epoch 004: 2115 / 3002 loss=2.507, ppl=5.68, wps=5781.8, ups=0.09, wpb=64797, bsz=128, num_updates=11056, lr=9.99195e-05, gnorm=2.079, loss_scale=16, train_wall=11, gb_free=2.8, wall=126759
2021-06-20 05:51:36 | INFO | train_inner | epoch 004: 2116 / 3002 loss=2.488, ppl=5.61, wps=5831.3, ups=0.09, wpb=64711, bsz=128, num_updates=11057, lr=9.99195e-05, gnorm=2.168, loss_scale=16, train_wall=11, gb_free=2.8, wall=126770
2021-06-20 05:51:47 | INFO | train_inner | epoch 004: 2117 / 3002 loss=2.416, ppl=5.34, wps=5789.9, ups=0.09, wpb=64851, bsz=128, num_updates=11058, lr=9.99195e-05, gnorm=2.266, loss_scale=16, train_wall=11, gb_free=2.8, wall=126781
2021-06-20 05:51:58 | INFO | train_inner | epoch 004: 2118 / 3002 loss=2.552, ppl=5.87, wps=5983.7, ups=0.09, wpb=64929, bsz=128, num_updates=11059, lr=9.99195e-05, gnorm=1.979, loss_scale=16, train_wall=10, gb_free=2.8, wall=126792
2021-06-20 05:52:09 | INFO | train_inner | epoch 004: 2119 / 3002 loss=2.679, ppl=6.41, wps=5788.2, ups=0.09, wpb=64886, bsz=128, num_updates=11060, lr=9.99195e-05, gnorm=2.027, loss_scale=16, train_wall=11, gb_free=2.8, wall=126804
2021-06-20 05:52:20 | INFO | train_inner | epoch 004: 2120 / 3002 loss=2.512, ppl=5.7, wps=5949.8, ups=0.09, wpb=64771, bsz=128, num_updates=11061, lr=9.99195e-05, gnorm=2.228, loss_scale=16, train_wall=10, gb_free=2.8, wall=126814
2021-06-20 05:52:31 | INFO | train_inner | epoch 004: 2121 / 3002 loss=2.534, ppl=5.79, wps=5781.5, ups=0.09, wpb=64822, bsz=128, num_updates=11062, lr=9.99195e-05, gnorm=2.064, loss_scale=16, train_wall=11, gb_free=2.8, wall=126826
2021-06-20 05:52:42 | INFO | train_inner | epoch 004: 2122 / 3002 loss=2.675, ppl=6.39, wps=5807.6, ups=0.09, wpb=64832, bsz=128, num_updates=11063, lr=9.99195e-05, gnorm=2.218, loss_scale=16, train_wall=11, gb_free=2.8, wall=126837
2021-06-20 05:52:54 | INFO | train_inner | epoch 004: 2123 / 3002 loss=2.496, ppl=5.64, wps=5731.6, ups=0.09, wpb=64788, bsz=128, num_updates=11064, lr=9.99195e-05, gnorm=2.04, loss_scale=16, train_wall=11, gb_free=2.8, wall=126848
2021-06-20 05:53:05 | INFO | train_inner | epoch 004: 2124 / 3002 loss=2.862, ppl=7.27, wps=5995.2, ups=0.09, wpb=64894, bsz=128, num_updates=11065, lr=9.99195e-05, gnorm=2.042, loss_scale=16, train_wall=10, gb_free=2.8, wall=126859
2021-06-20 05:53:16 | INFO | train_inner | epoch 004: 2125 / 3002 loss=2.548, ppl=5.85, wps=5797.7, ups=0.09, wpb=64819, bsz=128, num_updates=11066, lr=9.99195e-05, gnorm=2.006, loss_scale=16, train_wall=11, gb_free=2.8, wall=126870
2021-06-20 05:53:27 | INFO | train_inner | epoch 004: 2126 / 3002 loss=2.67, ppl=6.37, wps=6020.6, ups=0.09, wpb=64869, bsz=128, num_updates=11067, lr=9.99195e-05, gnorm=2.048, loss_scale=16, train_wall=10, gb_free=2.8, wall=126881
2021-06-20 05:53:38 | INFO | train_inner | epoch 004: 2127 / 3002 loss=2.595, ppl=6.04, wps=5832.2, ups=0.09, wpb=64815, bsz=128, num_updates=11068, lr=9.99194e-05, gnorm=2.05, loss_scale=16, train_wall=11, gb_free=2.8, wall=126892
2021-06-20 05:53:49 | INFO | train_inner | epoch 004: 2128 / 3002 loss=2.542, ppl=5.82, wps=5814.8, ups=0.09, wpb=64775, bsz=128, num_updates=11069, lr=9.99194e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=126903
2021-06-20 05:54:00 | INFO | train_inner | epoch 004: 2129 / 3002 loss=2.518, ppl=5.73, wps=5809.8, ups=0.09, wpb=64817, bsz=128, num_updates=11070, lr=9.99194e-05, gnorm=2.015, loss_scale=16, train_wall=11, gb_free=2.8, wall=126914
2021-06-20 05:54:11 | INFO | train_inner | epoch 004: 2130 / 3002 loss=2.474, ppl=5.55, wps=5855.2, ups=0.09, wpb=64902, bsz=128, num_updates=11071, lr=9.99194e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=126925
2021-06-20 05:54:22 | INFO | train_inner | epoch 004: 2131 / 3002 loss=2.577, ppl=5.97, wps=5902.7, ups=0.09, wpb=64898, bsz=128, num_updates=11072, lr=9.99194e-05, gnorm=2.077, loss_scale=16, train_wall=11, gb_free=2.8, wall=126936
2021-06-20 05:54:33 | INFO | train_inner | epoch 004: 2132 / 3002 loss=2.419, ppl=5.35, wps=5926.8, ups=0.09, wpb=64760, bsz=128, num_updates=11073, lr=9.99194e-05, gnorm=1.934, loss_scale=16, train_wall=10, gb_free=2.8, wall=126947
2021-06-20 05:54:44 | INFO | train_inner | epoch 004: 2133 / 3002 loss=2.434, ppl=5.4, wps=5871.3, ups=0.09, wpb=64956, bsz=128, num_updates=11074, lr=9.99194e-05, gnorm=2.025, loss_scale=16, train_wall=11, gb_free=2.8, wall=126958
2021-06-20 05:54:55 | INFO | train_inner | epoch 004: 2134 / 3002 loss=2.629, ppl=6.18, wps=5849.2, ups=0.09, wpb=64846, bsz=128, num_updates=11075, lr=9.99194e-05, gnorm=2.059, loss_scale=16, train_wall=11, gb_free=2.8, wall=126969
2021-06-20 05:55:06 | INFO | train_inner | epoch 004: 2135 / 3002 loss=2.6, ppl=6.06, wps=5817.3, ups=0.09, wpb=64858, bsz=128, num_updates=11076, lr=9.99194e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=126981
2021-06-20 05:55:17 | INFO | train_inner | epoch 004: 2136 / 3002 loss=2.531, ppl=5.78, wps=5849.9, ups=0.09, wpb=64888, bsz=128, num_updates=11077, lr=9.99194e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=126992
2021-06-20 05:55:28 | INFO | train_inner | epoch 004: 2137 / 3002 loss=2.57, ppl=5.94, wps=5854.1, ups=0.09, wpb=64821, bsz=128, num_updates=11078, lr=9.99194e-05, gnorm=2.053, loss_scale=16, train_wall=11, gb_free=2.8, wall=127003
2021-06-20 05:55:40 | INFO | train_inner | epoch 004: 2138 / 3002 loss=2.631, ppl=6.19, wps=5818.5, ups=0.09, wpb=64770, bsz=128, num_updates=11079, lr=9.99194e-05, gnorm=2.413, loss_scale=16, train_wall=11, gb_free=2.8, wall=127014
2021-06-20 05:55:51 | INFO | train_inner | epoch 004: 2139 / 3002 loss=2.652, ppl=6.28, wps=5912.9, ups=0.09, wpb=64861, bsz=128, num_updates=11080, lr=9.99194e-05, gnorm=2.011, loss_scale=16, train_wall=10, gb_free=2.8, wall=127025
2021-06-20 05:56:02 | INFO | train_inner | epoch 004: 2140 / 3002 loss=2.485, ppl=5.6, wps=5842.8, ups=0.09, wpb=64891, bsz=128, num_updates=11081, lr=9.99193e-05, gnorm=1.908, loss_scale=16, train_wall=11, gb_free=2.8, wall=127036
2021-06-20 05:56:13 | INFO | train_inner | epoch 004: 2141 / 3002 loss=2.475, ppl=5.56, wps=5785.6, ups=0.09, wpb=64882, bsz=128, num_updates=11082, lr=9.99193e-05, gnorm=1.882, loss_scale=16, train_wall=11, gb_free=2.8, wall=127047
2021-06-20 05:56:24 | INFO | train_inner | epoch 004: 2142 / 3002 loss=2.648, ppl=6.27, wps=5812.8, ups=0.09, wpb=64898, bsz=128, num_updates=11083, lr=9.99193e-05, gnorm=2.015, loss_scale=16, train_wall=11, gb_free=2.8, wall=127058
2021-06-20 05:56:35 | INFO | train_inner | epoch 004: 2143 / 3002 loss=2.57, ppl=5.94, wps=5865.8, ups=0.09, wpb=64767, bsz=128, num_updates=11084, lr=9.99193e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=127069
2021-06-20 05:56:46 | INFO | train_inner | epoch 004: 2144 / 3002 loss=2.503, ppl=5.67, wps=5830.1, ups=0.09, wpb=64756, bsz=128, num_updates=11085, lr=9.99193e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=127081
2021-06-20 05:56:57 | INFO | train_inner | epoch 004: 2145 / 3002 loss=2.618, ppl=6.14, wps=5750.3, ups=0.09, wpb=64749, bsz=128, num_updates=11086, lr=9.99193e-05, gnorm=2.059, loss_scale=16, train_wall=11, gb_free=2.8, wall=127092
2021-06-20 05:57:09 | INFO | train_inner | epoch 004: 2146 / 3002 loss=2.537, ppl=5.8, wps=5765.4, ups=0.09, wpb=64769, bsz=128, num_updates=11087, lr=9.99193e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=127103
2021-06-20 05:57:20 | INFO | train_inner | epoch 004: 2147 / 3002 loss=2.63, ppl=6.19, wps=5807.6, ups=0.09, wpb=64850, bsz=128, num_updates=11088, lr=9.99193e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=127114
2021-06-20 05:57:31 | INFO | train_inner | epoch 004: 2148 / 3002 loss=2.554, ppl=5.87, wps=5872, ups=0.09, wpb=64887, bsz=128, num_updates=11089, lr=9.99193e-05, gnorm=2.015, loss_scale=16, train_wall=11, gb_free=2.8, wall=127125
2021-06-20 05:57:42 | INFO | train_inner | epoch 004: 2149 / 3002 loss=2.436, ppl=5.41, wps=5898.1, ups=0.09, wpb=64902, bsz=128, num_updates=11090, lr=9.99193e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=127136
2021-06-20 05:57:53 | INFO | train_inner | epoch 004: 2150 / 3002 loss=2.621, ppl=6.15, wps=5908.4, ups=0.09, wpb=64833, bsz=128, num_updates=11091, lr=9.99193e-05, gnorm=2.04, loss_scale=16, train_wall=11, gb_free=2.8, wall=127147
2021-06-20 05:58:04 | INFO | train_inner | epoch 004: 2151 / 3002 loss=2.609, ppl=6.1, wps=5957.5, ups=0.09, wpb=64846, bsz=128, num_updates=11092, lr=9.99193e-05, gnorm=2.107, loss_scale=16, train_wall=10, gb_free=2.8, wall=127158
2021-06-20 05:58:15 | INFO | train_inner | epoch 004: 2152 / 3002 loss=2.543, ppl=5.83, wps=5861.4, ups=0.09, wpb=64836, bsz=128, num_updates=11093, lr=9.99192e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=127169
2021-06-20 05:58:26 | INFO | train_inner | epoch 004: 2153 / 3002 loss=2.543, ppl=5.83, wps=5798.9, ups=0.09, wpb=64841, bsz=128, num_updates=11094, lr=9.99192e-05, gnorm=2.001, loss_scale=16, train_wall=11, gb_free=2.8, wall=127180
2021-06-20 05:58:37 | INFO | train_inner | epoch 004: 2154 / 3002 loss=2.511, ppl=5.7, wps=5890.7, ups=0.09, wpb=64852, bsz=128, num_updates=11095, lr=9.99192e-05, gnorm=2.137, loss_scale=16, train_wall=11, gb_free=2.8, wall=127191
2021-06-20 05:58:48 | INFO | train_inner | epoch 004: 2155 / 3002 loss=2.537, ppl=5.8, wps=5844.8, ups=0.09, wpb=64826, bsz=128, num_updates=11096, lr=9.99192e-05, gnorm=1.964, loss_scale=16, train_wall=11, gb_free=2.8, wall=127202
2021-06-20 05:58:59 | INFO | train_inner | epoch 004: 2156 / 3002 loss=2.625, ppl=6.17, wps=5839.8, ups=0.09, wpb=64743, bsz=128, num_updates=11097, lr=9.99192e-05, gnorm=2.067, loss_scale=16, train_wall=11, gb_free=2.8, wall=127214
2021-06-20 05:59:10 | INFO | train_inner | epoch 004: 2157 / 3002 loss=2.551, ppl=5.86, wps=5885.6, ups=0.09, wpb=64786, bsz=128, num_updates=11098, lr=9.99192e-05, gnorm=1.991, loss_scale=16, train_wall=11, gb_free=2.8, wall=127225
2021-06-20 05:59:21 | INFO | train_inner | epoch 004: 2158 / 3002 loss=2.594, ppl=6.04, wps=5882.5, ups=0.09, wpb=64897, bsz=128, num_updates=11099, lr=9.99192e-05, gnorm=2.089, loss_scale=16, train_wall=11, gb_free=2.8, wall=127236
2021-06-20 05:59:32 | INFO | train_inner | epoch 004: 2159 / 3002 loss=2.423, ppl=5.36, wps=5918, ups=0.09, wpb=64825, bsz=128, num_updates=11100, lr=9.99192e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=127247
2021-06-20 05:59:43 | INFO | train_inner | epoch 004: 2160 / 3002 loss=2.597, ppl=6.05, wps=5886, ups=0.09, wpb=64850, bsz=128, num_updates=11101, lr=9.99192e-05, gnorm=2.145, loss_scale=16, train_wall=11, gb_free=2.8, wall=127258
2021-06-20 05:59:54 | INFO | train_inner | epoch 004: 2161 / 3002 loss=2.659, ppl=6.32, wps=5837.2, ups=0.09, wpb=64868, bsz=128, num_updates=11102, lr=9.99192e-05, gnorm=2.304, loss_scale=16, train_wall=11, gb_free=2.8, wall=127269
2021-06-20 06:00:05 | INFO | train_inner | epoch 004: 2162 / 3002 loss=2.486, ppl=5.6, wps=5869.4, ups=0.09, wpb=64776, bsz=128, num_updates=11103, lr=9.99192e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=127280
2021-06-20 06:00:16 | INFO | train_inner | epoch 004: 2163 / 3002 loss=2.782, ppl=6.88, wps=5822.5, ups=0.09, wpb=64780, bsz=128, num_updates=11104, lr=9.99192e-05, gnorm=2.075, loss_scale=16, train_wall=11, gb_free=2.8, wall=127291
2021-06-20 06:00:28 | INFO | train_inner | epoch 004: 2164 / 3002 loss=2.603, ppl=6.08, wps=5752.8, ups=0.09, wpb=64828, bsz=128, num_updates=11105, lr=9.99192e-05, gnorm=2.155, loss_scale=16, train_wall=11, gb_free=2.8, wall=127302
2021-06-20 06:00:39 | INFO | train_inner | epoch 004: 2165 / 3002 loss=2.416, ppl=5.34, wps=5959.6, ups=0.09, wpb=64898, bsz=128, num_updates=11106, lr=9.99191e-05, gnorm=1.97, loss_scale=16, train_wall=10, gb_free=2.8, wall=127313
2021-06-20 06:00:50 | INFO | train_inner | epoch 004: 2166 / 3002 loss=2.66, ppl=6.32, wps=5723.3, ups=0.09, wpb=64820, bsz=128, num_updates=11107, lr=9.99191e-05, gnorm=2.001, loss_scale=16, train_wall=11, gb_free=2.8, wall=127324
2021-06-20 06:01:01 | INFO | train_inner | epoch 004: 2167 / 3002 loss=2.697, ppl=6.49, wps=5819.1, ups=0.09, wpb=64830, bsz=128, num_updates=11108, lr=9.99191e-05, gnorm=2.056, loss_scale=16, train_wall=11, gb_free=2.8, wall=127335
2021-06-20 06:01:12 | INFO | train_inner | epoch 004: 2168 / 3002 loss=2.542, ppl=5.83, wps=5820.1, ups=0.09, wpb=64731, bsz=128, num_updates=11109, lr=9.99191e-05, gnorm=2.108, loss_scale=16, train_wall=11, gb_free=2.8, wall=127347
2021-06-20 06:01:23 | INFO | train_inner | epoch 004: 2169 / 3002 loss=2.482, ppl=5.59, wps=5892.7, ups=0.09, wpb=64912, bsz=128, num_updates=11110, lr=9.99191e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=127358
2021-06-20 06:01:34 | INFO | train_inner | epoch 004: 2170 / 3002 loss=2.497, ppl=5.65, wps=5793.8, ups=0.09, wpb=64875, bsz=128, num_updates=11111, lr=9.99191e-05, gnorm=1.968, loss_scale=16, train_wall=11, gb_free=2.8, wall=127369
2021-06-20 06:01:46 | INFO | train_inner | epoch 004: 2171 / 3002 loss=2.709, ppl=6.54, wps=5854.9, ups=0.09, wpb=64791, bsz=128, num_updates=11112, lr=9.99191e-05, gnorm=2.048, loss_scale=16, train_wall=11, gb_free=2.8, wall=127380
2021-06-20 06:01:57 | INFO | train_inner | epoch 004: 2172 / 3002 loss=2.628, ppl=6.18, wps=5892.9, ups=0.09, wpb=64802, bsz=128, num_updates=11113, lr=9.99191e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=127391
2021-06-20 06:02:07 | INFO | train_inner | epoch 004: 2173 / 3002 loss=2.588, ppl=6.01, wps=5987.6, ups=0.09, wpb=64812, bsz=128, num_updates=11114, lr=9.99191e-05, gnorm=2.189, loss_scale=16, train_wall=10, gb_free=2.8, wall=127402
2021-06-20 06:02:19 | INFO | train_inner | epoch 004: 2174 / 3002 loss=2.567, ppl=5.93, wps=5776.3, ups=0.09, wpb=64762, bsz=128, num_updates=11115, lr=9.99191e-05, gnorm=1.982, loss_scale=16, train_wall=11, gb_free=2.8, wall=127413
2021-06-20 06:02:30 | INFO | train_inner | epoch 004: 2175 / 3002 loss=2.629, ppl=6.18, wps=5801.6, ups=0.09, wpb=64873, bsz=128, num_updates=11116, lr=9.99191e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=127424
2021-06-20 06:02:41 | INFO | train_inner | epoch 004: 2176 / 3002 loss=2.496, ppl=5.64, wps=5779, ups=0.09, wpb=64906, bsz=128, num_updates=11117, lr=9.99191e-05, gnorm=2.02, loss_scale=32, train_wall=11, gb_free=2.8, wall=127435
2021-06-20 06:02:52 | INFO | train_inner | epoch 004: 2177 / 3002 loss=2.736, ppl=6.66, wps=5909.8, ups=0.09, wpb=64759, bsz=128, num_updates=11118, lr=9.9919e-05, gnorm=2.014, loss_scale=32, train_wall=11, gb_free=2.8, wall=127446
2021-06-20 06:03:03 | INFO | train_inner | epoch 004: 2178 / 3002 loss=2.561, ppl=5.9, wps=5905, ups=0.09, wpb=64851, bsz=128, num_updates=11119, lr=9.9919e-05, gnorm=2.007, loss_scale=32, train_wall=11, gb_free=2.8, wall=127457
2021-06-20 06:03:14 | INFO | train_inner | epoch 004: 2179 / 3002 loss=2.431, ppl=5.39, wps=5783.9, ups=0.09, wpb=64844, bsz=128, num_updates=11120, lr=9.9919e-05, gnorm=2.162, loss_scale=32, train_wall=11, gb_free=2.8, wall=127468
2021-06-20 06:03:25 | INFO | train_inner | epoch 004: 2180 / 3002 loss=2.643, ppl=6.25, wps=5867.6, ups=0.09, wpb=64847, bsz=128, num_updates=11121, lr=9.9919e-05, gnorm=2.119, loss_scale=32, train_wall=11, gb_free=2.8, wall=127479
2021-06-20 06:03:36 | INFO | train_inner | epoch 004: 2181 / 3002 loss=2.653, ppl=6.29, wps=5768, ups=0.09, wpb=64794, bsz=128, num_updates=11122, lr=9.9919e-05, gnorm=2.078, loss_scale=32, train_wall=11, gb_free=2.8, wall=127491
2021-06-20 06:03:48 | INFO | train_inner | epoch 004: 2182 / 3002 loss=2.496, ppl=5.64, wps=5776, ups=0.09, wpb=64897, bsz=128, num_updates=11123, lr=9.9919e-05, gnorm=2.047, loss_scale=32, train_wall=11, gb_free=2.8, wall=127502
2021-06-20 06:03:59 | INFO | train_inner | epoch 004: 2183 / 3002 loss=2.665, ppl=6.34, wps=5884.9, ups=0.09, wpb=64856, bsz=128, num_updates=11124, lr=9.9919e-05, gnorm=2.075, loss_scale=32, train_wall=11, gb_free=2.8, wall=127513
2021-06-20 06:04:10 | INFO | train_inner | epoch 004: 2184 / 3002 loss=2.588, ppl=6.01, wps=5918.2, ups=0.09, wpb=64784, bsz=128, num_updates=11125, lr=9.9919e-05, gnorm=1.994, loss_scale=32, train_wall=10, gb_free=2.8, wall=127524
2021-06-20 06:04:21 | INFO | train_inner | epoch 004: 2185 / 3002 loss=2.568, ppl=5.93, wps=5880.2, ups=0.09, wpb=64855, bsz=128, num_updates=11126, lr=9.9919e-05, gnorm=2, loss_scale=32, train_wall=11, gb_free=2.8, wall=127535
2021-06-20 06:04:32 | INFO | train_inner | epoch 004: 2186 / 3002 loss=2.633, ppl=6.2, wps=5727.4, ups=0.09, wpb=64830, bsz=128, num_updates=11127, lr=9.9919e-05, gnorm=2.014, loss_scale=32, train_wall=11, gb_free=2.8, wall=127546
2021-06-20 06:04:43 | INFO | train_inner | epoch 004: 2187 / 3002 loss=2.591, ppl=6.03, wps=5865.1, ups=0.09, wpb=64851, bsz=128, num_updates=11128, lr=9.9919e-05, gnorm=2.096, loss_scale=32, train_wall=11, gb_free=2.8, wall=127557
2021-06-20 06:04:54 | INFO | train_inner | epoch 004: 2188 / 3002 loss=2.51, ppl=5.7, wps=5951, ups=0.09, wpb=64846, bsz=128, num_updates=11129, lr=9.9919e-05, gnorm=1.974, loss_scale=32, train_wall=10, gb_free=2.8, wall=127568
2021-06-20 06:05:05 | INFO | train_inner | epoch 004: 2189 / 3002 loss=2.576, ppl=5.96, wps=5793.2, ups=0.09, wpb=64828, bsz=128, num_updates=11130, lr=9.9919e-05, gnorm=2.11, loss_scale=32, train_wall=11, gb_free=2.8, wall=127579
2021-06-20 06:05:16 | INFO | train_inner | epoch 004: 2190 / 3002 loss=2.514, ppl=5.71, wps=5802.4, ups=0.09, wpb=64834, bsz=128, num_updates=11131, lr=9.99189e-05, gnorm=2.256, loss_scale=32, train_wall=11, gb_free=2.8, wall=127591
2021-06-20 06:05:27 | INFO | train_inner | epoch 004: 2191 / 3002 loss=2.618, ppl=6.14, wps=5806, ups=0.09, wpb=64798, bsz=128, num_updates=11132, lr=9.99189e-05, gnorm=2.75, loss_scale=32, train_wall=11, gb_free=2.8, wall=127602
2021-06-20 06:05:38 | INFO | train_inner | epoch 004: 2192 / 3002 loss=2.543, ppl=5.83, wps=5863.5, ups=0.09, wpb=64819, bsz=128, num_updates=11133, lr=9.99189e-05, gnorm=2.11, loss_scale=32, train_wall=11, gb_free=2.8, wall=127613
2021-06-20 06:05:49 | INFO | train_inner | epoch 004: 2193 / 3002 loss=2.469, ppl=5.54, wps=5917.1, ups=0.09, wpb=64848, bsz=128, num_updates=11134, lr=9.99189e-05, gnorm=2.01, loss_scale=32, train_wall=11, gb_free=2.8, wall=127624
2021-06-20 06:06:01 | INFO | train_inner | epoch 004: 2194 / 3002 loss=2.53, ppl=5.78, wps=5826.5, ups=0.09, wpb=64779, bsz=128, num_updates=11135, lr=9.99189e-05, gnorm=2.14, loss_scale=32, train_wall=11, gb_free=2.8, wall=127635
2021-06-20 06:06:12 | INFO | train_inner | epoch 004: 2195 / 3002 loss=2.62, ppl=6.15, wps=5839.6, ups=0.09, wpb=64747, bsz=128, num_updates=11136, lr=9.99189e-05, gnorm=1.991, loss_scale=32, train_wall=11, gb_free=2.8, wall=127646
2021-06-20 06:06:23 | INFO | train_inner | epoch 004: 2196 / 3002 loss=2.543, ppl=5.83, wps=5835.5, ups=0.09, wpb=64799, bsz=128, num_updates=11137, lr=9.99189e-05, gnorm=1.998, loss_scale=32, train_wall=11, gb_free=2.8, wall=127657
2021-06-20 06:06:34 | INFO | train_inner | epoch 004: 2197 / 3002 loss=2.594, ppl=6.04, wps=5898.7, ups=0.09, wpb=64799, bsz=128, num_updates=11138, lr=9.99189e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=127668
2021-06-20 06:06:45 | INFO | train_inner | epoch 004: 2198 / 3002 loss=2.604, ppl=6.08, wps=5862.5, ups=0.09, wpb=64736, bsz=128, num_updates=11139, lr=9.99189e-05, gnorm=2.013, loss_scale=32, train_wall=11, gb_free=2.8, wall=127679
2021-06-20 06:06:56 | INFO | train_inner | epoch 004: 2199 / 3002 loss=2.543, ppl=5.83, wps=5866.5, ups=0.09, wpb=64822, bsz=128, num_updates=11140, lr=9.99189e-05, gnorm=1.972, loss_scale=32, train_wall=11, gb_free=2.8, wall=127690
2021-06-20 06:07:07 | INFO | train_inner | epoch 004: 2200 / 3002 loss=2.525, ppl=5.76, wps=5847.9, ups=0.09, wpb=64827, bsz=128, num_updates=11141, lr=9.99189e-05, gnorm=2.019, loss_scale=32, train_wall=11, gb_free=2.8, wall=127701
2021-06-20 06:07:18 | INFO | train_inner | epoch 004: 2201 / 3002 loss=2.507, ppl=5.69, wps=5915, ups=0.09, wpb=64829, bsz=128, num_updates=11142, lr=9.99189e-05, gnorm=2.069, loss_scale=32, train_wall=10, gb_free=2.8, wall=127712
2021-06-20 06:07:29 | INFO | train_inner | epoch 004: 2202 / 3002 loss=2.623, ppl=6.16, wps=5914.7, ups=0.09, wpb=64794, bsz=128, num_updates=11143, lr=9.99188e-05, gnorm=2.142, loss_scale=32, train_wall=10, gb_free=2.8, wall=127723
2021-06-20 06:07:40 | INFO | train_inner | epoch 004: 2203 / 3002 loss=2.38, ppl=5.2, wps=5895, ups=0.09, wpb=64818, bsz=128, num_updates=11144, lr=9.99188e-05, gnorm=2.356, loss_scale=32, train_wall=11, gb_free=2.8, wall=127734
2021-06-20 06:07:51 | INFO | train_inner | epoch 004: 2204 / 3002 loss=2.48, ppl=5.58, wps=5978.2, ups=0.09, wpb=64777, bsz=128, num_updates=11145, lr=9.99188e-05, gnorm=2.069, loss_scale=32, train_wall=10, gb_free=2.8, wall=127745
2021-06-20 06:08:02 | INFO | train_inner | epoch 004: 2205 / 3002 loss=2.534, ppl=5.79, wps=5806.4, ups=0.09, wpb=64828, bsz=128, num_updates=11146, lr=9.99188e-05, gnorm=2.243, loss_scale=32, train_wall=11, gb_free=2.8, wall=127756
2021-06-20 06:08:13 | INFO | train_inner | epoch 004: 2206 / 3002 loss=2.535, ppl=5.79, wps=5794.6, ups=0.09, wpb=64719, bsz=128, num_updates=11147, lr=9.99188e-05, gnorm=2.008, loss_scale=32, train_wall=11, gb_free=2.8, wall=127767
2021-06-20 06:08:24 | INFO | train_inner | epoch 004: 2207 / 3002 loss=2.579, ppl=5.98, wps=5800, ups=0.09, wpb=64837, bsz=128, num_updates=11148, lr=9.99188e-05, gnorm=2.115, loss_scale=32, train_wall=11, gb_free=2.8, wall=127779
2021-06-20 06:08:35 | INFO | train_inner | epoch 004: 2208 / 3002 loss=2.509, ppl=5.69, wps=5761, ups=0.09, wpb=64908, bsz=128, num_updates=11149, lr=9.99188e-05, gnorm=2.017, loss_scale=32, train_wall=11, gb_free=2.8, wall=127790
2021-06-20 06:08:47 | INFO | train_inner | epoch 004: 2209 / 3002 loss=2.608, ppl=6.1, wps=5779.3, ups=0.09, wpb=64854, bsz=128, num_updates=11150, lr=9.99188e-05, gnorm=2.073, loss_scale=32, train_wall=11, gb_free=2.8, wall=127801
2021-06-20 06:08:58 | INFO | train_inner | epoch 004: 2210 / 3002 loss=2.734, ppl=6.65, wps=5910.3, ups=0.09, wpb=64858, bsz=128, num_updates=11151, lr=9.99188e-05, gnorm=1.996, loss_scale=32, train_wall=11, gb_free=2.8, wall=127812
2021-06-20 06:09:09 | INFO | train_inner | epoch 004: 2211 / 3002 loss=2.543, ppl=5.83, wps=5759.5, ups=0.09, wpb=64793, bsz=128, num_updates=11152, lr=9.99188e-05, gnorm=1.999, loss_scale=32, train_wall=11, gb_free=2.8, wall=127823
2021-06-20 06:09:20 | INFO | train_inner | epoch 004: 2212 / 3002 loss=2.592, ppl=6.03, wps=5941.2, ups=0.09, wpb=64765, bsz=128, num_updates=11153, lr=9.99188e-05, gnorm=2.123, loss_scale=32, train_wall=10, gb_free=2.8, wall=127834
2021-06-20 06:09:31 | INFO | train_inner | epoch 004: 2213 / 3002 loss=2.524, ppl=5.75, wps=5884, ups=0.09, wpb=64839, bsz=128, num_updates=11154, lr=9.99188e-05, gnorm=2.185, loss_scale=32, train_wall=11, gb_free=2.8, wall=127845
2021-06-20 06:09:42 | INFO | train_inner | epoch 004: 2214 / 3002 loss=2.606, ppl=6.09, wps=5839, ups=0.09, wpb=64867, bsz=128, num_updates=11155, lr=9.99188e-05, gnorm=2.095, loss_scale=32, train_wall=11, gb_free=2.8, wall=127856
2021-06-20 06:09:53 | INFO | train_inner | epoch 004: 2215 / 3002 loss=2.513, ppl=5.71, wps=5767, ups=0.09, wpb=64881, bsz=128, num_updates=11156, lr=9.99187e-05, gnorm=2.101, loss_scale=32, train_wall=11, gb_free=2.8, wall=127868
2021-06-20 06:10:04 | INFO | train_inner | epoch 004: 2216 / 3002 loss=2.57, ppl=5.94, wps=5856.6, ups=0.09, wpb=64817, bsz=128, num_updates=11157, lr=9.99187e-05, gnorm=2.017, loss_scale=32, train_wall=11, gb_free=2.8, wall=127879
2021-06-20 06:10:15 | INFO | train_inner | epoch 004: 2217 / 3002 loss=2.611, ppl=6.11, wps=5819.8, ups=0.09, wpb=64852, bsz=128, num_updates=11158, lr=9.99187e-05, gnorm=2.152, loss_scale=32, train_wall=11, gb_free=2.8, wall=127890
2021-06-20 06:10:27 | INFO | train_inner | epoch 004: 2218 / 3002 loss=2.549, ppl=5.85, wps=5785.1, ups=0.09, wpb=64863, bsz=128, num_updates=11159, lr=9.99187e-05, gnorm=2.051, loss_scale=32, train_wall=11, gb_free=2.8, wall=127901
2021-06-20 06:10:38 | INFO | train_inner | epoch 004: 2219 / 3002 loss=2.729, ppl=6.63, wps=5800.9, ups=0.09, wpb=64782, bsz=128, num_updates=11160, lr=9.99187e-05, gnorm=2.007, loss_scale=32, train_wall=11, gb_free=2.8, wall=127912
2021-06-20 06:10:49 | INFO | train_inner | epoch 004: 2220 / 3002 loss=2.527, ppl=5.76, wps=5847.6, ups=0.09, wpb=64852, bsz=128, num_updates=11161, lr=9.99187e-05, gnorm=1.965, loss_scale=32, train_wall=11, gb_free=2.8, wall=127923
2021-06-20 06:11:00 | INFO | train_inner | epoch 004: 2221 / 3002 loss=2.7, ppl=6.5, wps=5811.6, ups=0.09, wpb=64849, bsz=128, num_updates=11162, lr=9.99187e-05, gnorm=2.032, loss_scale=32, train_wall=11, gb_free=2.8, wall=127934
2021-06-20 06:11:11 | INFO | train_inner | epoch 004: 2222 / 3002 loss=2.65, ppl=6.28, wps=5828.2, ups=0.09, wpb=64730, bsz=128, num_updates=11163, lr=9.99187e-05, gnorm=2.039, loss_scale=32, train_wall=11, gb_free=2.8, wall=127945
2021-06-20 06:11:22 | INFO | train_inner | epoch 004: 2223 / 3002 loss=2.504, ppl=5.67, wps=5804.8, ups=0.09, wpb=64801, bsz=128, num_updates=11164, lr=9.99187e-05, gnorm=1.934, loss_scale=32, train_wall=11, gb_free=2.8, wall=127957
2021-06-20 06:11:33 | INFO | train_inner | epoch 004: 2224 / 3002 loss=2.467, ppl=5.53, wps=5901.7, ups=0.09, wpb=64893, bsz=128, num_updates=11165, lr=9.99187e-05, gnorm=2.12, loss_scale=32, train_wall=11, gb_free=2.8, wall=127968
2021-06-20 06:11:44 | INFO | train_inner | epoch 004: 2225 / 3002 loss=2.588, ppl=6.01, wps=5817.4, ups=0.09, wpb=64819, bsz=128, num_updates=11166, lr=9.99187e-05, gnorm=2.037, loss_scale=32, train_wall=11, gb_free=2.8, wall=127979
2021-06-20 06:11:56 | INFO | train_inner | epoch 004: 2226 / 3002 loss=2.52, ppl=5.73, wps=5783.1, ups=0.09, wpb=64833, bsz=128, num_updates=11167, lr=9.99187e-05, gnorm=1.977, loss_scale=32, train_wall=11, gb_free=2.8, wall=127990
2021-06-20 06:12:07 | INFO | train_inner | epoch 004: 2227 / 3002 loss=2.527, ppl=5.76, wps=5897.3, ups=0.09, wpb=64907, bsz=128, num_updates=11168, lr=9.99186e-05, gnorm=2.054, loss_scale=32, train_wall=11, gb_free=2.8, wall=128001
2021-06-20 06:12:18 | INFO | train_inner | epoch 004: 2228 / 3002 loss=2.57, ppl=5.94, wps=5737.9, ups=0.09, wpb=64861, bsz=128, num_updates=11169, lr=9.99186e-05, gnorm=2.132, loss_scale=32, train_wall=11, gb_free=2.8, wall=128012
2021-06-20 06:12:29 | INFO | train_inner | epoch 004: 2229 / 3002 loss=2.493, ppl=5.63, wps=5820.7, ups=0.09, wpb=64834, bsz=128, num_updates=11170, lr=9.99186e-05, gnorm=2.122, loss_scale=32, train_wall=11, gb_free=2.8, wall=128023
2021-06-20 06:12:40 | INFO | train_inner | epoch 004: 2230 / 3002 loss=2.5, ppl=5.66, wps=5921.5, ups=0.09, wpb=64768, bsz=128, num_updates=11171, lr=9.99186e-05, gnorm=1.937, loss_scale=32, train_wall=10, gb_free=2.8, wall=128034
2021-06-20 06:12:51 | INFO | train_inner | epoch 004: 2231 / 3002 loss=2.472, ppl=5.55, wps=5784.7, ups=0.09, wpb=64794, bsz=128, num_updates=11172, lr=9.99186e-05, gnorm=1.944, loss_scale=32, train_wall=11, gb_free=2.8, wall=128046
2021-06-20 06:13:02 | INFO | train_inner | epoch 004: 2232 / 3002 loss=2.587, ppl=6.01, wps=5888.5, ups=0.09, wpb=64884, bsz=128, num_updates=11173, lr=9.99186e-05, gnorm=1.921, loss_scale=32, train_wall=11, gb_free=2.8, wall=128057
2021-06-20 06:13:13 | INFO | train_inner | epoch 004: 2233 / 3002 loss=2.641, ppl=6.24, wps=5791.7, ups=0.09, wpb=64825, bsz=128, num_updates=11174, lr=9.99186e-05, gnorm=2.068, loss_scale=32, train_wall=11, gb_free=2.8, wall=128068
2021-06-20 06:13:24 | INFO | train_inner | epoch 004: 2234 / 3002 loss=2.679, ppl=6.4, wps=5882.9, ups=0.09, wpb=64818, bsz=128, num_updates=11175, lr=9.99186e-05, gnorm=2.054, loss_scale=32, train_wall=11, gb_free=2.8, wall=128079
2021-06-20 06:13:35 | INFO | train_inner | epoch 004: 2235 / 3002 loss=2.526, ppl=5.76, wps=5946, ups=0.09, wpb=64911, bsz=128, num_updates=11176, lr=9.99186e-05, gnorm=2.06, loss_scale=32, train_wall=10, gb_free=2.8, wall=128090
2021-06-20 06:13:46 | INFO | train_inner | epoch 004: 2236 / 3002 loss=2.7, ppl=6.5, wps=5955, ups=0.09, wpb=64865, bsz=128, num_updates=11177, lr=9.99186e-05, gnorm=1.984, loss_scale=32, train_wall=10, gb_free=2.8, wall=128101
2021-06-20 06:13:57 | INFO | train_inner | epoch 004: 2237 / 3002 loss=2.601, ppl=6.07, wps=5815.7, ups=0.09, wpb=64815, bsz=128, num_updates=11178, lr=9.99186e-05, gnorm=2.047, loss_scale=32, train_wall=11, gb_free=2.8, wall=128112
2021-06-20 06:14:09 | INFO | train_inner | epoch 004: 2238 / 3002 loss=2.434, ppl=5.4, wps=5835.8, ups=0.09, wpb=64834, bsz=128, num_updates=11179, lr=9.99186e-05, gnorm=1.956, loss_scale=32, train_wall=11, gb_free=2.8, wall=128123
2021-06-20 06:14:19 | INFO | train_inner | epoch 004: 2239 / 3002 loss=2.669, ppl=6.36, wps=5909.6, ups=0.09, wpb=64844, bsz=128, num_updates=11180, lr=9.99186e-05, gnorm=2.013, loss_scale=32, train_wall=11, gb_free=2.8, wall=128134
2021-06-20 06:14:30 | INFO | train_inner | epoch 004: 2240 / 3002 loss=2.542, ppl=5.83, wps=5996.9, ups=0.09, wpb=64826, bsz=128, num_updates=11181, lr=9.99185e-05, gnorm=2.024, loss_scale=32, train_wall=10, gb_free=2.8, wall=128145
2021-06-20 06:14:41 | INFO | train_inner | epoch 004: 2241 / 3002 loss=2.604, ppl=6.08, wps=5871.3, ups=0.09, wpb=64840, bsz=128, num_updates=11182, lr=9.99185e-05, gnorm=2.044, loss_scale=32, train_wall=11, gb_free=2.8, wall=128156
2021-06-20 06:14:53 | INFO | train_inner | epoch 004: 2242 / 3002 loss=2.584, ppl=6, wps=5804.1, ups=0.09, wpb=64820, bsz=128, num_updates=11183, lr=9.99185e-05, gnorm=2.038, loss_scale=32, train_wall=11, gb_free=2.8, wall=128167
2021-06-20 06:15:04 | INFO | train_inner | epoch 004: 2243 / 3002 loss=2.487, ppl=5.6, wps=5834.6, ups=0.09, wpb=64828, bsz=128, num_updates=11184, lr=9.99185e-05, gnorm=2, loss_scale=32, train_wall=11, gb_free=2.8, wall=128178
2021-06-20 06:15:15 | INFO | train_inner | epoch 004: 2244 / 3002 loss=2.667, ppl=6.35, wps=5879.3, ups=0.09, wpb=64961, bsz=128, num_updates=11185, lr=9.99185e-05, gnorm=2.064, loss_scale=32, train_wall=11, gb_free=2.8, wall=128189
2021-06-20 06:15:26 | INFO | train_inner | epoch 004: 2245 / 3002 loss=2.461, ppl=5.51, wps=5889.3, ups=0.09, wpb=64639, bsz=128, num_updates=11186, lr=9.99185e-05, gnorm=2.084, loss_scale=32, train_wall=10, gb_free=2.8, wall=128200
2021-06-20 06:15:37 | INFO | train_inner | epoch 004: 2246 / 3002 loss=2.638, ppl=6.23, wps=5865.4, ups=0.09, wpb=64843, bsz=128, num_updates=11187, lr=9.99185e-05, gnorm=2.12, loss_scale=32, train_wall=11, gb_free=2.8, wall=128211
2021-06-20 06:15:48 | INFO | train_inner | epoch 004: 2247 / 3002 loss=2.618, ppl=6.14, wps=5941.9, ups=0.09, wpb=64840, bsz=128, num_updates=11188, lr=9.99185e-05, gnorm=2, loss_scale=32, train_wall=10, gb_free=2.8, wall=128222
2021-06-20 06:15:59 | INFO | train_inner | epoch 004: 2248 / 3002 loss=2.604, ppl=6.08, wps=5911, ups=0.09, wpb=64776, bsz=128, num_updates=11189, lr=9.99185e-05, gnorm=1.971, loss_scale=32, train_wall=10, gb_free=2.8, wall=128233
2021-06-20 06:16:10 | INFO | train_inner | epoch 004: 2249 / 3002 loss=2.585, ppl=6, wps=5752.3, ups=0.09, wpb=64774, bsz=128, num_updates=11190, lr=9.99185e-05, gnorm=1.997, loss_scale=32, train_wall=11, gb_free=2.8, wall=128244
2021-06-20 06:16:21 | INFO | train_inner | epoch 004: 2250 / 3002 loss=2.686, ppl=6.44, wps=5828.1, ups=0.09, wpb=64795, bsz=128, num_updates=11191, lr=9.99185e-05, gnorm=2.064, loss_scale=32, train_wall=11, gb_free=2.8, wall=128255
2021-06-20 06:16:32 | INFO | train_inner | epoch 004: 2251 / 3002 loss=2.506, ppl=5.68, wps=5864.3, ups=0.09, wpb=64825, bsz=128, num_updates=11192, lr=9.99185e-05, gnorm=2.053, loss_scale=32, train_wall=11, gb_free=2.8, wall=128266
2021-06-20 06:16:43 | INFO | train_inner | epoch 004: 2252 / 3002 loss=2.471, ppl=5.54, wps=5881.9, ups=0.09, wpb=64824, bsz=128, num_updates=11193, lr=9.99184e-05, gnorm=1.956, loss_scale=32, train_wall=11, gb_free=2.8, wall=128277
2021-06-20 06:16:54 | INFO | train_inner | epoch 004: 2253 / 3002 loss=2.598, ppl=6.06, wps=5827.3, ups=0.09, wpb=64840, bsz=128, num_updates=11194, lr=9.99184e-05, gnorm=1.892, loss_scale=32, train_wall=11, gb_free=2.8, wall=128288
2021-06-20 06:17:05 | INFO | train_inner | epoch 004: 2254 / 3002 loss=2.43, ppl=5.39, wps=5848.7, ups=0.09, wpb=64858, bsz=128, num_updates=11195, lr=9.99184e-05, gnorm=2.039, loss_scale=32, train_wall=11, gb_free=2.8, wall=128300
2021-06-20 06:17:16 | INFO | train_inner | epoch 004: 2255 / 3002 loss=2.552, ppl=5.87, wps=5796, ups=0.09, wpb=64828, bsz=128, num_updates=11196, lr=9.99184e-05, gnorm=2.075, loss_scale=32, train_wall=11, gb_free=2.8, wall=128311
2021-06-20 06:17:28 | INFO | train_inner | epoch 004: 2256 / 3002 loss=2.515, ppl=5.72, wps=5856.9, ups=0.09, wpb=64862, bsz=128, num_updates=11197, lr=9.99184e-05, gnorm=2.053, loss_scale=32, train_wall=11, gb_free=2.8, wall=128322
2021-06-20 06:17:39 | INFO | train_inner | epoch 004: 2257 / 3002 loss=2.612, ppl=6.12, wps=5777.9, ups=0.09, wpb=64764, bsz=128, num_updates=11198, lr=9.99184e-05, gnorm=2.045, loss_scale=32, train_wall=11, gb_free=2.8, wall=128333
2021-06-20 06:17:50 | INFO | train_inner | epoch 004: 2258 / 3002 loss=2.506, ppl=5.68, wps=5928, ups=0.09, wpb=64928, bsz=128, num_updates=11199, lr=9.99184e-05, gnorm=1.989, loss_scale=32, train_wall=10, gb_free=2.8, wall=128344
2021-06-20 06:18:01 | INFO | train_inner | epoch 004: 2259 / 3002 loss=2.495, ppl=5.64, wps=5812.5, ups=0.09, wpb=64848, bsz=128, num_updates=11200, lr=9.99184e-05, gnorm=2.084, loss_scale=32, train_wall=11, gb_free=2.8, wall=128355
2021-06-20 06:18:12 | INFO | train_inner | epoch 004: 2260 / 3002 loss=2.58, ppl=5.98, wps=5877.7, ups=0.09, wpb=64808, bsz=128, num_updates=11201, lr=9.99184e-05, gnorm=2.011, loss_scale=32, train_wall=11, gb_free=2.8, wall=128366
2021-06-20 06:18:23 | INFO | train_inner | epoch 004: 2261 / 3002 loss=2.576, ppl=5.96, wps=5859.2, ups=0.09, wpb=64771, bsz=128, num_updates=11202, lr=9.99184e-05, gnorm=1.948, loss_scale=32, train_wall=11, gb_free=2.8, wall=128377
2021-06-20 06:18:34 | INFO | train_inner | epoch 004: 2262 / 3002 loss=2.842, ppl=7.17, wps=5788.2, ups=0.09, wpb=64852, bsz=128, num_updates=11203, lr=9.99184e-05, gnorm=2.102, loss_scale=32, train_wall=11, gb_free=2.8, wall=128388
2021-06-20 06:18:45 | INFO | train_inner | epoch 004: 2263 / 3002 loss=2.609, ppl=6.1, wps=5877.5, ups=0.09, wpb=64881, bsz=128, num_updates=11204, lr=9.99184e-05, gnorm=2.013, loss_scale=32, train_wall=11, gb_free=2.8, wall=128399
2021-06-20 06:18:56 | INFO | train_inner | epoch 004: 2264 / 3002 loss=2.464, ppl=5.52, wps=5745.1, ups=0.09, wpb=64766, bsz=128, num_updates=11205, lr=9.99184e-05, gnorm=2.019, loss_scale=32, train_wall=11, gb_free=2.8, wall=128411
2021-06-20 06:19:08 | INFO | train_inner | epoch 004: 2265 / 3002 loss=2.522, ppl=5.74, wps=5757.6, ups=0.09, wpb=64700, bsz=128, num_updates=11206, lr=9.99183e-05, gnorm=1.953, loss_scale=32, train_wall=11, gb_free=2.8, wall=128422
2021-06-20 06:19:19 | INFO | train_inner | epoch 004: 2266 / 3002 loss=2.579, ppl=5.97, wps=5789.2, ups=0.09, wpb=64783, bsz=128, num_updates=11207, lr=9.99183e-05, gnorm=2.018, loss_scale=32, train_wall=11, gb_free=2.8, wall=128433
2021-06-20 06:19:30 | INFO | train_inner | epoch 004: 2267 / 3002 loss=2.653, ppl=6.29, wps=5867.1, ups=0.09, wpb=64821, bsz=128, num_updates=11208, lr=9.99183e-05, gnorm=2.086, loss_scale=32, train_wall=11, gb_free=2.8, wall=128444
2021-06-20 06:19:41 | INFO | train_inner | epoch 004: 2268 / 3002 loss=2.513, ppl=5.71, wps=5811.7, ups=0.09, wpb=64854, bsz=128, num_updates=11209, lr=9.99183e-05, gnorm=2.493, loss_scale=32, train_wall=11, gb_free=2.8, wall=128455
2021-06-20 06:19:52 | INFO | train_inner | epoch 004: 2269 / 3002 loss=2.529, ppl=5.77, wps=5876.6, ups=0.09, wpb=64763, bsz=128, num_updates=11210, lr=9.99183e-05, gnorm=1.972, loss_scale=32, train_wall=11, gb_free=2.8, wall=128466
2021-06-20 06:20:03 | INFO | train_inner | epoch 004: 2270 / 3002 loss=2.615, ppl=6.13, wps=5800.6, ups=0.09, wpb=64858, bsz=128, num_updates=11211, lr=9.99183e-05, gnorm=1.932, loss_scale=32, train_wall=11, gb_free=2.8, wall=128478
2021-06-20 06:20:14 | INFO | train_inner | epoch 004: 2271 / 3002 loss=2.608, ppl=6.1, wps=5848.5, ups=0.09, wpb=64868, bsz=128, num_updates=11212, lr=9.99183e-05, gnorm=2.023, loss_scale=32, train_wall=11, gb_free=2.8, wall=128489
2021-06-20 06:20:25 | INFO | train_inner | epoch 004: 2272 / 3002 loss=2.6, ppl=6.06, wps=5934.5, ups=0.09, wpb=64915, bsz=128, num_updates=11213, lr=9.99183e-05, gnorm=2.025, loss_scale=32, train_wall=10, gb_free=2.8, wall=128500
2021-06-20 06:20:37 | INFO | train_inner | epoch 004: 2273 / 3002 loss=2.609, ppl=6.1, wps=5706.2, ups=0.09, wpb=64785, bsz=128, num_updates=11214, lr=9.99183e-05, gnorm=1.996, loss_scale=32, train_wall=11, gb_free=2.8, wall=128511
2021-06-20 06:20:48 | INFO | train_inner | epoch 004: 2274 / 3002 loss=2.651, ppl=6.28, wps=5842, ups=0.09, wpb=64798, bsz=128, num_updates=11215, lr=9.99183e-05, gnorm=2.023, loss_scale=32, train_wall=11, gb_free=2.8, wall=128522
2021-06-20 06:20:59 | INFO | train_inner | epoch 004: 2275 / 3002 loss=2.544, ppl=5.83, wps=5969.8, ups=0.09, wpb=64841, bsz=128, num_updates=11216, lr=9.99183e-05, gnorm=1.979, loss_scale=32, train_wall=10, gb_free=2.8, wall=128533
2021-06-20 06:21:10 | INFO | train_inner | epoch 004: 2276 / 3002 loss=2.488, ppl=5.61, wps=5786.2, ups=0.09, wpb=64896, bsz=128, num_updates=11217, lr=9.99183e-05, gnorm=2.181, loss_scale=32, train_wall=11, gb_free=2.8, wall=128544
2021-06-20 06:21:21 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0
2021-06-20 06:21:32 | INFO | train_inner | epoch 004: 2278 / 3002 loss=2.547, ppl=5.84, wps=2932.2, ups=0.05, wpb=64832, bsz=128, num_updates=11218, lr=9.99182e-05, gnorm=2.006, loss_scale=16, train_wall=21, gb_free=2.8, wall=128566
2021-06-20 06:21:43 | INFO | train_inner | epoch 004: 2279 / 3002 loss=2.597, ppl=6.05, wps=5823.2, ups=0.09, wpb=64778, bsz=128, num_updates=11219, lr=9.99182e-05, gnorm=2.102, loss_scale=16, train_wall=11, gb_free=2.8, wall=128577
2021-06-20 06:21:54 | INFO | train_inner | epoch 004: 2280 / 3002 loss=2.672, ppl=6.37, wps=5845.8, ups=0.09, wpb=64820, bsz=128, num_updates=11220, lr=9.99182e-05, gnorm=2.041, loss_scale=16, train_wall=11, gb_free=2.8, wall=128588
2021-06-20 06:22:05 | INFO | train_inner | epoch 004: 2281 / 3002 loss=2.465, ppl=5.52, wps=5974, ups=0.09, wpb=64782, bsz=128, num_updates=11221, lr=9.99182e-05, gnorm=1.931, loss_scale=16, train_wall=10, gb_free=2.8, wall=128599
2021-06-20 06:22:16 | INFO | train_inner | epoch 004: 2282 / 3002 loss=2.553, ppl=5.87, wps=5858.5, ups=0.09, wpb=64869, bsz=128, num_updates=11222, lr=9.99182e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=128610
2021-06-20 06:22:27 | INFO | train_inner | epoch 004: 2283 / 3002 loss=2.466, ppl=5.52, wps=5739.7, ups=0.09, wpb=64892, bsz=128, num_updates=11223, lr=9.99182e-05, gnorm=2.012, loss_scale=16, train_wall=11, gb_free=2.8, wall=128622
2021-06-20 06:22:38 | INFO | train_inner | epoch 004: 2284 / 3002 loss=2.532, ppl=5.78, wps=5855.8, ups=0.09, wpb=64917, bsz=128, num_updates=11224, lr=9.99182e-05, gnorm=1.979, loss_scale=16, train_wall=11, gb_free=2.8, wall=128633
2021-06-20 06:22:50 | INFO | train_inner | epoch 004: 2285 / 3002 loss=2.653, ppl=6.29, wps=5762.1, ups=0.09, wpb=64893, bsz=128, num_updates=11225, lr=9.99182e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=128644
2021-06-20 06:23:01 | INFO | train_inner | epoch 004: 2286 / 3002 loss=2.401, ppl=5.28, wps=5773.7, ups=0.09, wpb=64808, bsz=128, num_updates=11226, lr=9.99182e-05, gnorm=2.083, loss_scale=16, train_wall=11, gb_free=2.8, wall=128655
2021-06-20 06:23:12 | INFO | train_inner | epoch 004: 2287 / 3002 loss=2.521, ppl=5.74, wps=5886.9, ups=0.09, wpb=64868, bsz=128, num_updates=11227, lr=9.99182e-05, gnorm=2.081, loss_scale=16, train_wall=11, gb_free=2.8, wall=128666
2021-06-20 06:23:23 | INFO | train_inner | epoch 004: 2288 / 3002 loss=2.616, ppl=6.13, wps=5795.1, ups=0.09, wpb=64834, bsz=128, num_updates=11228, lr=9.99182e-05, gnorm=2.063, loss_scale=16, train_wall=11, gb_free=2.8, wall=128677
2021-06-20 06:23:34 | INFO | train_inner | epoch 004: 2289 / 3002 loss=2.556, ppl=5.88, wps=5851.8, ups=0.09, wpb=64790, bsz=128, num_updates=11229, lr=9.99182e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=128689
2021-06-20 06:23:45 | INFO | train_inner | epoch 004: 2290 / 3002 loss=2.451, ppl=5.47, wps=5811.5, ups=0.09, wpb=64882, bsz=128, num_updates=11230, lr=9.99182e-05, gnorm=2.035, loss_scale=16, train_wall=11, gb_free=2.8, wall=128700
2021-06-20 06:23:56 | INFO | train_inner | epoch 004: 2291 / 3002 loss=2.455, ppl=5.48, wps=5876, ups=0.09, wpb=64877, bsz=128, num_updates=11231, lr=9.99181e-05, gnorm=2.028, loss_scale=16, train_wall=11, gb_free=2.8, wall=128711
2021-06-20 06:24:08 | INFO | train_inner | epoch 004: 2292 / 3002 loss=2.508, ppl=5.69, wps=5837.2, ups=0.09, wpb=64731, bsz=128, num_updates=11232, lr=9.99181e-05, gnorm=2.004, loss_scale=16, train_wall=11, gb_free=2.8, wall=128722
2021-06-20 06:24:18 | INFO | train_inner | epoch 004: 2293 / 3002 loss=2.657, ppl=6.31, wps=5942.7, ups=0.09, wpb=64803, bsz=128, num_updates=11233, lr=9.99181e-05, gnorm=2.103, loss_scale=16, train_wall=10, gb_free=2.8, wall=128733
2021-06-20 06:24:29 | INFO | train_inner | epoch 004: 2294 / 3002 loss=2.512, ppl=5.71, wps=5858.1, ups=0.09, wpb=64873, bsz=128, num_updates=11234, lr=9.99181e-05, gnorm=2.061, loss_scale=16, train_wall=11, gb_free=2.8, wall=128744
2021-06-20 06:24:41 | INFO | train_inner | epoch 004: 2295 / 3002 loss=2.654, ppl=6.3, wps=5811.2, ups=0.09, wpb=64797, bsz=128, num_updates=11235, lr=9.99181e-05, gnorm=2.125, loss_scale=16, train_wall=11, gb_free=2.8, wall=128755
2021-06-20 06:24:52 | INFO | train_inner | epoch 004: 2296 / 3002 loss=2.513, ppl=5.71, wps=5711, ups=0.09, wpb=64877, bsz=128, num_updates=11236, lr=9.99181e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=128766
2021-06-20 06:25:03 | INFO | train_inner | epoch 004: 2297 / 3002 loss=2.575, ppl=5.96, wps=5770.4, ups=0.09, wpb=64822, bsz=128, num_updates=11237, lr=9.99181e-05, gnorm=2.017, loss_scale=16, train_wall=11, gb_free=2.8, wall=128778
2021-06-20 06:25:14 | INFO | train_inner | epoch 004: 2298 / 3002 loss=2.504, ppl=5.67, wps=5768.9, ups=0.09, wpb=64814, bsz=128, num_updates=11238, lr=9.99181e-05, gnorm=2.046, loss_scale=16, train_wall=11, gb_free=2.8, wall=128789
2021-06-20 06:25:26 | INFO | train_inner | epoch 004: 2299 / 3002 loss=2.677, ppl=6.4, wps=5873.6, ups=0.09, wpb=64785, bsz=128, num_updates=11239, lr=9.99181e-05, gnorm=2.121, loss_scale=16, train_wall=11, gb_free=2.8, wall=128800
2021-06-20 06:25:37 | INFO | train_inner | epoch 004: 2300 / 3002 loss=2.376, ppl=5.19, wps=5879.1, ups=0.09, wpb=64828, bsz=128, num_updates=11240, lr=9.99181e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=128811
2021-06-20 06:25:48 | INFO | train_inner | epoch 004: 2301 / 3002 loss=2.544, ppl=5.83, wps=5871, ups=0.09, wpb=64740, bsz=128, num_updates=11241, lr=9.99181e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=128822
2021-06-20 06:25:58 | INFO | train_inner | epoch 004: 2302 / 3002 loss=2.618, ppl=6.14, wps=5958.4, ups=0.09, wpb=64836, bsz=128, num_updates=11242, lr=9.99181e-05, gnorm=2.101, loss_scale=16, train_wall=10, gb_free=2.8, wall=128833
2021-06-20 06:26:10 | INFO | train_inner | epoch 004: 2303 / 3002 loss=2.514, ppl=5.71, wps=5793.1, ups=0.09, wpb=64765, bsz=128, num_updates=11243, lr=9.9918e-05, gnorm=2.168, loss_scale=16, train_wall=11, gb_free=2.8, wall=128844
2021-06-20 06:26:21 | INFO | train_inner | epoch 004: 2304 / 3002 loss=2.532, ppl=5.78, wps=5883.6, ups=0.09, wpb=64803, bsz=128, num_updates=11244, lr=9.9918e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=128855
2021-06-20 06:26:32 | INFO | train_inner | epoch 004: 2305 / 3002 loss=2.464, ppl=5.52, wps=5830.1, ups=0.09, wpb=64804, bsz=128, num_updates=11245, lr=9.9918e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=128866
2021-06-20 06:26:43 | INFO | train_inner | epoch 004: 2306 / 3002 loss=2.591, ppl=6.03, wps=5831.8, ups=0.09, wpb=64796, bsz=128, num_updates=11246, lr=9.9918e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=128877
2021-06-20 06:26:54 | INFO | train_inner | epoch 004: 2307 / 3002 loss=2.344, ppl=5.08, wps=5869.7, ups=0.09, wpb=64780, bsz=128, num_updates=11247, lr=9.9918e-05, gnorm=1.982, loss_scale=16, train_wall=11, gb_free=2.8, wall=128888
2021-06-20 06:27:05 | INFO | train_inner | epoch 004: 2308 / 3002 loss=2.51, ppl=5.7, wps=5886.8, ups=0.09, wpb=64821, bsz=128, num_updates=11248, lr=9.9918e-05, gnorm=1.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=128899
2021-06-20 06:27:16 | INFO | train_inner | epoch 004: 2309 / 3002 loss=2.495, ppl=5.64, wps=5899.3, ups=0.09, wpb=64850, bsz=128, num_updates=11249, lr=9.9918e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=128910
2021-06-20 06:27:27 | INFO | train_inner | epoch 004: 2310 / 3002 loss=2.688, ppl=6.44, wps=5888.3, ups=0.09, wpb=64815, bsz=128, num_updates=11250, lr=9.9918e-05, gnorm=2.009, loss_scale=16, train_wall=11, gb_free=2.8, wall=128921
2021-06-20 06:27:38 | INFO | train_inner | epoch 004: 2311 / 3002 loss=2.54, ppl=5.82, wps=5830.6, ups=0.09, wpb=64712, bsz=128, num_updates=11251, lr=9.9918e-05, gnorm=2.095, loss_scale=16, train_wall=11, gb_free=2.8, wall=128932
2021-06-20 06:27:49 | INFO | train_inner | epoch 004: 2312 / 3002 loss=2.522, ppl=5.74, wps=5845.8, ups=0.09, wpb=64832, bsz=128, num_updates=11252, lr=9.9918e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=128943
2021-06-20 06:28:00 | INFO | train_inner | epoch 004: 2313 / 3002 loss=2.509, ppl=5.69, wps=5858.6, ups=0.09, wpb=64838, bsz=128, num_updates=11253, lr=9.9918e-05, gnorm=2.031, loss_scale=16, train_wall=11, gb_free=2.8, wall=128955
2021-06-20 06:28:11 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 06:28:22 | INFO | train_inner | epoch 004: 2315 / 3002 loss=2.56, ppl=5.9, wps=2943.8, ups=0.05, wpb=64822, bsz=128, num_updates=11254, lr=9.9918e-05, gnorm=2.036, loss_scale=8, train_wall=21, gb_free=2.8, wall=128977
2021-06-20 06:28:34 | INFO | train_inner | epoch 004: 2316 / 3002 loss=2.504, ppl=5.67, wps=5735.6, ups=0.09, wpb=64913, bsz=128, num_updates=11255, lr=9.9918e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=128988
2021-06-20 06:28:45 | INFO | train_inner | epoch 004: 2317 / 3002 loss=2.575, ppl=5.96, wps=5795.3, ups=0.09, wpb=64793, bsz=128, num_updates=11256, lr=9.99179e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=128999
2021-06-20 06:28:56 | INFO | train_inner | epoch 004: 2318 / 3002 loss=2.473, ppl=5.55, wps=5808.2, ups=0.09, wpb=64765, bsz=128, num_updates=11257, lr=9.99179e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=129010
2021-06-20 06:29:07 | INFO | train_inner | epoch 004: 2319 / 3002 loss=2.543, ppl=5.83, wps=5822.2, ups=0.09, wpb=64855, bsz=128, num_updates=11258, lr=9.99179e-05, gnorm=2.028, loss_scale=8, train_wall=11, gb_free=2.8, wall=129021
2021-06-20 06:29:18 | INFO | train_inner | epoch 004: 2320 / 3002 loss=2.611, ppl=6.11, wps=5915.4, ups=0.09, wpb=64850, bsz=128, num_updates=11259, lr=9.99179e-05, gnorm=2.175, loss_scale=8, train_wall=11, gb_free=2.8, wall=129032
2021-06-20 06:29:29 | INFO | train_inner | epoch 004: 2321 / 3002 loss=2.499, ppl=5.65, wps=5830.7, ups=0.09, wpb=64846, bsz=128, num_updates=11260, lr=9.99179e-05, gnorm=2.046, loss_scale=8, train_wall=11, gb_free=2.8, wall=129043
2021-06-20 06:29:40 | INFO | train_inner | epoch 004: 2322 / 3002 loss=2.505, ppl=5.67, wps=5906.8, ups=0.09, wpb=64806, bsz=128, num_updates=11261, lr=9.99179e-05, gnorm=2.124, loss_scale=8, train_wall=11, gb_free=2.8, wall=129054
2021-06-20 06:29:51 | INFO | train_inner | epoch 004: 2323 / 3002 loss=2.543, ppl=5.83, wps=5878.8, ups=0.09, wpb=64879, bsz=128, num_updates=11262, lr=9.99179e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=129065
2021-06-20 06:30:02 | INFO | train_inner | epoch 004: 2324 / 3002 loss=2.763, ppl=6.79, wps=5830.8, ups=0.09, wpb=64771, bsz=128, num_updates=11263, lr=9.99179e-05, gnorm=2.226, loss_scale=8, train_wall=11, gb_free=2.8, wall=129077
2021-06-20 06:30:13 | INFO | train_inner | epoch 004: 2325 / 3002 loss=2.668, ppl=6.35, wps=5794.6, ups=0.09, wpb=64847, bsz=128, num_updates=11264, lr=9.99179e-05, gnorm=2.699, loss_scale=8, train_wall=11, gb_free=2.8, wall=129088
2021-06-20 06:30:25 | INFO | train_inner | epoch 004: 2326 / 3002 loss=2.603, ppl=6.08, wps=5763.2, ups=0.09, wpb=64854, bsz=128, num_updates=11265, lr=9.99179e-05, gnorm=2.072, loss_scale=8, train_wall=11, gb_free=2.8, wall=129099
2021-06-20 06:30:36 | INFO | train_inner | epoch 004: 2327 / 3002 loss=2.627, ppl=6.18, wps=5850.6, ups=0.09, wpb=64765, bsz=128, num_updates=11266, lr=9.99179e-05, gnorm=7.071, loss_scale=8, train_wall=11, gb_free=2.8, wall=129110
2021-06-20 06:30:47 | INFO | train_inner | epoch 004: 2328 / 3002 loss=2.641, ppl=6.24, wps=5905.4, ups=0.09, wpb=64831, bsz=128, num_updates=11267, lr=9.99179e-05, gnorm=2.088, loss_scale=8, train_wall=11, gb_free=2.8, wall=129121
2021-06-20 06:30:58 | INFO | train_inner | epoch 004: 2329 / 3002 loss=2.549, ppl=5.85, wps=5812.9, ups=0.09, wpb=64781, bsz=128, num_updates=11268, lr=9.99178e-05, gnorm=2.269, loss_scale=8, train_wall=11, gb_free=2.8, wall=129132
2021-06-20 06:31:09 | INFO | train_inner | epoch 004: 2330 / 3002 loss=2.598, ppl=6.05, wps=5906, ups=0.09, wpb=64795, bsz=128, num_updates=11269, lr=9.99178e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=129143
2021-06-20 06:31:20 | INFO | train_inner | epoch 004: 2331 / 3002 loss=2.424, ppl=5.37, wps=5761.5, ups=0.09, wpb=64827, bsz=128, num_updates=11270, lr=9.99178e-05, gnorm=2.127, loss_scale=8, train_wall=11, gb_free=2.8, wall=129154
2021-06-20 06:31:31 | INFO | train_inner | epoch 004: 2332 / 3002 loss=2.438, ppl=5.42, wps=5898.6, ups=0.09, wpb=64786, bsz=128, num_updates=11271, lr=9.99178e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=129165
2021-06-20 06:31:42 | INFO | train_inner | epoch 004: 2333 / 3002 loss=2.406, ppl=5.3, wps=5836.9, ups=0.09, wpb=64866, bsz=128, num_updates=11272, lr=9.99178e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=129176
2021-06-20 06:31:53 | INFO | train_inner | epoch 004: 2334 / 3002 loss=2.626, ppl=6.17, wps=5825.8, ups=0.09, wpb=64834, bsz=128, num_updates=11273, lr=9.99178e-05, gnorm=2.067, loss_scale=8, train_wall=11, gb_free=2.8, wall=129188
2021-06-20 06:32:04 | INFO | train_inner | epoch 004: 2335 / 3002 loss=2.547, ppl=5.84, wps=5965.1, ups=0.09, wpb=64897, bsz=128, num_updates=11274, lr=9.99178e-05, gnorm=2.029, loss_scale=8, train_wall=10, gb_free=2.8, wall=129198
2021-06-20 06:32:15 | INFO | train_inner | epoch 004: 2336 / 3002 loss=2.516, ppl=5.72, wps=5899.1, ups=0.09, wpb=64734, bsz=128, num_updates=11275, lr=9.99178e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=129209
2021-06-20 06:32:26 | INFO | train_inner | epoch 004: 2337 / 3002 loss=2.602, ppl=6.07, wps=5872.6, ups=0.09, wpb=64826, bsz=128, num_updates=11276, lr=9.99178e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=129220
2021-06-20 06:32:37 | INFO | train_inner | epoch 004: 2338 / 3002 loss=2.454, ppl=5.48, wps=5867.1, ups=0.09, wpb=64803, bsz=128, num_updates=11277, lr=9.99178e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=129232
2021-06-20 06:32:48 | INFO | train_inner | epoch 004: 2339 / 3002 loss=2.527, ppl=5.76, wps=5824.4, ups=0.09, wpb=64866, bsz=128, num_updates=11278, lr=9.99178e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=129243
2021-06-20 06:33:00 | INFO | train_inner | epoch 004: 2340 / 3002 loss=2.658, ppl=6.31, wps=5738.1, ups=0.09, wpb=64713, bsz=128, num_updates=11279, lr=9.99178e-05, gnorm=2.098, loss_scale=8, train_wall=11, gb_free=2.8, wall=129254
2021-06-20 06:33:10 | INFO | train_inner | epoch 004: 2341 / 3002 loss=2.701, ppl=6.5, wps=6004.5, ups=0.09, wpb=64854, bsz=128, num_updates=11280, lr=9.99178e-05, gnorm=2.09, loss_scale=8, train_wall=10, gb_free=2.8, wall=129265
2021-06-20 06:33:21 | INFO | train_inner | epoch 004: 2342 / 3002 loss=2.526, ppl=5.76, wps=5857.7, ups=0.09, wpb=64820, bsz=128, num_updates=11281, lr=9.99177e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=129276
2021-06-20 06:33:33 | INFO | train_inner | epoch 004: 2343 / 3002 loss=2.466, ppl=5.52, wps=5848.5, ups=0.09, wpb=64792, bsz=128, num_updates=11282, lr=9.99177e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=129287
2021-06-20 06:33:43 | INFO | train_inner | epoch 004: 2344 / 3002 loss=2.483, ppl=5.59, wps=5938.1, ups=0.09, wpb=64908, bsz=128, num_updates=11283, lr=9.99177e-05, gnorm=2.121, loss_scale=8, train_wall=10, gb_free=2.8, wall=129298
2021-06-20 06:33:54 | INFO | train_inner | epoch 004: 2345 / 3002 loss=2.595, ppl=6.04, wps=5911.6, ups=0.09, wpb=64792, bsz=128, num_updates=11284, lr=9.99177e-05, gnorm=2.071, loss_scale=8, train_wall=11, gb_free=2.8, wall=129309
2021-06-20 06:34:05 | INFO | train_inner | epoch 004: 2346 / 3002 loss=2.647, ppl=6.26, wps=5907.9, ups=0.09, wpb=64875, bsz=128, num_updates=11285, lr=9.99177e-05, gnorm=2.196, loss_scale=8, train_wall=10, gb_free=2.8, wall=129320
2021-06-20 06:34:16 | INFO | train_inner | epoch 004: 2347 / 3002 loss=2.596, ppl=6.05, wps=5978.6, ups=0.09, wpb=64808, bsz=128, num_updates=11286, lr=9.99177e-05, gnorm=1.984, loss_scale=8, train_wall=10, gb_free=2.8, wall=129331
2021-06-20 06:34:27 | INFO | train_inner | epoch 004: 2348 / 3002 loss=2.497, ppl=5.65, wps=5862.4, ups=0.09, wpb=64821, bsz=128, num_updates=11287, lr=9.99177e-05, gnorm=2.284, loss_scale=8, train_wall=11, gb_free=2.8, wall=129342
2021-06-20 06:34:39 | INFO | train_inner | epoch 004: 2349 / 3002 loss=2.67, ppl=6.36, wps=5796.6, ups=0.09, wpb=64876, bsz=128, num_updates=11288, lr=9.99177e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=129353
2021-06-20 06:34:50 | INFO | train_inner | epoch 004: 2350 / 3002 loss=2.837, ppl=7.15, wps=5859, ups=0.09, wpb=64847, bsz=128, num_updates=11289, lr=9.99177e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=129364
2021-06-20 06:35:01 | INFO | train_inner | epoch 004: 2351 / 3002 loss=2.672, ppl=6.37, wps=5894.7, ups=0.09, wpb=64814, bsz=128, num_updates=11290, lr=9.99177e-05, gnorm=2.075, loss_scale=8, train_wall=11, gb_free=2.8, wall=129375
2021-06-20 06:35:12 | INFO | train_inner | epoch 004: 2352 / 3002 loss=2.543, ppl=5.83, wps=5940.2, ups=0.09, wpb=64818, bsz=128, num_updates=11291, lr=9.99177e-05, gnorm=2.11, loss_scale=8, train_wall=10, gb_free=2.8, wall=129386
2021-06-20 06:35:23 | INFO | train_inner | epoch 004: 2353 / 3002 loss=2.502, ppl=5.66, wps=5728.9, ups=0.09, wpb=64849, bsz=128, num_updates=11292, lr=9.99177e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=129397
2021-06-20 06:35:34 | INFO | train_inner | epoch 004: 2354 / 3002 loss=2.596, ppl=6.05, wps=5776.6, ups=0.09, wpb=64799, bsz=128, num_updates=11293, lr=9.99176e-05, gnorm=2.084, loss_scale=8, train_wall=11, gb_free=2.8, wall=129408
2021-06-20 06:35:45 | INFO | train_inner | epoch 004: 2355 / 3002 loss=2.49, ppl=5.62, wps=5717.2, ups=0.09, wpb=64709, bsz=128, num_updates=11294, lr=9.99176e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=129420
2021-06-20 06:35:56 | INFO | train_inner | epoch 004: 2356 / 3002 loss=2.412, ppl=5.32, wps=5900.9, ups=0.09, wpb=64901, bsz=128, num_updates=11295, lr=9.99176e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=129431
2021-06-20 06:36:08 | INFO | train_inner | epoch 004: 2357 / 3002 loss=2.458, ppl=5.49, wps=5765.8, ups=0.09, wpb=64856, bsz=128, num_updates=11296, lr=9.99176e-05, gnorm=2.033, loss_scale=8, train_wall=11, gb_free=2.8, wall=129442
2021-06-20 06:36:19 | INFO | train_inner | epoch 004: 2358 / 3002 loss=2.475, ppl=5.56, wps=5770.5, ups=0.09, wpb=64775, bsz=128, num_updates=11297, lr=9.99176e-05, gnorm=3.277, loss_scale=8, train_wall=11, gb_free=2.8, wall=129453
2021-06-20 06:36:30 | INFO | train_inner | epoch 004: 2359 / 3002 loss=2.643, ppl=6.25, wps=5886.3, ups=0.09, wpb=64849, bsz=128, num_updates=11298, lr=9.99176e-05, gnorm=2.154, loss_scale=8, train_wall=11, gb_free=2.8, wall=129464
2021-06-20 06:36:41 | INFO | train_inner | epoch 004: 2360 / 3002 loss=2.499, ppl=5.65, wps=5805.2, ups=0.09, wpb=64806, bsz=128, num_updates=11299, lr=9.99176e-05, gnorm=2.033, loss_scale=8, train_wall=11, gb_free=2.8, wall=129475
2021-06-20 06:36:52 | INFO | train_inner | epoch 004: 2361 / 3002 loss=2.504, ppl=5.67, wps=5793.2, ups=0.09, wpb=64861, bsz=128, num_updates=11300, lr=9.99176e-05, gnorm=2.269, loss_scale=8, train_wall=11, gb_free=2.8, wall=129487
2021-06-20 06:37:03 | INFO | train_inner | epoch 004: 2362 / 3002 loss=2.518, ppl=5.73, wps=5767.3, ups=0.09, wpb=64811, bsz=128, num_updates=11301, lr=9.99176e-05, gnorm=2.052, loss_scale=8, train_wall=11, gb_free=2.8, wall=129498
2021-06-20 06:37:15 | INFO | train_inner | epoch 004: 2363 / 3002 loss=2.623, ppl=6.16, wps=5822.6, ups=0.09, wpb=64821, bsz=128, num_updates=11302, lr=9.99176e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=129509
2021-06-20 06:37:26 | INFO | train_inner | epoch 004: 2364 / 3002 loss=2.42, ppl=5.35, wps=5828.1, ups=0.09, wpb=64859, bsz=128, num_updates=11303, lr=9.99176e-05, gnorm=2.704, loss_scale=8, train_wall=11, gb_free=2.8, wall=129520
2021-06-20 06:37:37 | INFO | train_inner | epoch 004: 2365 / 3002 loss=2.634, ppl=6.21, wps=5955.4, ups=0.09, wpb=64873, bsz=128, num_updates=11304, lr=9.99176e-05, gnorm=2.025, loss_scale=8, train_wall=10, gb_free=2.8, wall=129531
2021-06-20 06:37:48 | INFO | train_inner | epoch 004: 2366 / 3002 loss=2.807, ppl=7, wps=5707.9, ups=0.09, wpb=64819, bsz=128, num_updates=11305, lr=9.99176e-05, gnorm=2.033, loss_scale=8, train_wall=11, gb_free=2.8, wall=129542
2021-06-20 06:37:59 | INFO | train_inner | epoch 004: 2367 / 3002 loss=2.71, ppl=6.54, wps=5821.8, ups=0.09, wpb=64844, bsz=128, num_updates=11306, lr=9.99175e-05, gnorm=2.089, loss_scale=8, train_wall=11, gb_free=2.8, wall=129553
2021-06-20 06:38:10 | INFO | train_inner | epoch 004: 2368 / 3002 loss=2.683, ppl=6.42, wps=5801.7, ups=0.09, wpb=64811, bsz=128, num_updates=11307, lr=9.99175e-05, gnorm=2.102, loss_scale=8, train_wall=11, gb_free=2.8, wall=129565
2021-06-20 06:38:21 | INFO | train_inner | epoch 004: 2369 / 3002 loss=2.478, ppl=5.57, wps=5859, ups=0.09, wpb=64828, bsz=128, num_updates=11308, lr=9.99175e-05, gnorm=2.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=129576
2021-06-20 06:38:33 | INFO | train_inner | epoch 004: 2370 / 3002 loss=2.529, ppl=5.77, wps=5760.9, ups=0.09, wpb=64764, bsz=128, num_updates=11309, lr=9.99175e-05, gnorm=2.141, loss_scale=8, train_wall=11, gb_free=2.8, wall=129587
2021-06-20 06:38:44 | INFO | train_inner | epoch 004: 2371 / 3002 loss=2.456, ppl=5.49, wps=5796.1, ups=0.09, wpb=64815, bsz=128, num_updates=11310, lr=9.99175e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=129598
2021-06-20 06:38:55 | INFO | train_inner | epoch 004: 2372 / 3002 loss=2.62, ppl=6.15, wps=5701.4, ups=0.09, wpb=64809, bsz=128, num_updates=11311, lr=9.99175e-05, gnorm=2.112, loss_scale=8, train_wall=11, gb_free=2.8, wall=129609
2021-06-20 06:39:06 | INFO | train_inner | epoch 004: 2373 / 3002 loss=2.443, ppl=5.44, wps=5879.3, ups=0.09, wpb=64826, bsz=128, num_updates=11312, lr=9.99175e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=129620
2021-06-20 06:39:17 | INFO | train_inner | epoch 004: 2374 / 3002 loss=2.469, ppl=5.54, wps=5875.1, ups=0.09, wpb=64788, bsz=128, num_updates=11313, lr=9.99175e-05, gnorm=4.194, loss_scale=8, train_wall=11, gb_free=2.8, wall=129632
2021-06-20 06:39:28 | INFO | train_inner | epoch 004: 2375 / 3002 loss=2.612, ppl=6.11, wps=5827.5, ups=0.09, wpb=64750, bsz=128, num_updates=11314, lr=9.99175e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=129643
2021-06-20 06:39:39 | INFO | train_inner | epoch 004: 2376 / 3002 loss=2.641, ppl=6.24, wps=5850.9, ups=0.09, wpb=64764, bsz=128, num_updates=11315, lr=9.99175e-05, gnorm=2.101, loss_scale=8, train_wall=11, gb_free=2.8, wall=129654
2021-06-20 06:39:50 | INFO | train_inner | epoch 004: 2377 / 3002 loss=2.559, ppl=5.89, wps=5879.2, ups=0.09, wpb=64836, bsz=128, num_updates=11316, lr=9.99175e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=129665
2021-06-20 06:40:02 | INFO | train_inner | epoch 004: 2378 / 3002 loss=2.753, ppl=6.74, wps=5818.4, ups=0.09, wpb=64903, bsz=128, num_updates=11317, lr=9.99175e-05, gnorm=2.139, loss_scale=8, train_wall=11, gb_free=2.8, wall=129676
2021-06-20 06:40:13 | INFO | train_inner | epoch 004: 2379 / 3002 loss=2.671, ppl=6.37, wps=5871.8, ups=0.09, wpb=64901, bsz=128, num_updates=11318, lr=9.99174e-05, gnorm=2.096, loss_scale=8, train_wall=11, gb_free=2.8, wall=129687
2021-06-20 06:40:24 | INFO | train_inner | epoch 004: 2380 / 3002 loss=2.65, ppl=6.28, wps=5809.6, ups=0.09, wpb=64814, bsz=128, num_updates=11319, lr=9.99174e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=129698
2021-06-20 06:40:35 | INFO | train_inner | epoch 004: 2381 / 3002 loss=2.438, ppl=5.42, wps=5919, ups=0.09, wpb=64888, bsz=128, num_updates=11320, lr=9.99174e-05, gnorm=4.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=129709
2021-06-20 06:40:46 | INFO | train_inner | epoch 004: 2382 / 3002 loss=2.638, ppl=6.22, wps=5857.4, ups=0.09, wpb=64867, bsz=128, num_updates=11321, lr=9.99174e-05, gnorm=2.57, loss_scale=8, train_wall=11, gb_free=2.8, wall=129720
2021-06-20 06:40:57 | INFO | train_inner | epoch 004: 2383 / 3002 loss=2.73, ppl=6.63, wps=5817.7, ups=0.09, wpb=64885, bsz=128, num_updates=11322, lr=9.99174e-05, gnorm=2.162, loss_scale=8, train_wall=11, gb_free=2.8, wall=129731
2021-06-20 06:41:08 | INFO | train_inner | epoch 004: 2384 / 3002 loss=2.398, ppl=5.27, wps=5824, ups=0.09, wpb=64820, bsz=128, num_updates=11323, lr=9.99174e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=129742
2021-06-20 06:41:19 | INFO | train_inner | epoch 004: 2385 / 3002 loss=2.377, ppl=5.2, wps=5868.7, ups=0.09, wpb=64793, bsz=128, num_updates=11324, lr=9.99174e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=129753
2021-06-20 06:41:30 | INFO | train_inner | epoch 004: 2386 / 3002 loss=2.723, ppl=6.6, wps=5841.9, ups=0.09, wpb=64803, bsz=128, num_updates=11325, lr=9.99174e-05, gnorm=2.167, loss_scale=8, train_wall=11, gb_free=2.8, wall=129765
2021-06-20 06:41:41 | INFO | train_inner | epoch 004: 2387 / 3002 loss=2.405, ppl=5.3, wps=5861.2, ups=0.09, wpb=64773, bsz=128, num_updates=11326, lr=9.99174e-05, gnorm=4.348, loss_scale=8, train_wall=11, gb_free=2.8, wall=129776
2021-06-20 06:41:52 | INFO | train_inner | epoch 004: 2388 / 3002 loss=2.684, ppl=6.43, wps=5787.3, ups=0.09, wpb=64765, bsz=128, num_updates=11327, lr=9.99174e-05, gnorm=2.102, loss_scale=8, train_wall=11, gb_free=2.8, wall=129787
2021-06-20 06:42:03 | INFO | train_inner | epoch 004: 2389 / 3002 loss=2.617, ppl=6.13, wps=5891.4, ups=0.09, wpb=64806, bsz=128, num_updates=11328, lr=9.99174e-05, gnorm=2.189, loss_scale=8, train_wall=11, gb_free=2.8, wall=129798
2021-06-20 06:42:14 | INFO | train_inner | epoch 004: 2390 / 3002 loss=2.531, ppl=5.78, wps=5910.1, ups=0.09, wpb=64881, bsz=128, num_updates=11329, lr=9.99174e-05, gnorm=2.14, loss_scale=8, train_wall=11, gb_free=2.8, wall=129809
2021-06-20 06:42:25 | INFO | train_inner | epoch 004: 2391 / 3002 loss=2.563, ppl=5.91, wps=5961.7, ups=0.09, wpb=64905, bsz=128, num_updates=11330, lr=9.99174e-05, gnorm=2.313, loss_scale=8, train_wall=10, gb_free=2.8, wall=129820
2021-06-20 06:42:37 | INFO | train_inner | epoch 004: 2392 / 3002 loss=2.68, ppl=6.41, wps=5775.5, ups=0.09, wpb=64919, bsz=128, num_updates=11331, lr=9.99173e-05, gnorm=2.175, loss_scale=8, train_wall=11, gb_free=2.8, wall=129831
2021-06-20 06:42:48 | INFO | train_inner | epoch 004: 2393 / 3002 loss=2.562, ppl=5.9, wps=5822.7, ups=0.09, wpb=64789, bsz=128, num_updates=11332, lr=9.99173e-05, gnorm=2.203, loss_scale=8, train_wall=11, gb_free=2.8, wall=129842
2021-06-20 06:42:59 | INFO | train_inner | epoch 004: 2394 / 3002 loss=2.609, ppl=6.1, wps=5742.7, ups=0.09, wpb=64859, bsz=128, num_updates=11333, lr=9.99173e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=129853
2021-06-20 06:43:10 | INFO | train_inner | epoch 004: 2395 / 3002 loss=2.562, ppl=5.91, wps=5810.2, ups=0.09, wpb=64777, bsz=128, num_updates=11334, lr=9.99173e-05, gnorm=2.415, loss_scale=8, train_wall=11, gb_free=2.8, wall=129864
2021-06-20 06:43:21 | INFO | train_inner | epoch 004: 2396 / 3002 loss=2.596, ppl=6.05, wps=6025.2, ups=0.09, wpb=64813, bsz=128, num_updates=11335, lr=9.99173e-05, gnorm=2.185, loss_scale=8, train_wall=10, gb_free=2.8, wall=129875
2021-06-20 06:43:32 | INFO | train_inner | epoch 004: 2397 / 3002 loss=2.589, ppl=6.02, wps=6005.8, ups=0.09, wpb=64929, bsz=128, num_updates=11336, lr=9.99173e-05, gnorm=2.113, loss_scale=8, train_wall=10, gb_free=2.8, wall=129886
2021-06-20 06:43:43 | INFO | train_inner | epoch 004: 2398 / 3002 loss=2.497, ppl=5.65, wps=5827.8, ups=0.09, wpb=64842, bsz=128, num_updates=11337, lr=9.99173e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=129897
2021-06-20 06:43:54 | INFO | train_inner | epoch 004: 2399 / 3002 loss=2.496, ppl=5.64, wps=5949, ups=0.09, wpb=64833, bsz=128, num_updates=11338, lr=9.99173e-05, gnorm=2.085, loss_scale=8, train_wall=10, gb_free=2.8, wall=129908
2021-06-20 06:44:05 | INFO | train_inner | epoch 004: 2400 / 3002 loss=2.714, ppl=6.56, wps=5800.7, ups=0.09, wpb=64806, bsz=128, num_updates=11339, lr=9.99173e-05, gnorm=2.145, loss_scale=8, train_wall=11, gb_free=2.8, wall=129919
2021-06-20 06:44:16 | INFO | train_inner | epoch 004: 2401 / 3002 loss=2.603, ppl=6.07, wps=5839.7, ups=0.09, wpb=64847, bsz=128, num_updates=11340, lr=9.99173e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=129930
2021-06-20 06:44:27 | INFO | train_inner | epoch 004: 2402 / 3002 loss=2.622, ppl=6.16, wps=5932.8, ups=0.09, wpb=64877, bsz=128, num_updates=11341, lr=9.99173e-05, gnorm=2.026, loss_scale=8, train_wall=10, gb_free=2.8, wall=129941
2021-06-20 06:44:38 | INFO | train_inner | epoch 004: 2403 / 3002 loss=2.725, ppl=6.61, wps=5946.7, ups=0.09, wpb=64724, bsz=128, num_updates=11342, lr=9.99173e-05, gnorm=2.133, loss_scale=8, train_wall=10, gb_free=2.8, wall=129952
2021-06-20 06:44:49 | INFO | train_inner | epoch 004: 2404 / 3002 loss=2.537, ppl=5.8, wps=5765.4, ups=0.09, wpb=64742, bsz=128, num_updates=11343, lr=9.99172e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=129963
2021-06-20 06:45:00 | INFO | train_inner | epoch 004: 2405 / 3002 loss=2.628, ppl=6.18, wps=5974.5, ups=0.09, wpb=64710, bsz=128, num_updates=11344, lr=9.99172e-05, gnorm=2.036, loss_scale=8, train_wall=10, gb_free=2.8, wall=129974
2021-06-20 06:45:11 | INFO | train_inner | epoch 004: 2406 / 3002 loss=2.535, ppl=5.8, wps=5780.8, ups=0.09, wpb=64811, bsz=128, num_updates=11345, lr=9.99172e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=129985
2021-06-20 06:45:22 | INFO | train_inner | epoch 004: 2407 / 3002 loss=2.455, ppl=5.48, wps=5995.8, ups=0.09, wpb=64899, bsz=128, num_updates=11346, lr=9.99172e-05, gnorm=2.587, loss_scale=8, train_wall=10, gb_free=2.8, wall=129996
2021-06-20 06:45:33 | INFO | train_inner | epoch 004: 2408 / 3002 loss=2.62, ppl=6.15, wps=5894.5, ups=0.09, wpb=64882, bsz=128, num_updates=11347, lr=9.99172e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=130007
2021-06-20 06:45:44 | INFO | train_inner | epoch 004: 2409 / 3002 loss=2.612, ppl=6.12, wps=5708.2, ups=0.09, wpb=64720, bsz=128, num_updates=11348, lr=9.99172e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=130019
2021-06-20 06:45:55 | INFO | train_inner | epoch 004: 2410 / 3002 loss=2.509, ppl=5.69, wps=6037, ups=0.09, wpb=64784, bsz=128, num_updates=11349, lr=9.99172e-05, gnorm=2.047, loss_scale=8, train_wall=10, gb_free=2.8, wall=130029
2021-06-20 06:46:06 | INFO | train_inner | epoch 004: 2411 / 3002 loss=2.681, ppl=6.41, wps=5816.5, ups=0.09, wpb=64893, bsz=128, num_updates=11350, lr=9.99172e-05, gnorm=2.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=130040
2021-06-20 06:46:17 | INFO | train_inner | epoch 004: 2412 / 3002 loss=2.469, ppl=5.53, wps=5788.1, ups=0.09, wpb=64819, bsz=128, num_updates=11351, lr=9.99172e-05, gnorm=2.041, loss_scale=8, train_wall=11, gb_free=2.8, wall=130052
2021-06-20 06:46:28 | INFO | train_inner | epoch 004: 2413 / 3002 loss=2.467, ppl=5.53, wps=5884.9, ups=0.09, wpb=64882, bsz=128, num_updates=11352, lr=9.99172e-05, gnorm=2.045, loss_scale=8, train_wall=11, gb_free=2.8, wall=130063
2021-06-20 06:46:40 | INFO | train_inner | epoch 004: 2414 / 3002 loss=2.689, ppl=6.45, wps=5767.1, ups=0.09, wpb=64772, bsz=128, num_updates=11353, lr=9.99172e-05, gnorm=2.167, loss_scale=8, train_wall=11, gb_free=2.8, wall=130074
2021-06-20 06:46:51 | INFO | train_inner | epoch 004: 2415 / 3002 loss=2.518, ppl=5.73, wps=5921.2, ups=0.09, wpb=64860, bsz=128, num_updates=11354, lr=9.99172e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=130085
2021-06-20 06:47:02 | INFO | train_inner | epoch 004: 2416 / 3002 loss=2.523, ppl=5.75, wps=5846.4, ups=0.09, wpb=64879, bsz=128, num_updates=11355, lr=9.99172e-05, gnorm=2.071, loss_scale=8, train_wall=11, gb_free=2.8, wall=130096
2021-06-20 06:47:13 | INFO | train_inner | epoch 004: 2417 / 3002 loss=2.565, ppl=5.92, wps=5898.3, ups=0.09, wpb=64838, bsz=128, num_updates=11356, lr=9.99171e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=130107
2021-06-20 06:47:24 | INFO | train_inner | epoch 004: 2418 / 3002 loss=2.571, ppl=5.94, wps=5809.7, ups=0.09, wpb=64889, bsz=128, num_updates=11357, lr=9.99171e-05, gnorm=2.345, loss_scale=8, train_wall=11, gb_free=2.8, wall=130118
2021-06-20 06:47:35 | INFO | train_inner | epoch 004: 2419 / 3002 loss=2.564, ppl=5.91, wps=5797.7, ups=0.09, wpb=64806, bsz=128, num_updates=11358, lr=9.99171e-05, gnorm=2.126, loss_scale=8, train_wall=11, gb_free=2.8, wall=130129
2021-06-20 06:47:46 | INFO | train_inner | epoch 004: 2420 / 3002 loss=2.78, ppl=6.87, wps=5835.5, ups=0.09, wpb=64811, bsz=128, num_updates=11359, lr=9.99171e-05, gnorm=2.195, loss_scale=8, train_wall=11, gb_free=2.8, wall=130140
2021-06-20 06:47:57 | INFO | train_inner | epoch 004: 2421 / 3002 loss=2.448, ppl=5.46, wps=5915.6, ups=0.09, wpb=64899, bsz=128, num_updates=11360, lr=9.99171e-05, gnorm=2.091, loss_scale=8, train_wall=11, gb_free=2.8, wall=130151
2021-06-20 06:48:08 | INFO | train_inner | epoch 004: 2422 / 3002 loss=2.643, ppl=6.25, wps=5746.4, ups=0.09, wpb=64830, bsz=128, num_updates=11361, lr=9.99171e-05, gnorm=2.127, loss_scale=8, train_wall=11, gb_free=2.8, wall=130163
2021-06-20 06:48:19 | INFO | train_inner | epoch 004: 2423 / 3002 loss=2.543, ppl=5.83, wps=5905.6, ups=0.09, wpb=64877, bsz=128, num_updates=11362, lr=9.99171e-05, gnorm=2.091, loss_scale=8, train_wall=11, gb_free=2.8, wall=130174
2021-06-20 06:48:30 | INFO | train_inner | epoch 004: 2424 / 3002 loss=2.663, ppl=6.33, wps=5887.3, ups=0.09, wpb=64875, bsz=128, num_updates=11363, lr=9.99171e-05, gnorm=2.076, loss_scale=8, train_wall=11, gb_free=2.8, wall=130185
2021-06-20 06:48:42 | INFO | train_inner | epoch 004: 2425 / 3002 loss=2.57, ppl=5.94, wps=5789.3, ups=0.09, wpb=64820, bsz=128, num_updates=11364, lr=9.99171e-05, gnorm=2.335, loss_scale=8, train_wall=11, gb_free=2.8, wall=130196
2021-06-20 06:48:53 | INFO | train_inner | epoch 004: 2426 / 3002 loss=2.514, ppl=5.71, wps=5769.9, ups=0.09, wpb=64795, bsz=128, num_updates=11365, lr=9.99171e-05, gnorm=2.11, loss_scale=8, train_wall=11, gb_free=2.8, wall=130207
2021-06-20 06:49:04 | INFO | train_inner | epoch 004: 2427 / 3002 loss=2.675, ppl=6.38, wps=5900.9, ups=0.09, wpb=64897, bsz=128, num_updates=11366, lr=9.99171e-05, gnorm=2.135, loss_scale=8, train_wall=11, gb_free=2.8, wall=130218
2021-06-20 06:49:15 | INFO | train_inner | epoch 004: 2428 / 3002 loss=2.696, ppl=6.48, wps=5779.2, ups=0.09, wpb=64793, bsz=128, num_updates=11367, lr=9.99171e-05, gnorm=2.073, loss_scale=8, train_wall=11, gb_free=2.8, wall=130229
2021-06-20 06:49:26 | INFO | train_inner | epoch 004: 2429 / 3002 loss=2.446, ppl=5.45, wps=5816.7, ups=0.09, wpb=64819, bsz=128, num_updates=11368, lr=9.9917e-05, gnorm=2.094, loss_scale=8, train_wall=11, gb_free=2.8, wall=130240
2021-06-20 06:49:37 | INFO | train_inner | epoch 004: 2430 / 3002 loss=2.485, ppl=5.6, wps=5829.4, ups=0.09, wpb=64739, bsz=128, num_updates=11369, lr=9.9917e-05, gnorm=5.49, loss_scale=8, train_wall=11, gb_free=2.8, wall=130252
2021-06-20 06:49:48 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-20 06:49:59 | INFO | train_inner | epoch 004: 2432 / 3002 loss=2.601, ppl=6.07, wps=2948.5, ups=0.05, wpb=64859, bsz=128, num_updates=11370, lr=9.9917e-05, gnorm=9.947, loss_scale=4, train_wall=21, gb_free=2.8, wall=130274
2021-06-20 06:50:10 | INFO | train_inner | epoch 004: 2433 / 3002 loss=2.493, ppl=5.63, wps=5786.5, ups=0.09, wpb=64799, bsz=128, num_updates=11371, lr=9.9917e-05, gnorm=2.008, loss_scale=4, train_wall=11, gb_free=2.8, wall=130285
2021-06-20 06:50:22 | INFO | train_inner | epoch 004: 2434 / 3002 loss=2.469, ppl=5.54, wps=5839.9, ups=0.09, wpb=64823, bsz=128, num_updates=11372, lr=9.9917e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=130296
2021-06-20 06:50:33 | INFO | train_inner | epoch 004: 2435 / 3002 loss=2.464, ppl=5.52, wps=5786, ups=0.09, wpb=64844, bsz=128, num_updates=11373, lr=9.9917e-05, gnorm=2.085, loss_scale=4, train_wall=11, gb_free=2.8, wall=130307
2021-06-20 06:50:44 | INFO | train_inner | epoch 004: 2436 / 3002 loss=2.577, ppl=5.97, wps=5832.5, ups=0.09, wpb=64848, bsz=128, num_updates=11374, lr=9.9917e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=130318
2021-06-20 06:50:55 | INFO | train_inner | epoch 004: 2437 / 3002 loss=2.496, ppl=5.64, wps=5871.9, ups=0.09, wpb=64769, bsz=128, num_updates=11375, lr=9.9917e-05, gnorm=2.584, loss_scale=4, train_wall=11, gb_free=2.8, wall=130329
2021-06-20 06:51:06 | INFO | train_inner | epoch 004: 2438 / 3002 loss=2.673, ppl=6.38, wps=5831.2, ups=0.09, wpb=64845, bsz=128, num_updates=11376, lr=9.9917e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=130340
2021-06-20 06:51:17 | INFO | train_inner | epoch 004: 2439 / 3002 loss=2.583, ppl=5.99, wps=5712.1, ups=0.09, wpb=64822, bsz=128, num_updates=11377, lr=9.9917e-05, gnorm=2.192, loss_scale=4, train_wall=11, gb_free=2.8, wall=130352
2021-06-20 06:51:28 | INFO | train_inner | epoch 004: 2440 / 3002 loss=2.62, ppl=6.15, wps=5929.2, ups=0.09, wpb=64838, bsz=128, num_updates=11378, lr=9.9917e-05, gnorm=2.718, loss_scale=4, train_wall=11, gb_free=2.8, wall=130363
2021-06-20 06:51:39 | INFO | train_inner | epoch 004: 2441 / 3002 loss=2.508, ppl=5.69, wps=5867, ups=0.09, wpb=64789, bsz=128, num_updates=11379, lr=9.9917e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=130374
2021-06-20 06:51:50 | INFO | train_inner | epoch 004: 2442 / 3002 loss=2.653, ppl=6.29, wps=5818.6, ups=0.09, wpb=64816, bsz=128, num_updates=11380, lr=9.9917e-05, gnorm=2.131, loss_scale=4, train_wall=11, gb_free=2.8, wall=130385
2021-06-20 06:52:01 | INFO | train_inner | epoch 004: 2443 / 3002 loss=2.648, ppl=6.27, wps=5920.7, ups=0.09, wpb=64769, bsz=128, num_updates=11381, lr=9.99169e-05, gnorm=2.126, loss_scale=4, train_wall=11, gb_free=2.8, wall=130396
2021-06-20 06:52:12 | INFO | train_inner | epoch 004: 2444 / 3002 loss=2.502, ppl=5.67, wps=5921.9, ups=0.09, wpb=64857, bsz=128, num_updates=11382, lr=9.99169e-05, gnorm=2.182, loss_scale=4, train_wall=10, gb_free=2.8, wall=130407
2021-06-20 06:52:23 | INFO | train_inner | epoch 004: 2445 / 3002 loss=2.651, ppl=6.28, wps=5849, ups=0.09, wpb=64865, bsz=128, num_updates=11383, lr=9.99169e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=130418
2021-06-20 06:52:35 | INFO | train_inner | epoch 004: 2446 / 3002 loss=2.592, ppl=6.03, wps=5812.8, ups=0.09, wpb=64835, bsz=128, num_updates=11384, lr=9.99169e-05, gnorm=2.261, loss_scale=4, train_wall=11, gb_free=2.8, wall=130429
2021-06-20 06:52:45 | INFO | train_inner | epoch 004: 2447 / 3002 loss=2.398, ppl=5.27, wps=5983.7, ups=0.09, wpb=64807, bsz=128, num_updates=11385, lr=9.99169e-05, gnorm=3.553, loss_scale=4, train_wall=10, gb_free=2.8, wall=130440
2021-06-20 06:52:57 | INFO | train_inner | epoch 004: 2448 / 3002 loss=2.506, ppl=5.68, wps=5802.2, ups=0.09, wpb=64885, bsz=128, num_updates=11386, lr=9.99169e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=130451
2021-06-20 06:53:08 | INFO | train_inner | epoch 004: 2449 / 3002 loss=2.473, ppl=5.55, wps=5918.1, ups=0.09, wpb=64889, bsz=128, num_updates=11387, lr=9.99169e-05, gnorm=2.308, loss_scale=4, train_wall=10, gb_free=2.8, wall=130462
2021-06-20 06:53:19 | INFO | train_inner | epoch 004: 2450 / 3002 loss=2.613, ppl=6.12, wps=5820, ups=0.09, wpb=64822, bsz=128, num_updates=11388, lr=9.99169e-05, gnorm=2.132, loss_scale=4, train_wall=11, gb_free=2.8, wall=130473
2021-06-20 06:53:30 | INFO | train_inner | epoch 004: 2451 / 3002 loss=2.543, ppl=5.83, wps=5932.4, ups=0.09, wpb=64850, bsz=128, num_updates=11389, lr=9.99169e-05, gnorm=1.984, loss_scale=4, train_wall=10, gb_free=2.8, wall=130484
2021-06-20 06:53:41 | INFO | train_inner | epoch 004: 2452 / 3002 loss=2.542, ppl=5.82, wps=5827.7, ups=0.09, wpb=64761, bsz=128, num_updates=11390, lr=9.99169e-05, gnorm=2.081, loss_scale=4, train_wall=11, gb_free=2.8, wall=130495
2021-06-20 06:53:52 | INFO | train_inner | epoch 004: 2453 / 3002 loss=2.47, ppl=5.54, wps=5939, ups=0.09, wpb=64832, bsz=128, num_updates=11391, lr=9.99169e-05, gnorm=2.012, loss_scale=4, train_wall=10, gb_free=2.8, wall=130506
2021-06-20 06:54:03 | INFO | train_inner | epoch 004: 2454 / 3002 loss=2.536, ppl=5.8, wps=5821.4, ups=0.09, wpb=64729, bsz=128, num_updates=11392, lr=9.99169e-05, gnorm=2.09, loss_scale=4, train_wall=11, gb_free=2.8, wall=130517
2021-06-20 06:54:14 | INFO | train_inner | epoch 004: 2455 / 3002 loss=2.615, ppl=6.13, wps=5844.5, ups=0.09, wpb=64749, bsz=128, num_updates=11393, lr=9.99168e-05, gnorm=2.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=130528
2021-06-20 06:54:25 | INFO | train_inner | epoch 004: 2456 / 3002 loss=2.524, ppl=5.75, wps=5879.1, ups=0.09, wpb=64879, bsz=128, num_updates=11394, lr=9.99168e-05, gnorm=2.18, loss_scale=4, train_wall=11, gb_free=2.8, wall=130539
2021-06-20 06:54:36 | INFO | train_inner | epoch 004: 2457 / 3002 loss=2.472, ppl=5.55, wps=5856, ups=0.09, wpb=64851, bsz=128, num_updates=11395, lr=9.99168e-05, gnorm=2.045, loss_scale=4, train_wall=11, gb_free=2.8, wall=130550
2021-06-20 06:54:47 | INFO | train_inner | epoch 004: 2458 / 3002 loss=2.536, ppl=5.8, wps=5848, ups=0.09, wpb=64788, bsz=128, num_updates=11396, lr=9.99168e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=130561
2021-06-20 06:54:58 | INFO | train_inner | epoch 004: 2459 / 3002 loss=2.458, ppl=5.5, wps=5867.7, ups=0.09, wpb=64827, bsz=128, num_updates=11397, lr=9.99168e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=130572
2021-06-20 06:55:09 | INFO | train_inner | epoch 004: 2460 / 3002 loss=2.577, ppl=5.97, wps=5994.7, ups=0.09, wpb=64895, bsz=128, num_updates=11398, lr=9.99168e-05, gnorm=1.986, loss_scale=4, train_wall=10, gb_free=2.8, wall=130583
2021-06-20 06:55:20 | INFO | train_inner | epoch 004: 2461 / 3002 loss=2.633, ppl=6.2, wps=5769.3, ups=0.09, wpb=64822, bsz=128, num_updates=11399, lr=9.99168e-05, gnorm=1.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=130595
2021-06-20 06:55:31 | INFO | train_inner | epoch 004: 2462 / 3002 loss=2.633, ppl=6.2, wps=5920, ups=0.09, wpb=64721, bsz=128, num_updates=11400, lr=9.99168e-05, gnorm=1.986, loss_scale=4, train_wall=10, gb_free=2.8, wall=130605
2021-06-20 06:55:42 | INFO | train_inner | epoch 004: 2463 / 3002 loss=2.542, ppl=5.82, wps=5979.2, ups=0.09, wpb=64863, bsz=128, num_updates=11401, lr=9.99168e-05, gnorm=2.006, loss_scale=4, train_wall=10, gb_free=2.8, wall=130616
2021-06-20 06:55:53 | INFO | train_inner | epoch 004: 2464 / 3002 loss=2.555, ppl=5.88, wps=5814.9, ups=0.09, wpb=64793, bsz=128, num_updates=11402, lr=9.99168e-05, gnorm=2.043, loss_scale=4, train_wall=11, gb_free=2.8, wall=130627
2021-06-20 06:56:04 | INFO | train_inner | epoch 004: 2465 / 3002 loss=2.499, ppl=5.65, wps=5846, ups=0.09, wpb=64796, bsz=128, num_updates=11403, lr=9.99168e-05, gnorm=2.07, loss_scale=4, train_wall=11, gb_free=2.8, wall=130639
2021-06-20 06:56:15 | INFO | train_inner | epoch 004: 2466 / 3002 loss=2.532, ppl=5.78, wps=5881.6, ups=0.09, wpb=64681, bsz=128, num_updates=11404, lr=9.99168e-05, gnorm=2.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=130650
2021-06-20 06:56:26 | INFO | train_inner | epoch 004: 2467 / 3002 loss=2.564, ppl=5.91, wps=5859.2, ups=0.09, wpb=64857, bsz=128, num_updates=11405, lr=9.99168e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=130661
2021-06-20 06:56:38 | INFO | train_inner | epoch 004: 2468 / 3002 loss=2.527, ppl=5.76, wps=5725.2, ups=0.09, wpb=64820, bsz=128, num_updates=11406, lr=9.99167e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=130672
2021-06-20 06:56:48 | INFO | train_inner | epoch 004: 2469 / 3002 loss=2.567, ppl=5.92, wps=5971.1, ups=0.09, wpb=64861, bsz=128, num_updates=11407, lr=9.99167e-05, gnorm=1.997, loss_scale=4, train_wall=10, gb_free=2.8, wall=130683
2021-06-20 06:56:59 | INFO | train_inner | epoch 004: 2470 / 3002 loss=2.618, ppl=6.14, wps=5906.1, ups=0.09, wpb=64858, bsz=128, num_updates=11408, lr=9.99167e-05, gnorm=1.957, loss_scale=4, train_wall=11, gb_free=2.8, wall=130694
2021-06-20 06:57:11 | INFO | train_inner | epoch 004: 2471 / 3002 loss=2.541, ppl=5.82, wps=5773.5, ups=0.09, wpb=64786, bsz=128, num_updates=11409, lr=9.99167e-05, gnorm=2.107, loss_scale=4, train_wall=11, gb_free=2.8, wall=130705
2021-06-20 06:57:22 | INFO | train_inner | epoch 004: 2472 / 3002 loss=2.344, ppl=5.08, wps=5831.7, ups=0.09, wpb=64843, bsz=128, num_updates=11410, lr=9.99167e-05, gnorm=2.031, loss_scale=4, train_wall=11, gb_free=2.8, wall=130716
2021-06-20 06:57:33 | INFO | train_inner | epoch 004: 2473 / 3002 loss=2.558, ppl=5.89, wps=5856.3, ups=0.09, wpb=64877, bsz=128, num_updates=11411, lr=9.99167e-05, gnorm=2.048, loss_scale=4, train_wall=11, gb_free=2.8, wall=130727
2021-06-20 06:57:44 | INFO | train_inner | epoch 004: 2474 / 3002 loss=2.455, ppl=5.48, wps=5776.5, ups=0.09, wpb=64848, bsz=128, num_updates=11412, lr=9.99167e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=130738
2021-06-20 06:57:55 | INFO | train_inner | epoch 004: 2475 / 3002 loss=2.535, ppl=5.8, wps=5893.9, ups=0.09, wpb=64834, bsz=128, num_updates=11413, lr=9.99167e-05, gnorm=2.104, loss_scale=4, train_wall=11, gb_free=2.8, wall=130749
2021-06-20 06:58:06 | INFO | train_inner | epoch 004: 2476 / 3002 loss=2.564, ppl=5.91, wps=5899.3, ups=0.09, wpb=64865, bsz=128, num_updates=11414, lr=9.99167e-05, gnorm=2.542, loss_scale=4, train_wall=11, gb_free=2.8, wall=130760
2021-06-20 06:58:17 | INFO | train_inner | epoch 004: 2477 / 3002 loss=2.602, ppl=6.07, wps=5815.8, ups=0.09, wpb=64879, bsz=128, num_updates=11415, lr=9.99167e-05, gnorm=2.048, loss_scale=4, train_wall=11, gb_free=2.8, wall=130772
2021-06-20 06:58:28 | INFO | train_inner | epoch 004: 2478 / 3002 loss=2.426, ppl=5.37, wps=5773, ups=0.09, wpb=64861, bsz=128, num_updates=11416, lr=9.99167e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=130783
2021-06-20 06:58:40 | INFO | train_inner | epoch 004: 2479 / 3002 loss=2.74, ppl=6.68, wps=5838.8, ups=0.09, wpb=64761, bsz=128, num_updates=11417, lr=9.99167e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=130794
2021-06-20 06:58:51 | INFO | train_inner | epoch 004: 2480 / 3002 loss=2.855, ppl=7.24, wps=5912.4, ups=0.09, wpb=64866, bsz=128, num_updates=11418, lr=9.99166e-05, gnorm=2.123, loss_scale=4, train_wall=11, gb_free=2.8, wall=130805
2021-06-20 06:59:02 | INFO | train_inner | epoch 004: 2481 / 3002 loss=2.592, ppl=6.03, wps=5778.5, ups=0.09, wpb=64746, bsz=128, num_updates=11419, lr=9.99166e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=130816
2021-06-20 06:59:13 | INFO | train_inner | epoch 004: 2482 / 3002 loss=2.404, ppl=5.29, wps=5781.9, ups=0.09, wpb=64845, bsz=128, num_updates=11420, lr=9.99166e-05, gnorm=1.974, loss_scale=4, train_wall=11, gb_free=2.8, wall=130827
2021-06-20 06:59:24 | INFO | train_inner | epoch 004: 2483 / 3002 loss=2.695, ppl=6.47, wps=5888.3, ups=0.09, wpb=64863, bsz=128, num_updates=11421, lr=9.99166e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=130838
2021-06-20 06:59:35 | INFO | train_inner | epoch 004: 2484 / 3002 loss=2.413, ppl=5.32, wps=5863.3, ups=0.09, wpb=64817, bsz=128, num_updates=11422, lr=9.99166e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=130849
2021-06-20 06:59:46 | INFO | train_inner | epoch 004: 2485 / 3002 loss=2.62, ppl=6.15, wps=5846.6, ups=0.09, wpb=64810, bsz=128, num_updates=11423, lr=9.99166e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=130860
2021-06-20 06:59:57 | INFO | train_inner | epoch 004: 2486 / 3002 loss=2.706, ppl=6.52, wps=5879.8, ups=0.09, wpb=64775, bsz=128, num_updates=11424, lr=9.99166e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=130871
2021-06-20 07:00:08 | INFO | train_inner | epoch 004: 2487 / 3002 loss=2.611, ppl=6.11, wps=5875.4, ups=0.09, wpb=64908, bsz=128, num_updates=11425, lr=9.99166e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=130883
2021-06-20 07:00:19 | INFO | train_inner | epoch 004: 2488 / 3002 loss=2.627, ppl=6.18, wps=5768.7, ups=0.09, wpb=64848, bsz=128, num_updates=11426, lr=9.99166e-05, gnorm=3.727, loss_scale=4, train_wall=11, gb_free=2.8, wall=130894
2021-06-20 07:00:30 | INFO | train_inner | epoch 004: 2489 / 3002 loss=2.61, ppl=6.1, wps=5896.2, ups=0.09, wpb=64861, bsz=128, num_updates=11427, lr=9.99166e-05, gnorm=1.983, loss_scale=4, train_wall=11, gb_free=2.8, wall=130905
2021-06-20 07:00:41 | INFO | train_inner | epoch 004: 2490 / 3002 loss=2.55, ppl=5.86, wps=5883.8, ups=0.09, wpb=64730, bsz=128, num_updates=11428, lr=9.99166e-05, gnorm=2.082, loss_scale=4, train_wall=11, gb_free=2.8, wall=130916
2021-06-20 07:00:52 | INFO | train_inner | epoch 004: 2491 / 3002 loss=2.387, ppl=5.23, wps=5934.9, ups=0.09, wpb=64850, bsz=128, num_updates=11429, lr=9.99166e-05, gnorm=1.984, loss_scale=4, train_wall=10, gb_free=2.8, wall=130927
2021-06-20 07:01:03 | INFO | train_inner | epoch 004: 2492 / 3002 loss=2.518, ppl=5.73, wps=5891.4, ups=0.09, wpb=64851, bsz=128, num_updates=11430, lr=9.99166e-05, gnorm=2.022, loss_scale=4, train_wall=11, gb_free=2.8, wall=130938
2021-06-20 07:01:14 | INFO | train_inner | epoch 004: 2493 / 3002 loss=2.597, ppl=6.05, wps=5876.5, ups=0.09, wpb=64860, bsz=128, num_updates=11431, lr=9.99165e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=130949
2021-06-20 07:01:26 | INFO | train_inner | epoch 004: 2494 / 3002 loss=2.539, ppl=5.81, wps=5832.2, ups=0.09, wpb=64818, bsz=128, num_updates=11432, lr=9.99165e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=130960
2021-06-20 07:01:37 | INFO | train_inner | epoch 004: 2495 / 3002 loss=2.493, ppl=5.63, wps=5708.6, ups=0.09, wpb=64883, bsz=128, num_updates=11433, lr=9.99165e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=130971
2021-06-20 07:01:48 | INFO | train_inner | epoch 004: 2496 / 3002 loss=2.545, ppl=5.84, wps=5792.1, ups=0.09, wpb=64855, bsz=128, num_updates=11434, lr=9.99165e-05, gnorm=2.211, loss_scale=4, train_wall=11, gb_free=2.8, wall=130982
2021-06-20 07:01:59 | INFO | train_inner | epoch 004: 2497 / 3002 loss=2.567, ppl=5.93, wps=5810.3, ups=0.09, wpb=64858, bsz=128, num_updates=11435, lr=9.99165e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=130994
2021-06-20 07:02:10 | INFO | train_inner | epoch 004: 2498 / 3002 loss=2.609, ppl=6.1, wps=5792.2, ups=0.09, wpb=64852, bsz=128, num_updates=11436, lr=9.99165e-05, gnorm=2.043, loss_scale=4, train_wall=11, gb_free=2.8, wall=131005
2021-06-20 07:02:22 | INFO | train_inner | epoch 004: 2499 / 3002 loss=2.506, ppl=5.68, wps=5754.3, ups=0.09, wpb=64844, bsz=128, num_updates=11437, lr=9.99165e-05, gnorm=2.106, loss_scale=4, train_wall=11, gb_free=2.8, wall=131016
2021-06-20 07:02:33 | INFO | train_inner | epoch 004: 2500 / 3002 loss=2.682, ppl=6.42, wps=5874.3, ups=0.09, wpb=64790, bsz=128, num_updates=11438, lr=9.99165e-05, gnorm=2.232, loss_scale=4, train_wall=11, gb_free=2.8, wall=131027
2021-06-20 07:02:44 | INFO | train_inner | epoch 004: 2501 / 3002 loss=2.438, ppl=5.42, wps=5847, ups=0.09, wpb=64932, bsz=128, num_updates=11439, lr=9.99165e-05, gnorm=2.1, loss_scale=4, train_wall=11, gb_free=2.8, wall=131038
2021-06-20 07:02:55 | INFO | train_inner | epoch 004: 2502 / 3002 loss=2.485, ppl=5.6, wps=5991.5, ups=0.09, wpb=64866, bsz=128, num_updates=11440, lr=9.99165e-05, gnorm=11.2, loss_scale=4, train_wall=10, gb_free=2.8, wall=131049
2021-06-20 07:03:06 | INFO | train_inner | epoch 004: 2503 / 3002 loss=2.474, ppl=5.56, wps=5872.3, ups=0.09, wpb=64774, bsz=128, num_updates=11441, lr=9.99165e-05, gnorm=1.972, loss_scale=4, train_wall=11, gb_free=2.8, wall=131060
2021-06-20 07:03:17 | INFO | train_inner | epoch 004: 2504 / 3002 loss=2.6, ppl=6.06, wps=5868, ups=0.09, wpb=64860, bsz=128, num_updates=11442, lr=9.99165e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=131071
2021-06-20 07:03:28 | INFO | train_inner | epoch 004: 2505 / 3002 loss=2.486, ppl=5.6, wps=5874.6, ups=0.09, wpb=64827, bsz=128, num_updates=11443, lr=9.99164e-05, gnorm=4.772, loss_scale=4, train_wall=11, gb_free=2.8, wall=131082
2021-06-20 07:03:39 | INFO | train_inner | epoch 004: 2506 / 3002 loss=2.595, ppl=6.04, wps=5882, ups=0.09, wpb=64817, bsz=128, num_updates=11444, lr=9.99164e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=131093
2021-06-20 07:03:50 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-20 07:04:01 | INFO | train_inner | epoch 004: 2508 / 3002 loss=2.41, ppl=5.31, wps=2951.1, ups=0.05, wpb=64770, bsz=128, num_updates=11445, lr=9.99164e-05, gnorm=2.001, loss_scale=2, train_wall=21, gb_free=2.8, wall=131115
2021-06-20 07:04:12 | INFO | train_inner | epoch 004: 2509 / 3002 loss=2.59, ppl=6.02, wps=5909, ups=0.09, wpb=64900, bsz=128, num_updates=11446, lr=9.99164e-05, gnorm=2.053, loss_scale=2, train_wall=11, gb_free=2.8, wall=131126
2021-06-20 07:04:23 | INFO | train_inner | epoch 004: 2510 / 3002 loss=2.695, ppl=6.47, wps=5796.1, ups=0.09, wpb=64828, bsz=128, num_updates=11447, lr=9.99164e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=131137
2021-06-20 07:04:34 | INFO | train_inner | epoch 004: 2511 / 3002 loss=2.383, ppl=5.22, wps=5859.1, ups=0.09, wpb=64877, bsz=128, num_updates=11448, lr=9.99164e-05, gnorm=2.257, loss_scale=2, train_wall=11, gb_free=2.8, wall=131148
2021-06-20 07:04:45 | INFO | train_inner | epoch 004: 2512 / 3002 loss=2.648, ppl=6.27, wps=5854, ups=0.09, wpb=64881, bsz=128, num_updates=11449, lr=9.99164e-05, gnorm=2.364, loss_scale=2, train_wall=11, gb_free=2.8, wall=131159
2021-06-20 07:04:56 | INFO | train_inner | epoch 004: 2513 / 3002 loss=2.419, ppl=5.35, wps=5787.6, ups=0.09, wpb=64826, bsz=128, num_updates=11450, lr=9.99164e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=131171
2021-06-20 07:05:07 | INFO | train_inner | epoch 004: 2514 / 3002 loss=2.665, ppl=6.34, wps=5795.1, ups=0.09, wpb=64753, bsz=128, num_updates=11451, lr=9.99164e-05, gnorm=3.172, loss_scale=2, train_wall=11, gb_free=2.8, wall=131182
2021-06-20 07:05:19 | INFO | train_inner | epoch 004: 2515 / 3002 loss=2.656, ppl=6.3, wps=5841.3, ups=0.09, wpb=64790, bsz=128, num_updates=11452, lr=9.99164e-05, gnorm=2.226, loss_scale=2, train_wall=11, gb_free=2.8, wall=131193
2021-06-20 07:05:30 | INFO | train_inner | epoch 004: 2516 / 3002 loss=2.563, ppl=5.91, wps=5785.4, ups=0.09, wpb=64871, bsz=128, num_updates=11453, lr=9.99164e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=131204
2021-06-20 07:05:41 | INFO | train_inner | epoch 004: 2517 / 3002 loss=2.568, ppl=5.93, wps=5806.7, ups=0.09, wpb=64851, bsz=128, num_updates=11454, lr=9.99164e-05, gnorm=3.237, loss_scale=2, train_wall=11, gb_free=2.8, wall=131215
2021-06-20 07:05:52 | INFO | train_inner | epoch 004: 2518 / 3002 loss=2.458, ppl=5.5, wps=5975.2, ups=0.09, wpb=64823, bsz=128, num_updates=11455, lr=9.99164e-05, gnorm=2.056, loss_scale=2, train_wall=10, gb_free=2.8, wall=131226
2021-06-20 07:06:03 | INFO | train_inner | epoch 004: 2519 / 3002 loss=2.601, ppl=6.07, wps=5788.4, ups=0.09, wpb=64881, bsz=128, num_updates=11456, lr=9.99163e-05, gnorm=2.248, loss_scale=2, train_wall=11, gb_free=2.8, wall=131237
2021-06-20 07:06:14 | INFO | train_inner | epoch 004: 2520 / 3002 loss=2.539, ppl=5.81, wps=5805.9, ups=0.09, wpb=64727, bsz=128, num_updates=11457, lr=9.99163e-05, gnorm=2.403, loss_scale=2, train_wall=11, gb_free=2.8, wall=131248
2021-06-20 07:06:25 | INFO | train_inner | epoch 004: 2521 / 3002 loss=2.683, ppl=6.42, wps=5873.6, ups=0.09, wpb=64783, bsz=128, num_updates=11458, lr=9.99163e-05, gnorm=2.656, loss_scale=2, train_wall=11, gb_free=2.8, wall=131260
2021-06-20 07:06:36 | INFO | train_inner | epoch 004: 2522 / 3002 loss=2.58, ppl=5.98, wps=5901.1, ups=0.09, wpb=64802, bsz=128, num_updates=11459, lr=9.99163e-05, gnorm=2.767, loss_scale=2, train_wall=11, gb_free=2.8, wall=131270
2021-06-20 07:06:47 | INFO | train_inner | epoch 004: 2523 / 3002 loss=2.527, ppl=5.77, wps=5830.2, ups=0.09, wpb=64822, bsz=128, num_updates=11460, lr=9.99163e-05, gnorm=4.741, loss_scale=2, train_wall=11, gb_free=2.8, wall=131282
2021-06-20 07:06:58 | INFO | train_inner | epoch 004: 2524 / 3002 loss=2.475, ppl=5.56, wps=5812, ups=0.09, wpb=64867, bsz=128, num_updates=11461, lr=9.99163e-05, gnorm=5.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=131293
2021-06-20 07:07:09 | INFO | train_inner | epoch 004: 2525 / 3002 loss=2.607, ppl=6.09, wps=5860.5, ups=0.09, wpb=64843, bsz=128, num_updates=11462, lr=9.99163e-05, gnorm=2.234, loss_scale=2, train_wall=11, gb_free=2.8, wall=131304
2021-06-20 07:07:20 | INFO | train_inner | epoch 004: 2526 / 3002 loss=2.559, ppl=5.89, wps=5925.1, ups=0.09, wpb=64843, bsz=128, num_updates=11463, lr=9.99163e-05, gnorm=2.067, loss_scale=2, train_wall=10, gb_free=2.8, wall=131315
2021-06-20 07:07:31 | INFO | train_inner | epoch 004: 2527 / 3002 loss=2.573, ppl=5.95, wps=5934.7, ups=0.09, wpb=64819, bsz=128, num_updates=11464, lr=9.99163e-05, gnorm=2.231, loss_scale=2, train_wall=10, gb_free=2.8, wall=131326
2021-06-20 07:07:42 | INFO | train_inner | epoch 004: 2528 / 3002 loss=2.553, ppl=5.87, wps=5823.2, ups=0.09, wpb=64775, bsz=128, num_updates=11465, lr=9.99163e-05, gnorm=3.443, loss_scale=2, train_wall=11, gb_free=2.8, wall=131337
2021-06-20 07:07:54 | INFO | train_inner | epoch 004: 2529 / 3002 loss=2.656, ppl=6.3, wps=5747.5, ups=0.09, wpb=64813, bsz=128, num_updates=11466, lr=9.99163e-05, gnorm=2.325, loss_scale=2, train_wall=11, gb_free=2.8, wall=131348
2021-06-20 07:08:05 | INFO | train_inner | epoch 004: 2530 / 3002 loss=2.536, ppl=5.8, wps=5862, ups=0.09, wpb=64879, bsz=128, num_updates=11467, lr=9.99163e-05, gnorm=2.408, loss_scale=2, train_wall=11, gb_free=2.8, wall=131359
2021-06-20 07:08:16 | INFO | train_inner | epoch 004: 2531 / 3002 loss=2.608, ppl=6.1, wps=5729.7, ups=0.09, wpb=64825, bsz=128, num_updates=11468, lr=9.99162e-05, gnorm=2.461, loss_scale=2, train_wall=11, gb_free=2.8, wall=131370
2021-06-20 07:08:27 | INFO | train_inner | epoch 004: 2532 / 3002 loss=2.67, ppl=6.36, wps=6076.8, ups=0.09, wpb=64843, bsz=128, num_updates=11469, lr=9.99162e-05, gnorm=2.91, loss_scale=2, train_wall=10, gb_free=2.8, wall=131381
2021-06-20 07:08:38 | INFO | train_inner | epoch 004: 2533 / 3002 loss=2.601, ppl=6.07, wps=5782.8, ups=0.09, wpb=64800, bsz=128, num_updates=11470, lr=9.99162e-05, gnorm=6.658, loss_scale=2, train_wall=11, gb_free=2.8, wall=131392
2021-06-20 07:08:49 | INFO | train_inner | epoch 004: 2534 / 3002 loss=2.518, ppl=5.73, wps=5990.5, ups=0.09, wpb=64970, bsz=128, num_updates=11471, lr=9.99162e-05, gnorm=2.961, loss_scale=2, train_wall=10, gb_free=2.8, wall=131403
2021-06-20 07:09:00 | INFO | train_inner | epoch 004: 2535 / 3002 loss=2.556, ppl=5.88, wps=5784.5, ups=0.09, wpb=64893, bsz=128, num_updates=11472, lr=9.99162e-05, gnorm=2.224, loss_scale=2, train_wall=11, gb_free=2.8, wall=131414
2021-06-20 07:09:11 | INFO | train_inner | epoch 004: 2536 / 3002 loss=2.609, ppl=6.1, wps=5812, ups=0.09, wpb=64790, bsz=128, num_updates=11473, lr=9.99162e-05, gnorm=6.582, loss_scale=2, train_wall=11, gb_free=2.8, wall=131426
2021-06-20 07:09:22 | INFO | train_inner | epoch 004: 2537 / 3002 loss=2.576, ppl=5.96, wps=5851, ups=0.09, wpb=64803, bsz=128, num_updates=11474, lr=9.99162e-05, gnorm=2.719, loss_scale=2, train_wall=11, gb_free=2.8, wall=131437
2021-06-20 07:09:33 | INFO | train_inner | epoch 004: 2538 / 3002 loss=2.721, ppl=6.59, wps=5910.4, ups=0.09, wpb=64824, bsz=128, num_updates=11475, lr=9.99162e-05, gnorm=11.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=131448
2021-06-20 07:09:44 | INFO | train_inner | epoch 004: 2539 / 3002 loss=2.536, ppl=5.8, wps=6062.6, ups=0.09, wpb=64924, bsz=128, num_updates=11476, lr=9.99162e-05, gnorm=2.389, loss_scale=2, train_wall=10, gb_free=2.8, wall=131458
2021-06-20 07:09:55 | INFO | train_inner | epoch 004: 2540 / 3002 loss=2.516, ppl=5.72, wps=5781.1, ups=0.09, wpb=64787, bsz=128, num_updates=11477, lr=9.99162e-05, gnorm=11.505, loss_scale=2, train_wall=11, gb_free=2.8, wall=131470
2021-06-20 07:10:06 | INFO | train_inner | epoch 004: 2541 / 3002 loss=2.442, ppl=5.44, wps=5843.4, ups=0.09, wpb=64847, bsz=128, num_updates=11478, lr=9.99162e-05, gnorm=2.268, loss_scale=2, train_wall=11, gb_free=2.8, wall=131481
2021-06-20 07:10:17 | INFO | train_inner | epoch 004: 2542 / 3002 loss=2.598, ppl=6.06, wps=5945.5, ups=0.09, wpb=64798, bsz=128, num_updates=11479, lr=9.99162e-05, gnorm=2.547, loss_scale=2, train_wall=10, gb_free=2.8, wall=131492
2021-06-20 07:10:28 | INFO | train_inner | epoch 004: 2543 / 3002 loss=2.527, ppl=5.76, wps=5755.2, ups=0.09, wpb=64913, bsz=128, num_updates=11480, lr=9.99162e-05, gnorm=2.39, loss_scale=2, train_wall=11, gb_free=2.8, wall=131503
2021-06-20 07:10:40 | INFO | train_inner | epoch 004: 2544 / 3002 loss=2.657, ppl=6.31, wps=5855.4, ups=0.09, wpb=64673, bsz=128, num_updates=11481, lr=9.99161e-05, gnorm=2.395, loss_scale=2, train_wall=11, gb_free=2.8, wall=131514
2021-06-20 07:10:51 | INFO | train_inner | epoch 004: 2545 / 3002 loss=2.526, ppl=5.76, wps=5825.6, ups=0.09, wpb=64846, bsz=128, num_updates=11482, lr=9.99161e-05, gnorm=2.355, loss_scale=2, train_wall=11, gb_free=2.8, wall=131525
2021-06-20 07:11:02 | INFO | train_inner | epoch 004: 2546 / 3002 loss=2.549, ppl=5.85, wps=5838, ups=0.09, wpb=64788, bsz=128, num_updates=11483, lr=9.99161e-05, gnorm=2.181, loss_scale=2, train_wall=11, gb_free=2.8, wall=131536
2021-06-20 07:11:13 | INFO | train_inner | epoch 004: 2547 / 3002 loss=2.569, ppl=5.93, wps=5741.2, ups=0.09, wpb=64834, bsz=128, num_updates=11484, lr=9.99161e-05, gnorm=2.317, loss_scale=2, train_wall=11, gb_free=2.8, wall=131547
2021-06-20 07:11:24 | INFO | train_inner | epoch 004: 2548 / 3002 loss=2.436, ppl=5.41, wps=5886.4, ups=0.09, wpb=64790, bsz=128, num_updates=11485, lr=9.99161e-05, gnorm=2.592, loss_scale=2, train_wall=11, gb_free=2.8, wall=131558
2021-06-20 07:11:35 | INFO | train_inner | epoch 004: 2549 / 3002 loss=2.73, ppl=6.64, wps=5821.4, ups=0.09, wpb=64913, bsz=128, num_updates=11486, lr=9.99161e-05, gnorm=4.453, loss_scale=2, train_wall=11, gb_free=2.8, wall=131570
2021-06-20 07:11:46 | INFO | train_inner | epoch 004: 2550 / 3002 loss=2.409, ppl=5.31, wps=5936.6, ups=0.09, wpb=64881, bsz=128, num_updates=11487, lr=9.99161e-05, gnorm=2.346, loss_scale=2, train_wall=10, gb_free=2.8, wall=131580
2021-06-20 07:11:57 | INFO | train_inner | epoch 004: 2551 / 3002 loss=2.555, ppl=5.88, wps=5937.6, ups=0.09, wpb=64836, bsz=128, num_updates=11488, lr=9.99161e-05, gnorm=2.191, loss_scale=2, train_wall=10, gb_free=2.8, wall=131591
2021-06-20 07:12:08 | INFO | train_inner | epoch 004: 2552 / 3002 loss=2.603, ppl=6.08, wps=5779.1, ups=0.09, wpb=64855, bsz=128, num_updates=11489, lr=9.99161e-05, gnorm=3.407, loss_scale=2, train_wall=11, gb_free=2.8, wall=131603
2021-06-20 07:12:19 | INFO | train_inner | epoch 004: 2553 / 3002 loss=2.734, ppl=6.65, wps=5935, ups=0.09, wpb=64895, bsz=128, num_updates=11490, lr=9.99161e-05, gnorm=2.962, loss_scale=2, train_wall=10, gb_free=2.8, wall=131614
2021-06-20 07:12:30 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0
2021-06-20 07:12:41 | INFO | train_inner | epoch 004: 2555 / 3002 loss=2.644, ppl=6.25, wps=2962.1, ups=0.05, wpb=64865, bsz=128, num_updates=11491, lr=9.99161e-05, gnorm=2.491, loss_scale=1, train_wall=21, gb_free=2.8, wall=131635
2021-06-20 07:12:52 | INFO | train_inner | epoch 004: 2556 / 3002 loss=2.599, ppl=6.06, wps=5971.7, ups=0.09, wpb=64862, bsz=128, num_updates=11492, lr=9.99161e-05, gnorm=4.582, loss_scale=1, train_wall=10, gb_free=2.8, wall=131646
2021-06-20 07:13:03 | INFO | train_inner | epoch 004: 2557 / 3002 loss=2.697, ppl=6.49, wps=5963.7, ups=0.09, wpb=64841, bsz=128, num_updates=11493, lr=9.9916e-05, gnorm=2.513, loss_scale=1, train_wall=10, gb_free=2.8, wall=131657
2021-06-20 07:13:14 | INFO | train_inner | epoch 004: 2558 / 3002 loss=2.712, ppl=6.55, wps=5874.8, ups=0.09, wpb=64828, bsz=128, num_updates=11494, lr=9.9916e-05, gnorm=3.49, loss_scale=1, train_wall=11, gb_free=2.8, wall=131668
2021-06-20 07:13:25 | INFO | train_inner | epoch 004: 2559 / 3002 loss=2.877, ppl=7.35, wps=5838.3, ups=0.09, wpb=64748, bsz=128, num_updates=11495, lr=9.9916e-05, gnorm=6.398, loss_scale=1, train_wall=11, gb_free=2.8, wall=131679
2021-06-20 07:13:36 | INFO | train_inner | epoch 004: 2560 / 3002 loss=2.867, ppl=7.3, wps=5941.4, ups=0.09, wpb=64829, bsz=128, num_updates=11496, lr=9.9916e-05, gnorm=43.956, loss_scale=1, train_wall=10, gb_free=2.8, wall=131690
2021-06-20 07:13:47 | INFO | train_inner | epoch 004: 2561 / 3002 loss=2.753, ppl=6.74, wps=5925.5, ups=0.09, wpb=64882, bsz=128, num_updates=11497, lr=9.9916e-05, gnorm=4.981, loss_scale=1, train_wall=10, gb_free=2.8, wall=131701
2021-06-20 07:13:58 | INFO | train_inner | epoch 004: 2562 / 3002 loss=2.706, ppl=6.53, wps=5865.2, ups=0.09, wpb=64797, bsz=128, num_updates=11498, lr=9.9916e-05, gnorm=5.634, loss_scale=1, train_wall=11, gb_free=2.8, wall=131712
2021-06-20 07:14:09 | INFO | train_inner | epoch 004: 2563 / 3002 loss=2.603, ppl=6.08, wps=5759.7, ups=0.09, wpb=64785, bsz=128, num_updates=11499, lr=9.9916e-05, gnorm=2.975, loss_scale=1, train_wall=11, gb_free=2.8, wall=131723
2021-06-20 07:14:20 | INFO | train_inner | epoch 004: 2564 / 3002 loss=2.713, ppl=6.56, wps=5737.4, ups=0.09, wpb=64836, bsz=128, num_updates=11500, lr=9.9916e-05, gnorm=3.527, loss_scale=1, train_wall=11, gb_free=2.8, wall=131735
2021-06-20 07:14:31 | INFO | train_inner | epoch 004: 2565 / 3002 loss=2.639, ppl=6.23, wps=5890.1, ups=0.09, wpb=64806, bsz=128, num_updates=11501, lr=9.9916e-05, gnorm=3.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=131746
2021-06-20 07:14:42 | INFO | train_inner | epoch 004: 2566 / 3002 loss=2.82, ppl=7.06, wps=5863.2, ups=0.09, wpb=64809, bsz=128, num_updates=11502, lr=9.9916e-05, gnorm=24.159, loss_scale=1, train_wall=11, gb_free=2.8, wall=131757
2021-06-20 07:14:54 | INFO | train_inner | epoch 004: 2567 / 3002 loss=2.648, ppl=6.27, wps=5838.8, ups=0.09, wpb=64830, bsz=128, num_updates=11503, lr=9.9916e-05, gnorm=4.07, loss_scale=1, train_wall=11, gb_free=2.8, wall=131768
2021-06-20 07:15:05 | INFO | train_inner | epoch 004: 2568 / 3002 loss=2.764, ppl=6.79, wps=5846.9, ups=0.09, wpb=64867, bsz=128, num_updates=11504, lr=9.9916e-05, gnorm=8.996, loss_scale=1, train_wall=11, gb_free=2.8, wall=131779
2021-06-20 07:15:16 | INFO | train_inner | epoch 004: 2569 / 3002 loss=2.651, ppl=6.28, wps=5848.7, ups=0.09, wpb=64772, bsz=128, num_updates=11505, lr=9.9916e-05, gnorm=8.962, loss_scale=1, train_wall=11, gb_free=2.8, wall=131790
2021-06-20 07:15:27 | INFO | train_inner | epoch 004: 2570 / 3002 loss=2.463, ppl=5.51, wps=5849.3, ups=0.09, wpb=64861, bsz=128, num_updates=11506, lr=9.99159e-05, gnorm=3.288, loss_scale=1, train_wall=11, gb_free=2.8, wall=131801
2021-06-20 07:15:38 | INFO | train_inner | epoch 004: 2571 / 3002 loss=2.713, ppl=6.56, wps=5857.5, ups=0.09, wpb=64763, bsz=128, num_updates=11507, lr=9.99159e-05, gnorm=3.215, loss_scale=1, train_wall=11, gb_free=2.8, wall=131812
2021-06-20 07:15:49 | INFO | train_inner | epoch 004: 2572 / 3002 loss=2.596, ppl=6.05, wps=6056.7, ups=0.09, wpb=64744, bsz=128, num_updates=11508, lr=9.99159e-05, gnorm=2.59, loss_scale=1, train_wall=10, gb_free=2.8, wall=131823
2021-06-20 07:16:00 | INFO | train_inner | epoch 004: 2573 / 3002 loss=2.546, ppl=5.84, wps=5836.3, ups=0.09, wpb=64775, bsz=128, num_updates=11509, lr=9.99159e-05, gnorm=2.528, loss_scale=1, train_wall=11, gb_free=2.8, wall=131834
2021-06-20 07:16:11 | INFO | train_inner | epoch 004: 2574 / 3002 loss=2.643, ppl=6.24, wps=5858.5, ups=0.09, wpb=64830, bsz=128, num_updates=11510, lr=9.99159e-05, gnorm=3.786, loss_scale=1, train_wall=11, gb_free=2.8, wall=131845
2021-06-20 07:16:22 | INFO | train_inner | epoch 004: 2575 / 3002 loss=2.603, ppl=6.07, wps=5921.8, ups=0.09, wpb=64838, bsz=128, num_updates=11511, lr=9.99159e-05, gnorm=2.615, loss_scale=1, train_wall=11, gb_free=2.8, wall=131856
2021-06-20 07:16:33 | INFO | train_inner | epoch 004: 2576 / 3002 loss=2.759, ppl=6.77, wps=5807.1, ups=0.09, wpb=64835, bsz=128, num_updates=11512, lr=9.99159e-05, gnorm=2.609, loss_scale=1, train_wall=11, gb_free=2.8, wall=131867
2021-06-20 07:16:44 | INFO | train_inner | epoch 004: 2577 / 3002 loss=2.529, ppl=5.77, wps=5786.8, ups=0.09, wpb=64896, bsz=128, num_updates=11513, lr=9.99159e-05, gnorm=5.721, loss_scale=1, train_wall=11, gb_free=2.8, wall=131878
2021-06-20 07:16:55 | INFO | train_inner | epoch 004: 2578 / 3002 loss=2.606, ppl=6.09, wps=5894, ups=0.09, wpb=64780, bsz=128, num_updates=11514, lr=9.99159e-05, gnorm=4.129, loss_scale=1, train_wall=11, gb_free=2.8, wall=131889
2021-06-20 07:17:06 | INFO | train_inner | epoch 004: 2579 / 3002 loss=2.757, ppl=6.76, wps=5779.6, ups=0.09, wpb=64903, bsz=128, num_updates=11515, lr=9.99159e-05, gnorm=2.307, loss_scale=1, train_wall=11, gb_free=2.8, wall=131901
2021-06-20 07:17:17 | INFO | train_inner | epoch 004: 2580 / 3002 loss=2.547, ppl=5.85, wps=5907.3, ups=0.09, wpb=64819, bsz=128, num_updates=11516, lr=9.99159e-05, gnorm=2.761, loss_scale=1, train_wall=11, gb_free=2.8, wall=131912
2021-06-20 07:17:28 | INFO | train_inner | epoch 004: 2581 / 3002 loss=2.694, ppl=6.47, wps=5872.5, ups=0.09, wpb=64796, bsz=128, num_updates=11517, lr=9.99159e-05, gnorm=2.737, loss_scale=1, train_wall=11, gb_free=2.8, wall=131923
2021-06-20 07:17:40 | INFO | train_inner | epoch 004: 2582 / 3002 loss=2.595, ppl=6.04, wps=5784.5, ups=0.09, wpb=64829, bsz=128, num_updates=11518, lr=9.99158e-05, gnorm=2.609, loss_scale=1, train_wall=11, gb_free=2.8, wall=131934
2021-06-20 07:17:50 | INFO | train_inner | epoch 004: 2583 / 3002 loss=2.695, ppl=6.48, wps=5966.8, ups=0.09, wpb=64846, bsz=128, num_updates=11519, lr=9.99158e-05, gnorm=2.764, loss_scale=1, train_wall=10, gb_free=2.8, wall=131945
2021-06-20 07:18:01 | INFO | train_inner | epoch 004: 2584 / 3002 loss=2.731, ppl=6.64, wps=5840.6, ups=0.09, wpb=64817, bsz=128, num_updates=11520, lr=9.99158e-05, gnorm=2.228, loss_scale=1, train_wall=11, gb_free=2.8, wall=131956
2021-06-20 07:18:13 | INFO | train_inner | epoch 004: 2585 / 3002 loss=2.655, ppl=6.3, wps=5845.5, ups=0.09, wpb=64836, bsz=128, num_updates=11521, lr=9.99158e-05, gnorm=2.232, loss_scale=1, train_wall=11, gb_free=2.8, wall=131967
2021-06-20 07:18:24 | INFO | train_inner | epoch 004: 2586 / 3002 loss=2.604, ppl=6.08, wps=5805.4, ups=0.09, wpb=64819, bsz=128, num_updates=11522, lr=9.99158e-05, gnorm=3.16, loss_scale=1, train_wall=11, gb_free=2.8, wall=131978
2021-06-20 07:18:35 | INFO | train_inner | epoch 004: 2587 / 3002 loss=2.711, ppl=6.55, wps=5961.4, ups=0.09, wpb=64843, bsz=128, num_updates=11523, lr=9.99158e-05, gnorm=2.125, loss_scale=1, train_wall=10, gb_free=2.8, wall=131989
2021-06-20 07:18:46 | INFO | train_inner | epoch 004: 2588 / 3002 loss=2.582, ppl=5.99, wps=5900.9, ups=0.09, wpb=64851, bsz=128, num_updates=11524, lr=9.99158e-05, gnorm=3.085, loss_scale=1, train_wall=11, gb_free=2.8, wall=132000
2021-06-20 07:18:57 | INFO | train_inner | epoch 004: 2589 / 3002 loss=2.457, ppl=5.49, wps=5835.1, ups=0.09, wpb=64825, bsz=128, num_updates=11525, lr=9.99158e-05, gnorm=4.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=132011
2021-06-20 07:19:08 | INFO | train_inner | epoch 004: 2590 / 3002 loss=2.466, ppl=5.53, wps=5847.5, ups=0.09, wpb=64822, bsz=128, num_updates=11526, lr=9.99158e-05, gnorm=2.125, loss_scale=1, train_wall=11, gb_free=2.8, wall=132022
2021-06-20 07:19:19 | INFO | train_inner | epoch 004: 2591 / 3002 loss=2.621, ppl=6.15, wps=5925.8, ups=0.09, wpb=64832, bsz=128, num_updates=11527, lr=9.99158e-05, gnorm=2.309, loss_scale=1, train_wall=10, gb_free=2.8, wall=132033
2021-06-20 07:19:30 | INFO | train_inner | epoch 004: 2592 / 3002 loss=2.601, ppl=6.07, wps=5904.6, ups=0.09, wpb=64787, bsz=128, num_updates=11528, lr=9.99158e-05, gnorm=2.145, loss_scale=1, train_wall=11, gb_free=2.8, wall=132044
2021-06-20 07:19:41 | INFO | train_inner | epoch 004: 2593 / 3002 loss=2.605, ppl=6.09, wps=5916.9, ups=0.09, wpb=64905, bsz=128, num_updates=11529, lr=9.99158e-05, gnorm=2.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=132055
2021-06-20 07:19:52 | INFO | train_inner | epoch 004: 2594 / 3002 loss=2.423, ppl=5.36, wps=5829.8, ups=0.09, wpb=64755, bsz=128, num_updates=11530, lr=9.99158e-05, gnorm=2.201, loss_scale=1, train_wall=11, gb_free=2.8, wall=132066
2021-06-20 07:20:03 | INFO | train_inner | epoch 004: 2595 / 3002 loss=2.632, ppl=6.2, wps=5875.7, ups=0.09, wpb=64870, bsz=128, num_updates=11531, lr=9.99157e-05, gnorm=2.235, loss_scale=1, train_wall=11, gb_free=2.8, wall=132077
2021-06-20 07:20:14 | INFO | train_inner | epoch 004: 2596 / 3002 loss=2.495, ppl=5.64, wps=5821.1, ups=0.09, wpb=64834, bsz=128, num_updates=11532, lr=9.99157e-05, gnorm=2.06, loss_scale=1, train_wall=11, gb_free=2.8, wall=132088
2021-06-20 07:20:25 | INFO | train_inner | epoch 004: 2597 / 3002 loss=2.511, ppl=5.7, wps=5852.7, ups=0.09, wpb=64783, bsz=128, num_updates=11533, lr=9.99157e-05, gnorm=2.154, loss_scale=1, train_wall=11, gb_free=2.8, wall=132099
2021-06-20 07:20:36 | INFO | train_inner | epoch 004: 2598 / 3002 loss=2.388, ppl=5.23, wps=6002.3, ups=0.09, wpb=64843, bsz=128, num_updates=11534, lr=9.99157e-05, gnorm=2.075, loss_scale=1, train_wall=10, gb_free=2.8, wall=132110
2021-06-20 07:20:47 | INFO | train_inner | epoch 004: 2599 / 3002 loss=2.431, ppl=5.39, wps=5839, ups=0.09, wpb=64783, bsz=128, num_updates=11535, lr=9.99157e-05, gnorm=2.116, loss_scale=1, train_wall=11, gb_free=2.8, wall=132121
2021-06-20 07:20:58 | INFO | train_inner | epoch 004: 2600 / 3002 loss=2.539, ppl=5.81, wps=6052.8, ups=0.09, wpb=64876, bsz=128, num_updates=11536, lr=9.99157e-05, gnorm=2.246, loss_scale=1, train_wall=10, gb_free=2.8, wall=132132
2021-06-20 07:21:09 | INFO | train_inner | epoch 004: 2601 / 3002 loss=2.705, ppl=6.52, wps=5858.2, ups=0.09, wpb=64840, bsz=128, num_updates=11537, lr=9.99157e-05, gnorm=2.02, loss_scale=1, train_wall=11, gb_free=2.8, wall=132143
2021-06-20 07:21:20 | INFO | train_inner | epoch 004: 2602 / 3002 loss=2.717, ppl=6.57, wps=5916.2, ups=0.09, wpb=64848, bsz=128, num_updates=11538, lr=9.99157e-05, gnorm=2.309, loss_scale=1, train_wall=11, gb_free=2.8, wall=132154
2021-06-20 07:21:31 | INFO | train_inner | epoch 004: 2603 / 3002 loss=2.551, ppl=5.86, wps=5871.5, ups=0.09, wpb=64824, bsz=128, num_updates=11539, lr=9.99157e-05, gnorm=2.15, loss_scale=1, train_wall=11, gb_free=2.8, wall=132165
2021-06-20 07:21:42 | INFO | train_inner | epoch 004: 2604 / 3002 loss=2.599, ppl=6.06, wps=5878.6, ups=0.09, wpb=64894, bsz=128, num_updates=11540, lr=9.99157e-05, gnorm=2.196, loss_scale=1, train_wall=11, gb_free=2.8, wall=132176
2021-06-20 07:21:53 | INFO | train_inner | epoch 004: 2605 / 3002 loss=2.55, ppl=5.86, wps=5816.5, ups=0.09, wpb=64839, bsz=128, num_updates=11541, lr=9.99157e-05, gnorm=2.163, loss_scale=1, train_wall=11, gb_free=2.8, wall=132187
2021-06-20 07:22:04 | INFO | train_inner | epoch 004: 2606 / 3002 loss=2.654, ppl=6.29, wps=5863.2, ups=0.09, wpb=64715, bsz=128, num_updates=11542, lr=9.99157e-05, gnorm=2.119, loss_scale=1, train_wall=11, gb_free=2.8, wall=132198
2021-06-20 07:22:15 | INFO | train_inner | epoch 004: 2607 / 3002 loss=2.416, ppl=5.34, wps=5890.6, ups=0.09, wpb=64813, bsz=128, num_updates=11543, lr=9.99156e-05, gnorm=2.225, loss_scale=1, train_wall=11, gb_free=2.8, wall=132209
2021-06-20 07:22:26 | INFO | train_inner | epoch 004: 2608 / 3002 loss=2.509, ppl=5.69, wps=5850.3, ups=0.09, wpb=64842, bsz=128, num_updates=11544, lr=9.99156e-05, gnorm=2.04, loss_scale=1, train_wall=11, gb_free=2.8, wall=132220
2021-06-20 07:22:37 | INFO | train_inner | epoch 004: 2609 / 3002 loss=2.393, ppl=5.25, wps=5830.8, ups=0.09, wpb=64806, bsz=128, num_updates=11545, lr=9.99156e-05, gnorm=2.043, loss_scale=1, train_wall=11, gb_free=2.8, wall=132231
2021-06-20 07:22:48 | INFO | train_inner | epoch 004: 2610 / 3002 loss=2.538, ppl=5.81, wps=5772.1, ups=0.09, wpb=64760, bsz=128, num_updates=11546, lr=9.99156e-05, gnorm=2.082, loss_scale=1, train_wall=11, gb_free=2.8, wall=132243
2021-06-20 07:22:59 | INFO | train_inner | epoch 004: 2611 / 3002 loss=2.442, ppl=5.43, wps=5859.7, ups=0.09, wpb=64848, bsz=128, num_updates=11547, lr=9.99156e-05, gnorm=2.018, loss_scale=1, train_wall=11, gb_free=2.8, wall=132254
2021-06-20 07:23:11 | INFO | train_inner | epoch 004: 2612 / 3002 loss=2.642, ppl=6.24, wps=5838.5, ups=0.09, wpb=64794, bsz=128, num_updates=11548, lr=9.99156e-05, gnorm=2.03, loss_scale=1, train_wall=11, gb_free=2.8, wall=132265
2021-06-20 07:23:22 | INFO | train_inner | epoch 004: 2613 / 3002 loss=2.538, ppl=5.81, wps=5883.7, ups=0.09, wpb=64815, bsz=128, num_updates=11549, lr=9.99156e-05, gnorm=2.064, loss_scale=1, train_wall=11, gb_free=2.8, wall=132276
2021-06-20 07:23:33 | INFO | train_inner | epoch 004: 2614 / 3002 loss=2.505, ppl=5.68, wps=5839.3, ups=0.09, wpb=64790, bsz=128, num_updates=11550, lr=9.99156e-05, gnorm=2.177, loss_scale=1, train_wall=11, gb_free=2.8, wall=132287
2021-06-20 07:23:44 | INFO | train_inner | epoch 004: 2615 / 3002 loss=2.531, ppl=5.78, wps=5902, ups=0.09, wpb=64762, bsz=128, num_updates=11551, lr=9.99156e-05, gnorm=2.001, loss_scale=1, train_wall=10, gb_free=2.8, wall=132298
2021-06-20 07:23:55 | INFO | train_inner | epoch 004: 2616 / 3002 loss=2.453, ppl=5.48, wps=5913.1, ups=0.09, wpb=64754, bsz=128, num_updates=11552, lr=9.99156e-05, gnorm=2.172, loss_scale=1, train_wall=11, gb_free=2.8, wall=132309
2021-06-20 07:24:06 | INFO | train_inner | epoch 004: 2617 / 3002 loss=2.6, ppl=6.06, wps=5854.3, ups=0.09, wpb=64855, bsz=128, num_updates=11553, lr=9.99156e-05, gnorm=2.057, loss_scale=1, train_wall=11, gb_free=2.8, wall=132320
2021-06-20 07:24:17 | INFO | train_inner | epoch 004: 2618 / 3002 loss=2.684, ppl=6.43, wps=5914.1, ups=0.09, wpb=64802, bsz=128, num_updates=11554, lr=9.99156e-05, gnorm=2.29, loss_scale=1, train_wall=10, gb_free=2.8, wall=132331
2021-06-20 07:24:28 | INFO | train_inner | epoch 004: 2619 / 3002 loss=2.73, ppl=6.64, wps=5941.7, ups=0.09, wpb=64864, bsz=128, num_updates=11555, lr=9.99156e-05, gnorm=2.128, loss_scale=1, train_wall=10, gb_free=2.8, wall=132342
2021-06-20 07:24:38 | INFO | train_inner | epoch 004: 2620 / 3002 loss=2.647, ppl=6.26, wps=5944.9, ups=0.09, wpb=64817, bsz=128, num_updates=11556, lr=9.99155e-05, gnorm=2.037, loss_scale=1, train_wall=10, gb_free=2.8, wall=132353
2021-06-20 07:24:50 | INFO | train_inner | epoch 004: 2621 / 3002 loss=2.764, ppl=6.79, wps=5848.7, ups=0.09, wpb=64781, bsz=128, num_updates=11557, lr=9.99155e-05, gnorm=4.047, loss_scale=1, train_wall=11, gb_free=2.8, wall=132364
2021-06-20 07:25:01 | INFO | train_inner | epoch 004: 2622 / 3002 loss=2.518, ppl=5.73, wps=5884.4, ups=0.09, wpb=64822, bsz=128, num_updates=11558, lr=9.99155e-05, gnorm=2.108, loss_scale=1, train_wall=11, gb_free=2.8, wall=132375
2021-06-20 07:25:11 | INFO | train_inner | epoch 004: 2623 / 3002 loss=2.535, ppl=5.79, wps=6005.3, ups=0.09, wpb=64818, bsz=128, num_updates=11559, lr=9.99155e-05, gnorm=2.072, loss_scale=1, train_wall=10, gb_free=2.8, wall=132386
2021-06-20 07:25:22 | INFO | train_inner | epoch 004: 2624 / 3002 loss=2.488, ppl=5.61, wps=5861.4, ups=0.09, wpb=64821, bsz=128, num_updates=11560, lr=9.99155e-05, gnorm=2.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=132397
2021-06-20 07:25:33 | INFO | train_inner | epoch 004: 2625 / 3002 loss=2.689, ppl=6.45, wps=5861.6, ups=0.09, wpb=64828, bsz=128, num_updates=11561, lr=9.99155e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=132408
2021-06-20 07:25:44 | INFO | train_inner | epoch 004: 2626 / 3002 loss=2.645, ppl=6.25, wps=5846.8, ups=0.09, wpb=64624, bsz=128, num_updates=11562, lr=9.99155e-05, gnorm=2.045, loss_scale=1, train_wall=11, gb_free=2.8, wall=132419
2021-06-20 07:25:56 | INFO | train_inner | epoch 004: 2627 / 3002 loss=2.605, ppl=6.09, wps=5833.3, ups=0.09, wpb=64886, bsz=128, num_updates=11563, lr=9.99155e-05, gnorm=2.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=132430
2021-06-20 07:26:07 | INFO | train_inner | epoch 004: 2628 / 3002 loss=2.49, ppl=5.62, wps=5824.1, ups=0.09, wpb=64817, bsz=128, num_updates=11564, lr=9.99155e-05, gnorm=2.098, loss_scale=1, train_wall=11, gb_free=2.8, wall=132441
2021-06-20 07:26:18 | INFO | train_inner | epoch 004: 2629 / 3002 loss=2.563, ppl=5.91, wps=5947.5, ups=0.09, wpb=64831, bsz=128, num_updates=11565, lr=9.99155e-05, gnorm=2.034, loss_scale=1, train_wall=10, gb_free=2.8, wall=132452
2021-06-20 07:26:29 | INFO | train_inner | epoch 004: 2630 / 3002 loss=2.597, ppl=6.05, wps=5878.5, ups=0.09, wpb=64802, bsz=128, num_updates=11566, lr=9.99155e-05, gnorm=2.348, loss_scale=1, train_wall=11, gb_free=2.8, wall=132463
2021-06-20 07:26:40 | INFO | train_inner | epoch 004: 2631 / 3002 loss=2.38, ppl=5.21, wps=5891.5, ups=0.09, wpb=64878, bsz=128, num_updates=11567, lr=9.99155e-05, gnorm=1.993, loss_scale=1, train_wall=11, gb_free=2.8, wall=132474
2021-06-20 07:26:51 | INFO | train_inner | epoch 004: 2632 / 3002 loss=2.589, ppl=6.02, wps=5875.7, ups=0.09, wpb=64791, bsz=128, num_updates=11568, lr=9.99154e-05, gnorm=2.359, loss_scale=1, train_wall=11, gb_free=2.8, wall=132485
2021-06-20 07:27:02 | INFO | train_inner | epoch 004: 2633 / 3002 loss=2.567, ppl=5.93, wps=5955, ups=0.09, wpb=64831, bsz=128, num_updates=11569, lr=9.99154e-05, gnorm=2.081, loss_scale=1, train_wall=10, gb_free=2.8, wall=132496
2021-06-20 07:27:13 | INFO | train_inner | epoch 004: 2634 / 3002 loss=2.569, ppl=5.94, wps=5743.1, ups=0.09, wpb=64758, bsz=128, num_updates=11570, lr=9.99154e-05, gnorm=4.927, loss_scale=1, train_wall=11, gb_free=2.8, wall=132507
2021-06-20 07:27:24 | INFO | train_inner | epoch 004: 2635 / 3002 loss=2.415, ppl=5.33, wps=5872.7, ups=0.09, wpb=64700, bsz=128, num_updates=11571, lr=9.99154e-05, gnorm=1.936, loss_scale=1, train_wall=11, gb_free=2.8, wall=132518
2021-06-20 07:27:35 | INFO | train_inner | epoch 004: 2636 / 3002 loss=2.719, ppl=6.58, wps=5724.5, ups=0.09, wpb=64823, bsz=128, num_updates=11572, lr=9.99154e-05, gnorm=2.056, loss_scale=1, train_wall=11, gb_free=2.8, wall=132530
2021-06-20 07:27:46 | INFO | train_inner | epoch 004: 2637 / 3002 loss=2.622, ppl=6.16, wps=5885, ups=0.09, wpb=64826, bsz=128, num_updates=11573, lr=9.99154e-05, gnorm=2.293, loss_scale=1, train_wall=11, gb_free=2.8, wall=132541
2021-06-20 07:27:57 | INFO | train_inner | epoch 004: 2638 / 3002 loss=2.482, ppl=5.59, wps=5943.6, ups=0.09, wpb=64845, bsz=128, num_updates=11574, lr=9.99154e-05, gnorm=1.911, loss_scale=1, train_wall=10, gb_free=2.8, wall=132551
2021-06-20 07:28:08 | INFO | train_inner | epoch 004: 2639 / 3002 loss=2.5, ppl=5.66, wps=5777.3, ups=0.09, wpb=64785, bsz=128, num_updates=11575, lr=9.99154e-05, gnorm=2.036, loss_scale=1, train_wall=11, gb_free=2.8, wall=132563
2021-06-20 07:28:19 | INFO | train_inner | epoch 004: 2640 / 3002 loss=2.448, ppl=5.46, wps=5871.5, ups=0.09, wpb=64821, bsz=128, num_updates=11576, lr=9.99154e-05, gnorm=46.11, loss_scale=1, train_wall=11, gb_free=2.8, wall=132574
2021-06-20 07:28:30 | INFO | train_inner | epoch 004: 2641 / 3002 loss=2.581, ppl=5.98, wps=5902.8, ups=0.09, wpb=64838, bsz=128, num_updates=11577, lr=9.99154e-05, gnorm=2.131, loss_scale=1, train_wall=11, gb_free=2.8, wall=132585
2021-06-20 07:28:42 | INFO | train_inner | epoch 004: 2642 / 3002 loss=2.559, ppl=5.89, wps=5829, ups=0.09, wpb=64882, bsz=128, num_updates=11578, lr=9.99154e-05, gnorm=2.147, loss_scale=1, train_wall=11, gb_free=2.8, wall=132596
2021-06-20 07:28:53 | INFO | train_inner | epoch 004: 2643 / 3002 loss=2.785, ppl=6.89, wps=5835.9, ups=0.09, wpb=64808, bsz=128, num_updates=11579, lr=9.99154e-05, gnorm=3.988, loss_scale=1, train_wall=11, gb_free=2.8, wall=132607
2021-06-20 07:29:04 | INFO | train_inner | epoch 004: 2644 / 3002 loss=2.461, ppl=5.51, wps=5832.1, ups=0.09, wpb=64829, bsz=128, num_updates=11580, lr=9.99154e-05, gnorm=1.945, loss_scale=1, train_wall=11, gb_free=2.8, wall=132618
2021-06-20 07:29:15 | INFO | train_inner | epoch 004: 2645 / 3002 loss=2.656, ppl=6.3, wps=5816.8, ups=0.09, wpb=64749, bsz=128, num_updates=11581, lr=9.99153e-05, gnorm=2.463, loss_scale=1, train_wall=11, gb_free=2.8, wall=132629
2021-06-20 07:29:26 | INFO | train_inner | epoch 004: 2646 / 3002 loss=2.539, ppl=5.81, wps=5835.9, ups=0.09, wpb=64836, bsz=128, num_updates=11582, lr=9.99153e-05, gnorm=2.063, loss_scale=1, train_wall=11, gb_free=2.8, wall=132640
2021-06-20 07:29:37 | INFO | train_inner | epoch 004: 2647 / 3002 loss=2.401, ppl=5.28, wps=5877.4, ups=0.09, wpb=64826, bsz=128, num_updates=11583, lr=9.99153e-05, gnorm=2.537, loss_scale=1, train_wall=11, gb_free=2.8, wall=132651
2021-06-20 07:29:48 | INFO | train_inner | epoch 004: 2648 / 3002 loss=2.469, ppl=5.54, wps=5842.5, ups=0.09, wpb=64739, bsz=128, num_updates=11584, lr=9.99153e-05, gnorm=2.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=132662
2021-06-20 07:29:59 | INFO | train_inner | epoch 004: 2649 / 3002 loss=2.607, ppl=6.09, wps=5834.1, ups=0.09, wpb=64811, bsz=128, num_updates=11585, lr=9.99153e-05, gnorm=2.229, loss_scale=1, train_wall=11, gb_free=2.8, wall=132674
2021-06-20 07:30:10 | INFO | train_inner | epoch 004: 2650 / 3002 loss=2.582, ppl=5.99, wps=5902.6, ups=0.09, wpb=64809, bsz=128, num_updates=11586, lr=9.99153e-05, gnorm=2.217, loss_scale=1, train_wall=11, gb_free=2.8, wall=132685
2021-06-20 07:30:21 | INFO | train_inner | epoch 004: 2651 / 3002 loss=2.454, ppl=5.48, wps=5870.5, ups=0.09, wpb=64767, bsz=128, num_updates=11587, lr=9.99153e-05, gnorm=7.223, loss_scale=1, train_wall=11, gb_free=2.8, wall=132696
2021-06-20 07:30:32 | INFO | train_inner | epoch 004: 2652 / 3002 loss=2.496, ppl=5.64, wps=5942.8, ups=0.09, wpb=64868, bsz=128, num_updates=11588, lr=9.99153e-05, gnorm=2.552, loss_scale=1, train_wall=10, gb_free=2.8, wall=132706
2021-06-20 07:30:43 | INFO | train_inner | epoch 004: 2653 / 3002 loss=2.584, ppl=6, wps=5859.8, ups=0.09, wpb=64825, bsz=128, num_updates=11589, lr=9.99153e-05, gnorm=9.119, loss_scale=1, train_wall=11, gb_free=2.8, wall=132718
2021-06-20 07:30:54 | INFO | train_inner | epoch 004: 2654 / 3002 loss=2.696, ppl=6.48, wps=5913.3, ups=0.09, wpb=64749, bsz=128, num_updates=11590, lr=9.99153e-05, gnorm=2.156, loss_scale=1, train_wall=10, gb_free=2.8, wall=132728
2021-06-20 07:31:05 | INFO | train_inner | epoch 004: 2655 / 3002 loss=2.598, ppl=6.06, wps=5873.1, ups=0.09, wpb=64850, bsz=128, num_updates=11591, lr=9.99153e-05, gnorm=5.834, loss_scale=1, train_wall=11, gb_free=2.8, wall=132740
2021-06-20 07:31:16 | INFO | train_inner | epoch 004: 2656 / 3002 loss=2.548, ppl=5.85, wps=5851.5, ups=0.09, wpb=64854, bsz=128, num_updates=11592, lr=9.99153e-05, gnorm=2.403, loss_scale=1, train_wall=11, gb_free=2.8, wall=132751
2021-06-20 07:31:27 | INFO | train_inner | epoch 004: 2657 / 3002 loss=2.536, ppl=5.8, wps=5784.2, ups=0.09, wpb=64826, bsz=128, num_updates=11593, lr=9.99152e-05, gnorm=2.929, loss_scale=1, train_wall=11, gb_free=2.8, wall=132762
2021-06-20 07:31:39 | INFO | train_inner | epoch 004: 2658 / 3002 loss=2.705, ppl=6.52, wps=5770.7, ups=0.09, wpb=64813, bsz=128, num_updates=11594, lr=9.99152e-05, gnorm=2.804, loss_scale=1, train_wall=11, gb_free=2.8, wall=132773
2021-06-20 07:31:50 | INFO | train_inner | epoch 004: 2659 / 3002 loss=2.738, ppl=6.67, wps=5974.1, ups=0.09, wpb=64891, bsz=128, num_updates=11595, lr=9.99152e-05, gnorm=5.11, loss_scale=1, train_wall=10, gb_free=2.8, wall=132784
2021-06-20 07:32:01 | INFO | train_inner | epoch 004: 2660 / 3002 loss=2.541, ppl=5.82, wps=5891.4, ups=0.09, wpb=64893, bsz=128, num_updates=11596, lr=9.99152e-05, gnorm=3.674, loss_scale=1, train_wall=11, gb_free=2.8, wall=132795
2021-06-20 07:32:12 | INFO | train_inner | epoch 004: 2661 / 3002 loss=2.563, ppl=5.91, wps=5832.7, ups=0.09, wpb=64894, bsz=128, num_updates=11597, lr=9.99152e-05, gnorm=10.939, loss_scale=1, train_wall=11, gb_free=2.8, wall=132806
2021-06-20 07:32:23 | INFO | train_inner | epoch 004: 2662 / 3002 loss=2.691, ppl=6.46, wps=5910.9, ups=0.09, wpb=64821, bsz=128, num_updates=11598, lr=9.99152e-05, gnorm=2.564, loss_scale=1, train_wall=11, gb_free=2.8, wall=132817
2021-06-20 07:32:34 | INFO | train_inner | epoch 004: 2663 / 3002 loss=2.606, ppl=6.09, wps=5861.5, ups=0.09, wpb=64852, bsz=128, num_updates=11599, lr=9.99152e-05, gnorm=3.21, loss_scale=1, train_wall=11, gb_free=2.8, wall=132828
2021-06-20 07:32:45 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5
2021-06-20 07:32:56 | INFO | train_inner | epoch 004: 2665 / 3002 loss=2.815, ppl=7.04, wps=2921.5, ups=0.05, wpb=64814, bsz=128, num_updates=11600, lr=9.99152e-05, gnorm=7.311, loss_scale=0.5, train_wall=21, gb_free=2.8, wall=132850
2021-06-20 07:33:07 | INFO | train_inner | epoch 004: 2666 / 3002 loss=2.573, ppl=5.95, wps=5860, ups=0.09, wpb=64886, bsz=128, num_updates=11601, lr=9.99152e-05, gnorm=3.505, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132861
2021-06-20 07:33:18 | INFO | train_inner | epoch 004: 2667 / 3002 loss=2.519, ppl=5.73, wps=5898.8, ups=0.09, wpb=64821, bsz=128, num_updates=11602, lr=9.99152e-05, gnorm=4.59, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132872
2021-06-20 07:33:29 | INFO | train_inner | epoch 004: 2668 / 3002 loss=2.684, ppl=6.42, wps=5752.4, ups=0.09, wpb=64849, bsz=128, num_updates=11603, lr=9.99152e-05, gnorm=4.197, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132884
2021-06-20 07:33:40 | INFO | train_inner | epoch 004: 2669 / 3002 loss=2.65, ppl=6.28, wps=5824.6, ups=0.09, wpb=64867, bsz=128, num_updates=11604, lr=9.99152e-05, gnorm=5.469, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132895
2021-06-20 07:33:52 | INFO | train_inner | epoch 004: 2670 / 3002 loss=2.627, ppl=6.18, wps=5808.6, ups=0.09, wpb=64852, bsz=128, num_updates=11605, lr=9.99152e-05, gnorm=2.291, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132906
2021-06-20 07:34:02 | INFO | train_inner | epoch 004: 2671 / 3002 loss=2.637, ppl=6.22, wps=5971.2, ups=0.09, wpb=64789, bsz=128, num_updates=11606, lr=9.99151e-05, gnorm=4.452, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=132917
2021-06-20 07:34:13 | INFO | train_inner | epoch 004: 2672 / 3002 loss=2.566, ppl=5.92, wps=5914.1, ups=0.09, wpb=64795, bsz=128, num_updates=11607, lr=9.99151e-05, gnorm=2.516, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=132928
2021-06-20 07:34:24 | INFO | train_inner | epoch 004: 2673 / 3002 loss=2.597, ppl=6.05, wps=5862.4, ups=0.09, wpb=64812, bsz=128, num_updates=11608, lr=9.99151e-05, gnorm=3.414, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132939
2021-06-20 07:34:36 | INFO | train_inner | epoch 004: 2674 / 3002 loss=2.605, ppl=6.09, wps=5804.3, ups=0.09, wpb=64821, bsz=128, num_updates=11609, lr=9.99151e-05, gnorm=2.666, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132950
2021-06-20 07:34:47 | INFO | train_inner | epoch 004: 2675 / 3002 loss=2.648, ppl=6.27, wps=5834.7, ups=0.09, wpb=64867, bsz=128, num_updates=11610, lr=9.99151e-05, gnorm=2.324, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132961
2021-06-20 07:34:58 | INFO | train_inner | epoch 004: 2676 / 3002 loss=2.733, ppl=6.65, wps=5904, ups=0.09, wpb=64900, bsz=128, num_updates=11611, lr=9.99151e-05, gnorm=2.906, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132972
2021-06-20 07:35:09 | INFO | train_inner | epoch 004: 2677 / 3002 loss=2.461, ppl=5.5, wps=6009.5, ups=0.09, wpb=64879, bsz=128, num_updates=11612, lr=9.99151e-05, gnorm=2.149, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=132983
2021-06-20 07:35:20 | INFO | train_inner | epoch 004: 2678 / 3002 loss=2.418, ppl=5.34, wps=5758.3, ups=0.09, wpb=64849, bsz=128, num_updates=11613, lr=9.99151e-05, gnorm=18.579, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=132994
2021-06-20 07:35:31 | INFO | train_inner | epoch 004: 2679 / 3002 loss=2.646, ppl=6.26, wps=5832.1, ups=0.09, wpb=64811, bsz=128, num_updates=11614, lr=9.99151e-05, gnorm=3.547, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133005
2021-06-20 07:35:42 | INFO | train_inner | epoch 004: 2680 / 3002 loss=2.534, ppl=5.79, wps=5840.8, ups=0.09, wpb=64829, bsz=128, num_updates=11615, lr=9.99151e-05, gnorm=5.3, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133016
2021-06-20 07:35:53 | INFO | train_inner | epoch 004: 2681 / 3002 loss=2.558, ppl=5.89, wps=5785.7, ups=0.09, wpb=64780, bsz=128, num_updates=11616, lr=9.99151e-05, gnorm=4.523, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133028
2021-06-20 07:36:04 | INFO | train_inner | epoch 004: 2682 / 3002 loss=2.51, ppl=5.7, wps=5898, ups=0.09, wpb=64854, bsz=128, num_updates=11617, lr=9.99151e-05, gnorm=2.477, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133039
2021-06-20 07:36:15 | INFO | train_inner | epoch 004: 2683 / 3002 loss=2.729, ppl=6.63, wps=5746.6, ups=0.09, wpb=64817, bsz=128, num_updates=11618, lr=9.9915e-05, gnorm=2.663, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133050
2021-06-20 07:36:26 | INFO | train_inner | epoch 004: 2684 / 3002 loss=2.736, ppl=6.66, wps=5948.8, ups=0.09, wpb=64766, bsz=128, num_updates=11619, lr=9.9915e-05, gnorm=2.983, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133061
2021-06-20 07:36:37 | INFO | train_inner | epoch 004: 2685 / 3002 loss=2.598, ppl=6.05, wps=5844.1, ups=0.09, wpb=64810, bsz=128, num_updates=11620, lr=9.9915e-05, gnorm=2.55, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133072
2021-06-20 07:36:48 | INFO | train_inner | epoch 004: 2686 / 3002 loss=2.634, ppl=6.21, wps=6015.8, ups=0.09, wpb=64918, bsz=128, num_updates=11621, lr=9.9915e-05, gnorm=2.277, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133083
2021-06-20 07:36:59 | INFO | train_inner | epoch 004: 2687 / 3002 loss=2.45, ppl=5.46, wps=5948.2, ups=0.09, wpb=64883, bsz=128, num_updates=11622, lr=9.9915e-05, gnorm=2.426, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133093
2021-06-20 07:37:10 | INFO | train_inner | epoch 004: 2688 / 3002 loss=2.509, ppl=5.69, wps=5797.5, ups=0.09, wpb=64765, bsz=128, num_updates=11623, lr=9.9915e-05, gnorm=3.102, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133105
2021-06-20 07:37:22 | INFO | train_inner | epoch 004: 2689 / 3002 loss=2.694, ppl=6.47, wps=5753.3, ups=0.09, wpb=64818, bsz=128, num_updates=11624, lr=9.9915e-05, gnorm=2.188, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133116
2021-06-20 07:37:33 | INFO | train_inner | epoch 004: 2690 / 3002 loss=2.575, ppl=5.96, wps=5798.2, ups=0.09, wpb=64852, bsz=128, num_updates=11625, lr=9.9915e-05, gnorm=2.077, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133127
2021-06-20 07:37:44 | INFO | train_inner | epoch 004: 2691 / 3002 loss=2.737, ppl=6.66, wps=5765.3, ups=0.09, wpb=64850, bsz=128, num_updates=11626, lr=9.9915e-05, gnorm=2.105, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133138
2021-06-20 07:37:55 | INFO | train_inner | epoch 004: 2692 / 3002 loss=2.608, ppl=6.1, wps=5932, ups=0.09, wpb=64839, bsz=128, num_updates=11627, lr=9.9915e-05, gnorm=2.226, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133149
2021-06-20 07:38:06 | INFO | train_inner | epoch 004: 2693 / 3002 loss=2.575, ppl=5.96, wps=5879.5, ups=0.09, wpb=64852, bsz=128, num_updates=11628, lr=9.9915e-05, gnorm=2.056, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133160
2021-06-20 07:38:17 | INFO | train_inner | epoch 004: 2694 / 3002 loss=2.572, ppl=5.95, wps=5811.8, ups=0.09, wpb=64828, bsz=128, num_updates=11629, lr=9.9915e-05, gnorm=2.128, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133171
2021-06-20 07:38:28 | INFO | train_inner | epoch 004: 2695 / 3002 loss=2.565, ppl=5.92, wps=5810.9, ups=0.09, wpb=64827, bsz=128, num_updates=11630, lr=9.9915e-05, gnorm=2.054, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133183
2021-06-20 07:38:39 | INFO | train_inner | epoch 004: 2696 / 3002 loss=2.594, ppl=6.04, wps=5894, ups=0.09, wpb=64878, bsz=128, num_updates=11631, lr=9.99149e-05, gnorm=2.098, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133194
2021-06-20 07:38:50 | INFO | train_inner | epoch 004: 2697 / 3002 loss=2.612, ppl=6.11, wps=5911.2, ups=0.09, wpb=64832, bsz=128, num_updates=11632, lr=9.99149e-05, gnorm=2.049, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133205
2021-06-20 07:39:02 | INFO | train_inner | epoch 004: 2698 / 3002 loss=2.566, ppl=5.92, wps=5674.5, ups=0.09, wpb=64778, bsz=128, num_updates=11633, lr=9.99149e-05, gnorm=5.499, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133216
2021-06-20 07:39:13 | INFO | train_inner | epoch 004: 2699 / 3002 loss=2.497, ppl=5.65, wps=5816.4, ups=0.09, wpb=64760, bsz=128, num_updates=11634, lr=9.99149e-05, gnorm=2.19, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133227
2021-06-20 07:39:24 | INFO | train_inner | epoch 004: 2700 / 3002 loss=2.661, ppl=6.32, wps=5886.8, ups=0.09, wpb=64781, bsz=128, num_updates=11635, lr=9.99149e-05, gnorm=2.043, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133238
2021-06-20 07:39:35 | INFO | train_inner | epoch 004: 2701 / 3002 loss=2.37, ppl=5.17, wps=5903.6, ups=0.09, wpb=64817, bsz=128, num_updates=11636, lr=9.99149e-05, gnorm=2.029, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133249
2021-06-20 07:39:46 | INFO | train_inner | epoch 004: 2702 / 3002 loss=2.613, ppl=6.12, wps=5840.5, ups=0.09, wpb=64861, bsz=128, num_updates=11637, lr=9.99149e-05, gnorm=1.983, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133260
2021-06-20 07:39:57 | INFO | train_inner | epoch 004: 2703 / 3002 loss=2.575, ppl=5.96, wps=5946.4, ups=0.09, wpb=64784, bsz=128, num_updates=11638, lr=9.99149e-05, gnorm=2.017, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133271
2021-06-20 07:40:08 | INFO | train_inner | epoch 004: 2704 / 3002 loss=2.54, ppl=5.82, wps=5971.4, ups=0.09, wpb=64896, bsz=128, num_updates=11639, lr=9.99149e-05, gnorm=2.027, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133282
2021-06-20 07:40:19 | INFO | train_inner | epoch 004: 2705 / 3002 loss=2.528, ppl=5.77, wps=5766.8, ups=0.09, wpb=64846, bsz=128, num_updates=11640, lr=9.99149e-05, gnorm=2.067, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133293
2021-06-20 07:40:30 | INFO | train_inner | epoch 004: 2706 / 3002 loss=2.673, ppl=6.38, wps=5909.2, ups=0.09, wpb=64904, bsz=128, num_updates=11641, lr=9.99149e-05, gnorm=2.158, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133304
2021-06-20 07:40:41 | INFO | train_inner | epoch 004: 2707 / 3002 loss=2.624, ppl=6.16, wps=5773.1, ups=0.09, wpb=64820, bsz=128, num_updates=11642, lr=9.99149e-05, gnorm=2.118, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133315
2021-06-20 07:40:52 | INFO | train_inner | epoch 004: 2708 / 3002 loss=2.625, ppl=6.17, wps=5827.7, ups=0.09, wpb=64768, bsz=128, num_updates=11643, lr=9.99148e-05, gnorm=2.041, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133327
2021-06-20 07:41:03 | INFO | train_inner | epoch 004: 2709 / 3002 loss=2.534, ppl=5.79, wps=5887.4, ups=0.09, wpb=64902, bsz=128, num_updates=11644, lr=9.99148e-05, gnorm=2.01, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133338
2021-06-20 07:41:14 | INFO | train_inner | epoch 004: 2710 / 3002 loss=2.602, ppl=6.07, wps=5872.7, ups=0.09, wpb=64813, bsz=128, num_updates=11645, lr=9.99148e-05, gnorm=2.081, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133349
2021-06-20 07:41:25 | INFO | train_inner | epoch 004: 2711 / 3002 loss=2.555, ppl=5.88, wps=5867.2, ups=0.09, wpb=64817, bsz=128, num_updates=11646, lr=9.99148e-05, gnorm=2.045, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133360
2021-06-20 07:41:36 | INFO | train_inner | epoch 004: 2712 / 3002 loss=2.511, ppl=5.7, wps=5852.3, ups=0.09, wpb=64881, bsz=128, num_updates=11647, lr=9.99148e-05, gnorm=2.012, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133371
2021-06-20 07:41:47 | INFO | train_inner | epoch 004: 2713 / 3002 loss=2.578, ppl=5.97, wps=5888.6, ups=0.09, wpb=64818, bsz=128, num_updates=11648, lr=9.99148e-05, gnorm=2.044, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133382
2021-06-20 07:41:58 | INFO | train_inner | epoch 004: 2714 / 3002 loss=2.556, ppl=5.88, wps=5991.3, ups=0.09, wpb=64904, bsz=128, num_updates=11649, lr=9.99148e-05, gnorm=2.106, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133393
2021-06-20 07:42:10 | INFO | train_inner | epoch 004: 2715 / 3002 loss=2.545, ppl=5.84, wps=5730.4, ups=0.09, wpb=64879, bsz=128, num_updates=11650, lr=9.99148e-05, gnorm=2.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133404
2021-06-20 07:42:21 | INFO | train_inner | epoch 004: 2716 / 3002 loss=2.605, ppl=6.08, wps=5846.5, ups=0.09, wpb=64788, bsz=128, num_updates=11651, lr=9.99148e-05, gnorm=2.082, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133415
2021-06-20 07:42:32 | INFO | train_inner | epoch 004: 2717 / 3002 loss=2.555, ppl=5.88, wps=5796.5, ups=0.09, wpb=64817, bsz=128, num_updates=11652, lr=9.99148e-05, gnorm=1.995, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133426
2021-06-20 07:42:43 | INFO | train_inner | epoch 004: 2718 / 3002 loss=2.559, ppl=5.89, wps=5744.4, ups=0.09, wpb=64878, bsz=128, num_updates=11653, lr=9.99148e-05, gnorm=1.986, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133437
2021-06-20 07:42:54 | INFO | train_inner | epoch 004: 2719 / 3002 loss=2.605, ppl=6.09, wps=5824.6, ups=0.09, wpb=64763, bsz=128, num_updates=11654, lr=9.99148e-05, gnorm=1.984, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133449
2021-06-20 07:43:06 | INFO | train_inner | epoch 004: 2720 / 3002 loss=2.553, ppl=5.87, wps=5752.2, ups=0.09, wpb=64790, bsz=128, num_updates=11655, lr=9.99148e-05, gnorm=2.018, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133460
2021-06-20 07:43:17 | INFO | train_inner | epoch 004: 2721 / 3002 loss=2.532, ppl=5.78, wps=5826.9, ups=0.09, wpb=64881, bsz=128, num_updates=11656, lr=9.99147e-05, gnorm=1.965, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133471
2021-06-20 07:43:28 | INFO | train_inner | epoch 004: 2722 / 3002 loss=2.614, ppl=6.12, wps=5787.1, ups=0.09, wpb=64853, bsz=128, num_updates=11657, lr=9.99147e-05, gnorm=2.053, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133482
2021-06-20 07:43:39 | INFO | train_inner | epoch 004: 2723 / 3002 loss=2.496, ppl=5.64, wps=5999.2, ups=0.09, wpb=64908, bsz=128, num_updates=11658, lr=9.99147e-05, gnorm=1.967, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133493
2021-06-20 07:43:50 | INFO | train_inner | epoch 004: 2724 / 3002 loss=2.506, ppl=5.68, wps=5881.7, ups=0.09, wpb=64822, bsz=128, num_updates=11659, lr=9.99147e-05, gnorm=3.413, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133504
2021-06-20 07:44:01 | INFO | train_inner | epoch 004: 2725 / 3002 loss=2.385, ppl=5.22, wps=5789.6, ups=0.09, wpb=64896, bsz=128, num_updates=11660, lr=9.99147e-05, gnorm=2.046, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133515
2021-06-20 07:44:12 | INFO | train_inner | epoch 004: 2726 / 3002 loss=2.516, ppl=5.72, wps=5759.6, ups=0.09, wpb=64743, bsz=128, num_updates=11661, lr=9.99147e-05, gnorm=2.041, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133526
2021-06-20 07:44:23 | INFO | train_inner | epoch 004: 2727 / 3002 loss=2.526, ppl=5.76, wps=5822.5, ups=0.09, wpb=64813, bsz=128, num_updates=11662, lr=9.99147e-05, gnorm=2.006, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133538
2021-06-20 07:44:34 | INFO | train_inner | epoch 004: 2728 / 3002 loss=2.7, ppl=6.5, wps=5846.8, ups=0.09, wpb=64824, bsz=128, num_updates=11663, lr=9.99147e-05, gnorm=2.016, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133549
2021-06-20 07:44:45 | INFO | train_inner | epoch 004: 2729 / 3002 loss=2.457, ppl=5.49, wps=5847.4, ups=0.09, wpb=64790, bsz=128, num_updates=11664, lr=9.99147e-05, gnorm=2.025, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133560
2021-06-20 07:44:57 | INFO | train_inner | epoch 004: 2730 / 3002 loss=2.551, ppl=5.86, wps=5819.5, ups=0.09, wpb=64881, bsz=128, num_updates=11665, lr=9.99147e-05, gnorm=1.974, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133571
2021-06-20 07:45:07 | INFO | train_inner | epoch 004: 2731 / 3002 loss=2.593, ppl=6.04, wps=5960.2, ups=0.09, wpb=64868, bsz=128, num_updates=11666, lr=9.99147e-05, gnorm=1.95, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133582
2021-06-20 07:45:19 | INFO | train_inner | epoch 004: 2732 / 3002 loss=2.546, ppl=5.84, wps=5703.9, ups=0.09, wpb=64778, bsz=128, num_updates=11667, lr=9.99147e-05, gnorm=2.05, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133593
2021-06-20 07:45:30 | INFO | train_inner | epoch 004: 2733 / 3002 loss=2.665, ppl=6.34, wps=5791.3, ups=0.09, wpb=64711, bsz=128, num_updates=11668, lr=9.99146e-05, gnorm=2.12, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133604
2021-06-20 07:45:41 | INFO | train_inner | epoch 004: 2734 / 3002 loss=2.467, ppl=5.53, wps=5753.1, ups=0.09, wpb=64855, bsz=128, num_updates=11669, lr=9.99146e-05, gnorm=2.09, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133616
2021-06-20 07:45:52 | INFO | train_inner | epoch 004: 2735 / 3002 loss=2.369, ppl=5.17, wps=5901.2, ups=0.09, wpb=64819, bsz=128, num_updates=11670, lr=9.99146e-05, gnorm=1.982, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133627
2021-06-20 07:46:03 | INFO | train_inner | epoch 004: 2736 / 3002 loss=2.445, ppl=5.44, wps=5812.3, ups=0.09, wpb=64830, bsz=128, num_updates=11671, lr=9.99146e-05, gnorm=2.141, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133638
2021-06-20 07:46:14 | INFO | train_inner | epoch 004: 2737 / 3002 loss=2.702, ppl=6.51, wps=5862.8, ups=0.09, wpb=64798, bsz=128, num_updates=11672, lr=9.99146e-05, gnorm=3.15, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133649
2021-06-20 07:46:26 | INFO | train_inner | epoch 004: 2738 / 3002 loss=2.426, ppl=5.37, wps=5862.3, ups=0.09, wpb=64867, bsz=128, num_updates=11673, lr=9.99146e-05, gnorm=2.022, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133660
2021-06-20 07:46:37 | INFO | train_inner | epoch 004: 2739 / 3002 loss=2.559, ppl=5.89, wps=5770.2, ups=0.09, wpb=64853, bsz=128, num_updates=11674, lr=9.99146e-05, gnorm=1.974, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133671
2021-06-20 07:46:48 | INFO | train_inner | epoch 004: 2740 / 3002 loss=2.575, ppl=5.96, wps=5873.6, ups=0.09, wpb=64800, bsz=128, num_updates=11675, lr=9.99146e-05, gnorm=9.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133682
2021-06-20 07:46:59 | INFO | train_inner | epoch 004: 2741 / 3002 loss=2.612, ppl=6.11, wps=5981.2, ups=0.09, wpb=64801, bsz=128, num_updates=11676, lr=9.99146e-05, gnorm=2.043, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133693
2021-06-20 07:47:10 | INFO | train_inner | epoch 004: 2742 / 3002 loss=2.711, ppl=6.55, wps=5972.8, ups=0.09, wpb=64841, bsz=128, num_updates=11677, lr=9.99146e-05, gnorm=1.979, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133704
2021-06-20 07:47:21 | INFO | train_inner | epoch 004: 2743 / 3002 loss=2.591, ppl=6.03, wps=5813.8, ups=0.09, wpb=64852, bsz=128, num_updates=11678, lr=9.99146e-05, gnorm=1.984, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133715
2021-06-20 07:47:31 | INFO | train_inner | epoch 004: 2744 / 3002 loss=2.453, ppl=5.48, wps=6031.3, ups=0.09, wpb=64818, bsz=128, num_updates=11679, lr=9.99146e-05, gnorm=1.948, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133726
2021-06-20 07:47:43 | INFO | train_inner | epoch 004: 2745 / 3002 loss=2.574, ppl=5.96, wps=5796.3, ups=0.09, wpb=64835, bsz=128, num_updates=11680, lr=9.99146e-05, gnorm=1.945, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133737
2021-06-20 07:47:54 | INFO | train_inner | epoch 004: 2746 / 3002 loss=2.572, ppl=5.95, wps=5915, ups=0.09, wpb=64931, bsz=128, num_updates=11681, lr=9.99145e-05, gnorm=2.041, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133748
2021-06-20 07:48:05 | INFO | train_inner | epoch 004: 2747 / 3002 loss=2.693, ppl=6.46, wps=5857.3, ups=0.09, wpb=64858, bsz=128, num_updates=11682, lr=9.99145e-05, gnorm=2.283, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133759
2021-06-20 07:48:16 | INFO | train_inner | epoch 004: 2748 / 3002 loss=2.448, ppl=5.46, wps=5856.4, ups=0.09, wpb=64756, bsz=128, num_updates=11683, lr=9.99145e-05, gnorm=2.07, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133770
2021-06-20 07:48:27 | INFO | train_inner | epoch 004: 2749 / 3002 loss=2.51, ppl=5.7, wps=5911.5, ups=0.09, wpb=64841, bsz=128, num_updates=11684, lr=9.99145e-05, gnorm=2.222, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133781
2021-06-20 07:48:38 | INFO | train_inner | epoch 004: 2750 / 3002 loss=2.536, ppl=5.8, wps=5924.5, ups=0.09, wpb=64842, bsz=128, num_updates=11685, lr=9.99145e-05, gnorm=3.218, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133792
2021-06-20 07:48:48 | INFO | train_inner | epoch 004: 2751 / 3002 loss=2.796, ppl=6.94, wps=6030.2, ups=0.09, wpb=64867, bsz=128, num_updates=11686, lr=9.99145e-05, gnorm=2.088, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133803
2021-06-20 07:48:59 | INFO | train_inner | epoch 004: 2752 / 3002 loss=2.643, ppl=6.25, wps=5871, ups=0.09, wpb=64898, bsz=128, num_updates=11687, lr=9.99145e-05, gnorm=2.059, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133814
2021-06-20 07:49:11 | INFO | train_inner | epoch 004: 2753 / 3002 loss=2.45, ppl=5.46, wps=5836.9, ups=0.09, wpb=64817, bsz=128, num_updates=11688, lr=9.99145e-05, gnorm=2.185, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133825
2021-06-20 07:49:22 | INFO | train_inner | epoch 004: 2754 / 3002 loss=2.623, ppl=6.16, wps=5828.4, ups=0.09, wpb=64901, bsz=128, num_updates=11689, lr=9.99145e-05, gnorm=2.101, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133836
2021-06-20 07:49:33 | INFO | train_inner | epoch 004: 2755 / 3002 loss=2.5, ppl=5.66, wps=5783.7, ups=0.09, wpb=64845, bsz=128, num_updates=11690, lr=9.99145e-05, gnorm=11.311, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133847
2021-06-20 07:49:44 | INFO | train_inner | epoch 004: 2756 / 3002 loss=2.589, ppl=6.01, wps=5832.3, ups=0.09, wpb=64840, bsz=128, num_updates=11691, lr=9.99145e-05, gnorm=2.078, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133858
2021-06-20 07:49:55 | INFO | train_inner | epoch 004: 2757 / 3002 loss=2.532, ppl=5.78, wps=5804.4, ups=0.09, wpb=64756, bsz=128, num_updates=11692, lr=9.99145e-05, gnorm=1.992, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133869
2021-06-20 07:50:06 | INFO | train_inner | epoch 004: 2758 / 3002 loss=2.515, ppl=5.71, wps=5901.5, ups=0.09, wpb=64801, bsz=128, num_updates=11693, lr=9.99144e-05, gnorm=2.019, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133880
2021-06-20 07:50:17 | INFO | train_inner | epoch 004: 2759 / 3002 loss=2.627, ppl=6.18, wps=5997.8, ups=0.09, wpb=64798, bsz=128, num_updates=11694, lr=9.99144e-05, gnorm=1.997, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133891
2021-06-20 07:50:28 | INFO | train_inner | epoch 004: 2760 / 3002 loss=2.616, ppl=6.13, wps=5907.1, ups=0.09, wpb=64848, bsz=128, num_updates=11695, lr=9.99144e-05, gnorm=1.996, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133902
2021-06-20 07:50:39 | INFO | train_inner | epoch 004: 2761 / 3002 loss=2.406, ppl=5.3, wps=5945.3, ups=0.09, wpb=64836, bsz=128, num_updates=11696, lr=9.99144e-05, gnorm=2.024, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=133913
2021-06-20 07:50:50 | INFO | train_inner | epoch 004: 2762 / 3002 loss=2.541, ppl=5.82, wps=5830.8, ups=0.09, wpb=64807, bsz=128, num_updates=11697, lr=9.99144e-05, gnorm=1.988, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133924
2021-06-20 07:51:01 | INFO | train_inner | epoch 004: 2763 / 3002 loss=2.6, ppl=6.06, wps=5874.9, ups=0.09, wpb=64766, bsz=128, num_updates=11698, lr=9.99144e-05, gnorm=2.043, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133935
2021-06-20 07:51:12 | INFO | train_inner | epoch 004: 2764 / 3002 loss=2.578, ppl=5.97, wps=5809, ups=0.09, wpb=64862, bsz=128, num_updates=11699, lr=9.99144e-05, gnorm=2.064, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133946
2021-06-20 07:51:23 | INFO | train_inner | epoch 004: 2765 / 3002 loss=2.603, ppl=6.08, wps=5825.5, ups=0.09, wpb=64688, bsz=128, num_updates=11700, lr=9.99144e-05, gnorm=2.029, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133958
2021-06-20 07:51:34 | INFO | train_inner | epoch 004: 2766 / 3002 loss=2.528, ppl=5.77, wps=5928.3, ups=0.09, wpb=64865, bsz=128, num_updates=11701, lr=9.99144e-05, gnorm=2.01, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133969
2021-06-20 07:51:45 | INFO | train_inner | epoch 004: 2767 / 3002 loss=2.491, ppl=5.62, wps=5884.8, ups=0.09, wpb=64850, bsz=128, num_updates=11702, lr=9.99144e-05, gnorm=1.942, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133980
2021-06-20 07:51:56 | INFO | train_inner | epoch 004: 2768 / 3002 loss=2.364, ppl=5.15, wps=5785.7, ups=0.09, wpb=64849, bsz=128, num_updates=11703, lr=9.99144e-05, gnorm=2.04, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=133991
2021-06-20 07:52:08 | INFO | train_inner | epoch 004: 2769 / 3002 loss=2.563, ppl=5.91, wps=5771, ups=0.09, wpb=64843, bsz=128, num_updates=11704, lr=9.99144e-05, gnorm=2.033, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134002
2021-06-20 07:52:19 | INFO | train_inner | epoch 004: 2770 / 3002 loss=2.523, ppl=5.75, wps=5835.2, ups=0.09, wpb=64793, bsz=128, num_updates=11705, lr=9.99144e-05, gnorm=2.02, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134013
2021-06-20 07:52:30 | INFO | train_inner | epoch 004: 2771 / 3002 loss=2.646, ppl=6.26, wps=5819.5, ups=0.09, wpb=64811, bsz=128, num_updates=11706, lr=9.99143e-05, gnorm=2.18, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134024
2021-06-20 07:52:41 | INFO | train_inner | epoch 004: 2772 / 3002 loss=2.532, ppl=5.78, wps=5762.9, ups=0.09, wpb=64748, bsz=128, num_updates=11707, lr=9.99143e-05, gnorm=2.173, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134035
2021-06-20 07:52:52 | INFO | train_inner | epoch 004: 2773 / 3002 loss=2.449, ppl=5.46, wps=5865.5, ups=0.09, wpb=64859, bsz=128, num_updates=11708, lr=9.99143e-05, gnorm=3.382, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134047
2021-06-20 07:53:03 | INFO | train_inner | epoch 004: 2774 / 3002 loss=2.628, ppl=6.18, wps=5902.9, ups=0.09, wpb=64792, bsz=128, num_updates=11709, lr=9.99143e-05, gnorm=2.215, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=134057
2021-06-20 07:53:14 | INFO | train_inner | epoch 004: 2775 / 3002 loss=2.576, ppl=5.96, wps=5888.6, ups=0.09, wpb=64813, bsz=128, num_updates=11710, lr=9.99143e-05, gnorm=2.122, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134068
2021-06-20 07:53:25 | INFO | train_inner | epoch 004: 2776 / 3002 loss=2.54, ppl=5.82, wps=5853.3, ups=0.09, wpb=64900, bsz=128, num_updates=11711, lr=9.99143e-05, gnorm=2.037, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134080
2021-06-20 07:53:36 | INFO | train_inner | epoch 004: 2777 / 3002 loss=2.541, ppl=5.82, wps=5922, ups=0.09, wpb=64815, bsz=128, num_updates=11712, lr=9.99143e-05, gnorm=2.036, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134091
2021-06-20 07:53:47 | INFO | train_inner | epoch 004: 2778 / 3002 loss=2.614, ppl=6.12, wps=5810.3, ups=0.09, wpb=64804, bsz=128, num_updates=11713, lr=9.99143e-05, gnorm=1.957, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134102
2021-06-20 07:53:58 | INFO | train_inner | epoch 004: 2779 / 3002 loss=2.496, ppl=5.64, wps=5931.4, ups=0.09, wpb=64881, bsz=128, num_updates=11714, lr=9.99143e-05, gnorm=1.978, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=134113
2021-06-20 07:54:09 | INFO | train_inner | epoch 004: 2780 / 3002 loss=2.546, ppl=5.84, wps=5866.3, ups=0.09, wpb=64825, bsz=128, num_updates=11715, lr=9.99143e-05, gnorm=1.958, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134124
2021-06-20 07:54:20 | INFO | train_inner | epoch 004: 2781 / 3002 loss=2.535, ppl=5.8, wps=5985.3, ups=0.09, wpb=64882, bsz=128, num_updates=11716, lr=9.99143e-05, gnorm=1.945, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=134135
2021-06-20 07:54:31 | INFO | train_inner | epoch 004: 2782 / 3002 loss=2.448, ppl=5.46, wps=5910.1, ups=0.09, wpb=64886, bsz=128, num_updates=11717, lr=9.99143e-05, gnorm=2.273, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134145
2021-06-20 07:54:42 | INFO | train_inner | epoch 004: 2783 / 3002 loss=2.519, ppl=5.73, wps=5806.3, ups=0.09, wpb=64858, bsz=128, num_updates=11718, lr=9.99142e-05, gnorm=2.004, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134157
2021-06-20 07:54:53 | INFO | train_inner | epoch 004: 2784 / 3002 loss=2.396, ppl=5.26, wps=5982.2, ups=0.09, wpb=64852, bsz=128, num_updates=11719, lr=9.99142e-05, gnorm=2.005, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=134168
2021-06-20 07:55:04 | INFO | train_inner | epoch 004: 2785 / 3002 loss=2.524, ppl=5.75, wps=5751.6, ups=0.09, wpb=64771, bsz=128, num_updates=11720, lr=9.99142e-05, gnorm=1.976, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134179
2021-06-20 07:55:15 | INFO | train_inner | epoch 004: 2786 / 3002 loss=2.616, ppl=6.13, wps=5871.9, ups=0.09, wpb=64877, bsz=128, num_updates=11721, lr=9.99142e-05, gnorm=2.032, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134190
2021-06-20 07:55:26 | INFO | train_inner | epoch 004: 2787 / 3002 loss=2.664, ppl=6.34, wps=5954.2, ups=0.09, wpb=64814, bsz=128, num_updates=11722, lr=9.99142e-05, gnorm=2.09, loss_scale=0.5, train_wall=10, gb_free=2.8, wall=134201
2021-06-20 07:55:38 | INFO | train_inner | epoch 004: 2788 / 3002 loss=2.517, ppl=5.72, wps=5785.3, ups=0.09, wpb=64768, bsz=128, num_updates=11723, lr=9.99142e-05, gnorm=2.845, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134212
2021-06-20 07:55:49 | INFO | train_inner | epoch 004: 2789 / 3002 loss=2.484, ppl=5.59, wps=5823.5, ups=0.09, wpb=64806, bsz=128, num_updates=11724, lr=9.99142e-05, gnorm=3.306, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134223
2021-06-20 07:56:00 | INFO | train_inner | epoch 004: 2790 / 3002 loss=2.596, ppl=6.05, wps=5870.6, ups=0.09, wpb=64845, bsz=128, num_updates=11725, lr=9.99142e-05, gnorm=2.02, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134234
2021-06-20 07:56:11 | INFO | train_inner | epoch 004: 2791 / 3002 loss=2.508, ppl=5.69, wps=5909.4, ups=0.09, wpb=64802, bsz=128, num_updates=11726, lr=9.99142e-05, gnorm=2.014, loss_scale=0.5, train_wall=11, gb_free=2.8, wall=134245
2021-06-20 07:56:22 | INFO | train_inner | epoch 004: 2792 / 3002 loss=2.676, ppl=6.39, wps=5826, ups=0.09, wpb=64820, bsz=128, num_updates=11727, lr=9.99142e-05, gnorm=2.095, loss_scale=1, train_wall=11, gb_free=2.8, wall=134256
2021-06-20 07:56:33 | INFO | train_inner | epoch 004: 2793 / 3002 loss=2.622, ppl=6.15, wps=5942.1, ups=0.09, wpb=64863, bsz=128, num_updates=11728, lr=9.99142e-05, gnorm=2.121, loss_scale=1, train_wall=10, gb_free=2.8, wall=134267
2021-06-20 07:56:44 | INFO | train_inner | epoch 004: 2794 / 3002 loss=2.638, ppl=6.23, wps=5835.9, ups=0.09, wpb=64809, bsz=128, num_updates=11729, lr=9.99142e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=134278
2021-06-20 07:56:55 | INFO | train_inner | epoch 004: 2795 / 3002 loss=2.427, ppl=5.38, wps=5899.6, ups=0.09, wpb=64861, bsz=128, num_updates=11730, lr=9.99142e-05, gnorm=2.048, loss_scale=1, train_wall=11, gb_free=2.8, wall=134289
2021-06-20 07:57:06 | INFO | train_inner | epoch 004: 2796 / 3002 loss=2.512, ppl=5.7, wps=5903.8, ups=0.09, wpb=64862, bsz=128, num_updates=11731, lr=9.99141e-05, gnorm=3.874, loss_scale=1, train_wall=11, gb_free=2.8, wall=134300
2021-06-20 07:57:17 | INFO | train_inner | epoch 004: 2797 / 3002 loss=2.542, ppl=5.82, wps=5826.4, ups=0.09, wpb=64754, bsz=128, num_updates=11732, lr=9.99141e-05, gnorm=2.063, loss_scale=1, train_wall=11, gb_free=2.8, wall=134311
2021-06-20 07:57:28 | INFO | train_inner | epoch 004: 2798 / 3002 loss=2.601, ppl=6.07, wps=5777, ups=0.09, wpb=64844, bsz=128, num_updates=11733, lr=9.99141e-05, gnorm=2.022, loss_scale=1, train_wall=11, gb_free=2.8, wall=134323
2021-06-20 07:57:39 | INFO | train_inner | epoch 004: 2799 / 3002 loss=2.435, ppl=5.41, wps=5881.7, ups=0.09, wpb=64903, bsz=128, num_updates=11734, lr=9.99141e-05, gnorm=2.084, loss_scale=1, train_wall=11, gb_free=2.8, wall=134334
2021-06-20 07:57:50 | INFO | train_inner | epoch 004: 2800 / 3002 loss=2.614, ppl=6.12, wps=5752.9, ups=0.09, wpb=64880, bsz=128, num_updates=11735, lr=9.99141e-05, gnorm=2.004, loss_scale=1, train_wall=11, gb_free=2.8, wall=134345
2021-06-20 07:58:02 | INFO | train_inner | epoch 004: 2801 / 3002 loss=2.582, ppl=5.99, wps=5820.1, ups=0.09, wpb=64797, bsz=128, num_updates=11736, lr=9.99141e-05, gnorm=1.944, loss_scale=1, train_wall=11, gb_free=2.8, wall=134356
2021-06-20 07:58:13 | INFO | train_inner | epoch 004: 2802 / 3002 loss=2.547, ppl=5.84, wps=5889.7, ups=0.09, wpb=64833, bsz=128, num_updates=11737, lr=9.99141e-05, gnorm=1.993, loss_scale=1, train_wall=11, gb_free=2.8, wall=134367
2021-06-20 07:58:24 | INFO | train_inner | epoch 004: 2803 / 3002 loss=2.466, ppl=5.53, wps=5770.4, ups=0.09, wpb=64791, bsz=128, num_updates=11738, lr=9.99141e-05, gnorm=1.908, loss_scale=1, train_wall=11, gb_free=2.8, wall=134378
2021-06-20 07:58:35 | INFO | train_inner | epoch 004: 2804 / 3002 loss=2.506, ppl=5.68, wps=5867.7, ups=0.09, wpb=64851, bsz=128, num_updates=11739, lr=9.99141e-05, gnorm=2.004, loss_scale=1, train_wall=11, gb_free=2.8, wall=134389
2021-06-20 07:58:46 | INFO | train_inner | epoch 004: 2805 / 3002 loss=2.532, ppl=5.78, wps=5871.7, ups=0.09, wpb=64825, bsz=128, num_updates=11740, lr=9.99141e-05, gnorm=2.087, loss_scale=1, train_wall=11, gb_free=2.8, wall=134400
2021-06-20 07:58:57 | INFO | train_inner | epoch 004: 2806 / 3002 loss=2.478, ppl=5.57, wps=5864.9, ups=0.09, wpb=64832, bsz=128, num_updates=11741, lr=9.99141e-05, gnorm=2.035, loss_scale=1, train_wall=11, gb_free=2.8, wall=134411
2021-06-20 07:59:08 | INFO | train_inner | epoch 004: 2807 / 3002 loss=2.651, ppl=6.28, wps=5822.5, ups=0.09, wpb=64767, bsz=128, num_updates=11742, lr=9.99141e-05, gnorm=2.115, loss_scale=1, train_wall=11, gb_free=2.8, wall=134422
2021-06-20 07:59:19 | INFO | train_inner | epoch 004: 2808 / 3002 loss=2.66, ppl=6.32, wps=5776.5, ups=0.09, wpb=64850, bsz=128, num_updates=11743, lr=9.9914e-05, gnorm=2.121, loss_scale=1, train_wall=11, gb_free=2.8, wall=134434
2021-06-20 07:59:31 | INFO | train_inner | epoch 004: 2809 / 3002 loss=2.468, ppl=5.53, wps=5785.8, ups=0.09, wpb=64828, bsz=128, num_updates=11744, lr=9.9914e-05, gnorm=2.052, loss_scale=1, train_wall=11, gb_free=2.8, wall=134445
2021-06-20 07:59:42 | INFO | train_inner | epoch 004: 2810 / 3002 loss=2.45, ppl=5.46, wps=5801.1, ups=0.09, wpb=64891, bsz=128, num_updates=11745, lr=9.9914e-05, gnorm=2.053, loss_scale=1, train_wall=11, gb_free=2.8, wall=134456
2021-06-20 07:59:53 | INFO | train_inner | epoch 004: 2811 / 3002 loss=2.437, ppl=5.42, wps=5888.6, ups=0.09, wpb=64772, bsz=128, num_updates=11746, lr=9.9914e-05, gnorm=1.934, loss_scale=1, train_wall=10, gb_free=2.8, wall=134467
2021-06-20 08:00:04 | INFO | train_inner | epoch 004: 2812 / 3002 loss=2.588, ppl=6.01, wps=5881.9, ups=0.09, wpb=64790, bsz=128, num_updates=11747, lr=9.9914e-05, gnorm=2.023, loss_scale=1, train_wall=11, gb_free=2.8, wall=134478
2021-06-20 08:00:15 | INFO | train_inner | epoch 004: 2813 / 3002 loss=2.442, ppl=5.43, wps=5819, ups=0.09, wpb=64812, bsz=128, num_updates=11748, lr=9.9914e-05, gnorm=1.954, loss_scale=1, train_wall=11, gb_free=2.8, wall=134489
2021-06-20 08:00:26 | INFO | train_inner | epoch 004: 2814 / 3002 loss=2.675, ppl=6.39, wps=5804.7, ups=0.09, wpb=64748, bsz=128, num_updates=11749, lr=9.9914e-05, gnorm=1.974, loss_scale=1, train_wall=11, gb_free=2.8, wall=134500
2021-06-20 08:00:37 | INFO | train_inner | epoch 004: 2815 / 3002 loss=2.523, ppl=5.75, wps=5754.3, ups=0.09, wpb=64836, bsz=128, num_updates=11750, lr=9.9914e-05, gnorm=2.095, loss_scale=1, train_wall=11, gb_free=2.8, wall=134512
2021-06-20 08:00:48 | INFO | train_inner | epoch 004: 2816 / 3002 loss=2.534, ppl=5.79, wps=5802.6, ups=0.09, wpb=64781, bsz=128, num_updates=11751, lr=9.9914e-05, gnorm=2.057, loss_scale=1, train_wall=11, gb_free=2.8, wall=134523
2021-06-20 08:01:00 | INFO | train_inner | epoch 004: 2817 / 3002 loss=2.513, ppl=5.71, wps=5784.3, ups=0.09, wpb=64798, bsz=128, num_updates=11752, lr=9.9914e-05, gnorm=1.965, loss_scale=1, train_wall=11, gb_free=2.8, wall=134534
2021-06-20 08:01:11 | INFO | train_inner | epoch 004: 2818 / 3002 loss=2.486, ppl=5.6, wps=5837.2, ups=0.09, wpb=64769, bsz=128, num_updates=11753, lr=9.9914e-05, gnorm=1.979, loss_scale=1, train_wall=11, gb_free=2.8, wall=134545
2021-06-20 08:01:22 | INFO | train_inner | epoch 004: 2819 / 3002 loss=2.577, ppl=5.97, wps=5924.4, ups=0.09, wpb=64787, bsz=128, num_updates=11754, lr=9.9914e-05, gnorm=2.285, loss_scale=1, train_wall=10, gb_free=2.8, wall=134556
2021-06-20 08:01:33 | INFO | train_inner | epoch 004: 2820 / 3002 loss=2.441, ppl=5.43, wps=5739.1, ups=0.09, wpb=64916, bsz=128, num_updates=11755, lr=9.9914e-05, gnorm=1.93, loss_scale=1, train_wall=11, gb_free=2.8, wall=134567
2021-06-20 08:01:44 | INFO | train_inner | epoch 004: 2821 / 3002 loss=2.375, ppl=5.19, wps=5834.8, ups=0.09, wpb=64860, bsz=128, num_updates=11756, lr=9.99139e-05, gnorm=2.247, loss_scale=1, train_wall=11, gb_free=2.8, wall=134578
2021-06-20 08:01:55 | INFO | train_inner | epoch 004: 2822 / 3002 loss=2.44, ppl=5.43, wps=5916.8, ups=0.09, wpb=64828, bsz=128, num_updates=11757, lr=9.99139e-05, gnorm=1.908, loss_scale=1, train_wall=11, gb_free=2.8, wall=134589
2021-06-20 08:02:06 | INFO | train_inner | epoch 004: 2823 / 3002 loss=2.523, ppl=5.75, wps=5771.8, ups=0.09, wpb=64715, bsz=128, num_updates=11758, lr=9.99139e-05, gnorm=2.001, loss_scale=1, train_wall=11, gb_free=2.8, wall=134601
2021-06-20 08:02:18 | INFO | train_inner | epoch 004: 2824 / 3002 loss=2.691, ppl=6.46, wps=5792.5, ups=0.09, wpb=64816, bsz=128, num_updates=11759, lr=9.99139e-05, gnorm=1.98, loss_scale=1, train_wall=11, gb_free=2.8, wall=134612
2021-06-20 08:02:29 | INFO | train_inner | epoch 004: 2825 / 3002 loss=2.554, ppl=5.87, wps=5796.7, ups=0.09, wpb=64847, bsz=128, num_updates=11760, lr=9.99139e-05, gnorm=2.062, loss_scale=1, train_wall=11, gb_free=2.8, wall=134623
2021-06-20 08:02:40 | INFO | train_inner | epoch 004: 2826 / 3002 loss=2.556, ppl=5.88, wps=5735.1, ups=0.09, wpb=64849, bsz=128, num_updates=11761, lr=9.99139e-05, gnorm=2.019, loss_scale=1, train_wall=11, gb_free=2.8, wall=134634
2021-06-20 08:02:51 | INFO | train_inner | epoch 004: 2827 / 3002 loss=2.576, ppl=5.96, wps=5882.1, ups=0.09, wpb=64762, bsz=128, num_updates=11762, lr=9.99139e-05, gnorm=2.05, loss_scale=1, train_wall=11, gb_free=2.8, wall=134645
2021-06-20 08:03:02 | INFO | train_inner | epoch 004: 2828 / 3002 loss=2.59, ppl=6.02, wps=5814.6, ups=0.09, wpb=64875, bsz=128, num_updates=11763, lr=9.99139e-05, gnorm=2.113, loss_scale=1, train_wall=11, gb_free=2.8, wall=134657
2021-06-20 08:03:13 | INFO | train_inner | epoch 004: 2829 / 3002 loss=2.7, ppl=6.5, wps=5820.4, ups=0.09, wpb=64814, bsz=128, num_updates=11764, lr=9.99139e-05, gnorm=2.144, loss_scale=1, train_wall=11, gb_free=2.8, wall=134668
2021-06-20 08:03:25 | INFO | train_inner | epoch 004: 2830 / 3002 loss=2.536, ppl=5.8, wps=5756.5, ups=0.09, wpb=64833, bsz=128, num_updates=11765, lr=9.99139e-05, gnorm=2.041, loss_scale=1, train_wall=11, gb_free=2.8, wall=134679
2021-06-20 08:03:36 | INFO | train_inner | epoch 004: 2831 / 3002 loss=2.378, ppl=5.2, wps=5925.2, ups=0.09, wpb=64834, bsz=128, num_updates=11766, lr=9.99139e-05, gnorm=1.961, loss_scale=1, train_wall=10, gb_free=2.8, wall=134690
2021-06-20 08:03:47 | INFO | train_inner | epoch 004: 2832 / 3002 loss=2.679, ppl=6.4, wps=5810.4, ups=0.09, wpb=64756, bsz=128, num_updates=11767, lr=9.99139e-05, gnorm=1.925, loss_scale=1, train_wall=11, gb_free=2.8, wall=134701
2021-06-20 08:03:58 | INFO | train_inner | epoch 004: 2833 / 3002 loss=2.635, ppl=6.21, wps=5738.5, ups=0.09, wpb=64849, bsz=128, num_updates=11768, lr=9.99138e-05, gnorm=2.074, loss_scale=1, train_wall=11, gb_free=2.8, wall=134712
2021-06-20 08:04:09 | INFO | train_inner | epoch 004: 2834 / 3002 loss=2.561, ppl=5.9, wps=5893, ups=0.09, wpb=64807, bsz=128, num_updates=11769, lr=9.99138e-05, gnorm=1.944, loss_scale=1, train_wall=11, gb_free=2.8, wall=134723
2021-06-20 08:04:20 | INFO | train_inner | epoch 004: 2835 / 3002 loss=2.463, ppl=5.51, wps=5827.4, ups=0.09, wpb=64901, bsz=128, num_updates=11770, lr=9.99138e-05, gnorm=1.957, loss_scale=1, train_wall=11, gb_free=2.8, wall=134734
2021-06-20 08:04:31 | INFO | train_inner | epoch 004: 2836 / 3002 loss=2.451, ppl=5.47, wps=5811.4, ups=0.09, wpb=64818, bsz=128, num_updates=11771, lr=9.99138e-05, gnorm=1.978, loss_scale=1, train_wall=11, gb_free=2.8, wall=134746
2021-06-20 08:04:42 | INFO | train_inner | epoch 004: 2837 / 3002 loss=2.488, ppl=5.61, wps=5980.7, ups=0.09, wpb=64796, bsz=128, num_updates=11772, lr=9.99138e-05, gnorm=1.956, loss_scale=1, train_wall=10, gb_free=2.8, wall=134756
2021-06-20 08:04:53 | INFO | train_inner | epoch 004: 2838 / 3002 loss=2.487, ppl=5.61, wps=5701.2, ups=0.09, wpb=64815, bsz=128, num_updates=11773, lr=9.99138e-05, gnorm=1.987, loss_scale=1, train_wall=11, gb_free=2.8, wall=134768
2021-06-20 08:05:05 | INFO | train_inner | epoch 004: 2839 / 3002 loss=2.595, ppl=6.04, wps=5859, ups=0.09, wpb=64757, bsz=128, num_updates=11774, lr=9.99138e-05, gnorm=2.018, loss_scale=1, train_wall=11, gb_free=2.8, wall=134779
2021-06-20 08:05:15 | INFO | train_inner | epoch 004: 2840 / 3002 loss=2.536, ppl=5.8, wps=5901.6, ups=0.09, wpb=64786, bsz=128, num_updates=11775, lr=9.99138e-05, gnorm=1.99, loss_scale=1, train_wall=11, gb_free=2.8, wall=134790
2021-06-20 08:05:26 | INFO | train_inner | epoch 004: 2841 / 3002 loss=2.518, ppl=5.73, wps=5940.7, ups=0.09, wpb=64949, bsz=128, num_updates=11776, lr=9.99138e-05, gnorm=1.998, loss_scale=1, train_wall=10, gb_free=2.8, wall=134801
2021-06-20 08:05:37 | INFO | train_inner | epoch 004: 2842 / 3002 loss=2.504, ppl=5.67, wps=6002, ups=0.09, wpb=64869, bsz=128, num_updates=11777, lr=9.99138e-05, gnorm=1.917, loss_scale=1, train_wall=10, gb_free=2.8, wall=134812
2021-06-20 08:05:48 | INFO | train_inner | epoch 004: 2843 / 3002 loss=2.473, ppl=5.55, wps=5955.9, ups=0.09, wpb=64882, bsz=128, num_updates=11778, lr=9.99138e-05, gnorm=2.047, loss_scale=1, train_wall=10, gb_free=2.8, wall=134822
2021-06-20 08:05:59 | INFO | train_inner | epoch 004: 2844 / 3002 loss=2.544, ppl=5.83, wps=5840.4, ups=0.09, wpb=64844, bsz=128, num_updates=11779, lr=9.99138e-05, gnorm=1.92, loss_scale=1, train_wall=11, gb_free=2.8, wall=134834
2021-06-20 08:06:10 | INFO | train_inner | epoch 004: 2845 / 3002 loss=2.558, ppl=5.89, wps=5845, ups=0.09, wpb=64771, bsz=128, num_updates=11780, lr=9.99138e-05, gnorm=2.085, loss_scale=1, train_wall=11, gb_free=2.8, wall=134845
2021-06-20 08:06:21 | INFO | train_inner | epoch 004: 2846 / 3002 loss=2.599, ppl=6.06, wps=5836.3, ups=0.09, wpb=64782, bsz=128, num_updates=11781, lr=9.99137e-05, gnorm=2.031, loss_scale=1, train_wall=11, gb_free=2.8, wall=134856
2021-06-20 08:06:33 | INFO | train_inner | epoch 004: 2847 / 3002 loss=2.407, ppl=5.31, wps=5809.7, ups=0.09, wpb=64822, bsz=128, num_updates=11782, lr=9.99137e-05, gnorm=1.978, loss_scale=1, train_wall=11, gb_free=2.8, wall=134867
2021-06-20 08:06:44 | INFO | train_inner | epoch 004: 2848 / 3002 loss=2.595, ppl=6.04, wps=5816.3, ups=0.09, wpb=64682, bsz=128, num_updates=11783, lr=9.99137e-05, gnorm=2.112, loss_scale=1, train_wall=11, gb_free=2.8, wall=134878
2021-06-20 08:06:54 | INFO | train_inner | epoch 004: 2849 / 3002 loss=2.58, ppl=5.98, wps=6034.4, ups=0.09, wpb=64846, bsz=128, num_updates=11784, lr=9.99137e-05, gnorm=1.942, loss_scale=1, train_wall=10, gb_free=2.8, wall=134889
2021-06-20 08:07:06 | INFO | train_inner | epoch 004: 2850 / 3002 loss=2.54, ppl=5.81, wps=5822.3, ups=0.09, wpb=64713, bsz=128, num_updates=11785, lr=9.99137e-05, gnorm=2.027, loss_scale=1, train_wall=11, gb_free=2.8, wall=134900
2021-06-20 08:07:17 | INFO | train_inner | epoch 004: 2851 / 3002 loss=2.621, ppl=6.15, wps=5847.4, ups=0.09, wpb=64846, bsz=128, num_updates=11786, lr=9.99137e-05, gnorm=1.955, loss_scale=1, train_wall=11, gb_free=2.8, wall=134911
2021-06-20 08:07:28 | INFO | train_inner | epoch 004: 2852 / 3002 loss=2.57, ppl=5.94, wps=5920.4, ups=0.09, wpb=64882, bsz=128, num_updates=11787, lr=9.99137e-05, gnorm=1.955, loss_scale=1, train_wall=11, gb_free=2.8, wall=134922
2021-06-20 08:07:39 | INFO | train_inner | epoch 004: 2853 / 3002 loss=2.634, ppl=6.21, wps=5925.1, ups=0.09, wpb=64796, bsz=128, num_updates=11788, lr=9.99137e-05, gnorm=2.074, loss_scale=1, train_wall=10, gb_free=2.8, wall=134933
2021-06-20 08:07:50 | INFO | train_inner | epoch 004: 2854 / 3002 loss=2.57, ppl=5.94, wps=5858.6, ups=0.09, wpb=64812, bsz=128, num_updates=11789, lr=9.99137e-05, gnorm=2.041, loss_scale=1, train_wall=11, gb_free=2.8, wall=134944
2021-06-20 08:08:01 | INFO | train_inner | epoch 004: 2855 / 3002 loss=2.615, ppl=6.12, wps=5785.7, ups=0.09, wpb=64838, bsz=128, num_updates=11790, lr=9.99137e-05, gnorm=2, loss_scale=1, train_wall=11, gb_free=2.8, wall=134955
2021-06-20 08:08:12 | INFO | train_inner | epoch 004: 2856 / 3002 loss=2.625, ppl=6.17, wps=5756.3, ups=0.09, wpb=64810, bsz=128, num_updates=11791, lr=9.99137e-05, gnorm=2, loss_scale=1, train_wall=11, gb_free=2.8, wall=134966
2021-06-20 08:08:23 | INFO | train_inner | epoch 004: 2857 / 3002 loss=2.511, ppl=5.7, wps=5908.9, ups=0.09, wpb=64901, bsz=128, num_updates=11792, lr=9.99137e-05, gnorm=2.05, loss_scale=1, train_wall=11, gb_free=2.8, wall=134977
2021-06-20 08:08:34 | INFO | train_inner | epoch 004: 2858 / 3002 loss=2.595, ppl=6.04, wps=5877.6, ups=0.09, wpb=64851, bsz=128, num_updates=11793, lr=9.99136e-05, gnorm=2.059, loss_scale=1, train_wall=11, gb_free=2.8, wall=134988
2021-06-20 08:08:45 | INFO | train_inner | epoch 004: 2859 / 3002 loss=2.479, ppl=5.58, wps=5849.1, ups=0.09, wpb=64800, bsz=128, num_updates=11794, lr=9.99136e-05, gnorm=2.103, loss_scale=1, train_wall=11, gb_free=2.8, wall=134999
2021-06-20 08:08:56 | INFO | train_inner | epoch 004: 2860 / 3002 loss=2.86, ppl=7.26, wps=5860.4, ups=0.09, wpb=64792, bsz=128, num_updates=11795, lr=9.99136e-05, gnorm=2.063, loss_scale=1, train_wall=11, gb_free=2.8, wall=135011
2021-06-20 08:09:07 | INFO | train_inner | epoch 004: 2861 / 3002 loss=2.549, ppl=5.85, wps=5754.9, ups=0.09, wpb=64862, bsz=128, num_updates=11796, lr=9.99136e-05, gnorm=1.967, loss_scale=1, train_wall=11, gb_free=2.8, wall=135022
2021-06-20 08:09:19 | INFO | train_inner | epoch 004: 2862 / 3002 loss=2.416, ppl=5.34, wps=5842.1, ups=0.09, wpb=64857, bsz=128, num_updates=11797, lr=9.99136e-05, gnorm=1.855, loss_scale=1, train_wall=11, gb_free=2.8, wall=135033
2021-06-20 08:09:29 | INFO | train_inner | epoch 004: 2863 / 3002 loss=2.651, ppl=6.28, wps=5952.2, ups=0.09, wpb=64847, bsz=128, num_updates=11798, lr=9.99136e-05, gnorm=2.004, loss_scale=1, train_wall=10, gb_free=2.8, wall=135044
2021-06-20 08:09:41 | INFO | train_inner | epoch 004: 2864 / 3002 loss=2.631, ppl=6.2, wps=5840.8, ups=0.09, wpb=64735, bsz=128, num_updates=11799, lr=9.99136e-05, gnorm=2.005, loss_scale=1, train_wall=11, gb_free=2.8, wall=135055
2021-06-20 08:09:52 | INFO | train_inner | epoch 004: 2865 / 3002 loss=2.716, ppl=6.57, wps=5730.2, ups=0.09, wpb=64820, bsz=128, num_updates=11800, lr=9.99136e-05, gnorm=2.08, loss_scale=1, train_wall=11, gb_free=2.8, wall=135066
2021-06-20 08:10:03 | INFO | train_inner | epoch 004: 2866 / 3002 loss=2.557, ppl=5.89, wps=5783.4, ups=0.09, wpb=64816, bsz=128, num_updates=11801, lr=9.99136e-05, gnorm=2.317, loss_scale=1, train_wall=11, gb_free=2.8, wall=135077
2021-06-20 08:10:14 | INFO | train_inner | epoch 004: 2867 / 3002 loss=2.58, ppl=5.98, wps=5841.4, ups=0.09, wpb=64759, bsz=128, num_updates=11802, lr=9.99136e-05, gnorm=2.1, loss_scale=1, train_wall=11, gb_free=2.8, wall=135089
2021-06-20 08:10:25 | INFO | train_inner | epoch 004: 2868 / 3002 loss=2.607, ppl=6.09, wps=5832.9, ups=0.09, wpb=64799, bsz=128, num_updates=11803, lr=9.99136e-05, gnorm=2.038, loss_scale=1, train_wall=11, gb_free=2.8, wall=135100
2021-06-20 08:10:36 | INFO | train_inner | epoch 004: 2869 / 3002 loss=2.508, ppl=5.69, wps=5807.8, ups=0.09, wpb=64839, bsz=128, num_updates=11804, lr=9.99136e-05, gnorm=2.093, loss_scale=1, train_wall=11, gb_free=2.8, wall=135111
2021-06-20 08:10:48 | INFO | train_inner | epoch 004: 2870 / 3002 loss=2.513, ppl=5.71, wps=5808.3, ups=0.09, wpb=64719, bsz=128, num_updates=11805, lr=9.99136e-05, gnorm=2.055, loss_scale=1, train_wall=11, gb_free=2.8, wall=135122
2021-06-20 08:10:59 | INFO | train_inner | epoch 004: 2871 / 3002 loss=2.707, ppl=6.53, wps=5875.8, ups=0.09, wpb=64765, bsz=128, num_updates=11806, lr=9.99135e-05, gnorm=2.126, loss_scale=1, train_wall=11, gb_free=2.8, wall=135133
2021-06-20 08:11:10 | INFO | train_inner | epoch 004: 2872 / 3002 loss=2.518, ppl=5.73, wps=5832.6, ups=0.09, wpb=64923, bsz=128, num_updates=11807, lr=9.99135e-05, gnorm=2.024, loss_scale=1, train_wall=11, gb_free=2.8, wall=135144
2021-06-20 08:11:21 | INFO | train_inner | epoch 004: 2873 / 3002 loss=2.383, ppl=5.22, wps=5879.4, ups=0.09, wpb=64802, bsz=128, num_updates=11808, lr=9.99135e-05, gnorm=1.983, loss_scale=1, train_wall=11, gb_free=2.8, wall=135155
2021-06-20 08:11:32 | INFO | train_inner | epoch 004: 2874 / 3002 loss=2.68, ppl=6.41, wps=5908.7, ups=0.09, wpb=64795, bsz=128, num_updates=11809, lr=9.99135e-05, gnorm=2.083, loss_scale=1, train_wall=11, gb_free=2.8, wall=135166
2021-06-20 08:11:43 | INFO | train_inner | epoch 004: 2875 / 3002 loss=2.49, ppl=5.62, wps=5907.7, ups=0.09, wpb=64879, bsz=128, num_updates=11810, lr=9.99135e-05, gnorm=1.995, loss_scale=1, train_wall=11, gb_free=2.8, wall=135177
2021-06-20 08:11:54 | INFO | train_inner | epoch 004: 2876 / 3002 loss=2.531, ppl=5.78, wps=5895.8, ups=0.09, wpb=64825, bsz=128, num_updates=11811, lr=9.99135e-05, gnorm=2.029, loss_scale=1, train_wall=11, gb_free=2.8, wall=135188
2021-06-20 08:12:05 | INFO | train_inner | epoch 004: 2877 / 3002 loss=2.586, ppl=6, wps=5647.8, ups=0.09, wpb=64716, bsz=128, num_updates=11812, lr=9.99135e-05, gnorm=2.176, loss_scale=1, train_wall=11, gb_free=2.8, wall=135199
2021-06-20 08:12:16 | INFO | train_inner | epoch 004: 2878 / 3002 loss=2.818, ppl=7.05, wps=5844.5, ups=0.09, wpb=64750, bsz=128, num_updates=11813, lr=9.99135e-05, gnorm=2.081, loss_scale=1, train_wall=11, gb_free=2.8, wall=135211
2021-06-20 08:12:27 | INFO | train_inner | epoch 004: 2879 / 3002 loss=2.644, ppl=6.25, wps=5839.2, ups=0.09, wpb=64880, bsz=128, num_updates=11814, lr=9.99135e-05, gnorm=2.095, loss_scale=1, train_wall=11, gb_free=2.8, wall=135222
2021-06-20 08:12:38 | INFO | train_inner | epoch 004: 2880 / 3002 loss=2.673, ppl=6.38, wps=5872.9, ups=0.09, wpb=64787, bsz=128, num_updates=11815, lr=9.99135e-05, gnorm=1.997, loss_scale=1, train_wall=11, gb_free=2.8, wall=135233
2021-06-20 08:12:50 | INFO | train_inner | epoch 004: 2881 / 3002 loss=2.577, ppl=5.97, wps=5778.2, ups=0.09, wpb=64804, bsz=128, num_updates=11816, lr=9.99135e-05, gnorm=1.997, loss_scale=1, train_wall=11, gb_free=2.8, wall=135244
2021-06-20 08:13:01 | INFO | train_inner | epoch 004: 2882 / 3002 loss=2.625, ppl=6.17, wps=5934.5, ups=0.09, wpb=64829, bsz=128, num_updates=11817, lr=9.99135e-05, gnorm=2.035, loss_scale=1, train_wall=10, gb_free=2.8, wall=135255
2021-06-20 08:13:12 | INFO | train_inner | epoch 004: 2883 / 3002 loss=2.564, ppl=5.91, wps=5816.1, ups=0.09, wpb=64833, bsz=128, num_updates=11818, lr=9.99134e-05, gnorm=1.88, loss_scale=1, train_wall=11, gb_free=2.8, wall=135266
2021-06-20 08:13:23 | INFO | train_inner | epoch 004: 2884 / 3002 loss=2.381, ppl=5.21, wps=5885.4, ups=0.09, wpb=64839, bsz=128, num_updates=11819, lr=9.99134e-05, gnorm=2.005, loss_scale=1, train_wall=11, gb_free=2.8, wall=135277
2021-06-20 08:13:34 | INFO | train_inner | epoch 004: 2885 / 3002 loss=2.604, ppl=6.08, wps=5744.5, ups=0.09, wpb=64827, bsz=128, num_updates=11820, lr=9.99134e-05, gnorm=1.956, loss_scale=1, train_wall=11, gb_free=2.8, wall=135288
2021-06-20 08:13:45 | INFO | train_inner | epoch 004: 2886 / 3002 loss=2.644, ppl=6.25, wps=5903.3, ups=0.09, wpb=64776, bsz=128, num_updates=11821, lr=9.99134e-05, gnorm=2.003, loss_scale=1, train_wall=11, gb_free=2.8, wall=135299
2021-06-20 08:13:56 | INFO | train_inner | epoch 004: 2887 / 3002 loss=2.554, ppl=5.87, wps=5822.4, ups=0.09, wpb=64873, bsz=128, num_updates=11822, lr=9.99134e-05, gnorm=2.081, loss_scale=1, train_wall=11, gb_free=2.8, wall=135310
2021-06-20 08:14:07 | INFO | train_inner | epoch 004: 2888 / 3002 loss=2.541, ppl=5.82, wps=5802.6, ups=0.09, wpb=64810, bsz=128, num_updates=11823, lr=9.99134e-05, gnorm=2.131, loss_scale=1, train_wall=11, gb_free=2.8, wall=135322
2021-06-20 08:14:18 | INFO | train_inner | epoch 004: 2889 / 3002 loss=2.465, ppl=5.52, wps=5857.9, ups=0.09, wpb=64823, bsz=128, num_updates=11824, lr=9.99134e-05, gnorm=2.238, loss_scale=1, train_wall=11, gb_free=2.8, wall=135333
2021-06-20 08:14:29 | INFO | train_inner | epoch 004: 2890 / 3002 loss=2.509, ppl=5.69, wps=5909.5, ups=0.09, wpb=64847, bsz=128, num_updates=11825, lr=9.99134e-05, gnorm=1.967, loss_scale=1, train_wall=11, gb_free=2.8, wall=135344
2021-06-20 08:14:40 | INFO | train_inner | epoch 004: 2891 / 3002 loss=2.603, ppl=6.07, wps=5921.1, ups=0.09, wpb=64705, bsz=128, num_updates=11826, lr=9.99134e-05, gnorm=2.018, loss_scale=1, train_wall=10, gb_free=2.8, wall=135355
2021-06-20 08:14:51 | INFO | train_inner | epoch 004: 2892 / 3002 loss=2.437, ppl=5.41, wps=5907.2, ups=0.09, wpb=64882, bsz=128, num_updates=11827, lr=9.99134e-05, gnorm=1.896, loss_scale=1, train_wall=11, gb_free=2.8, wall=135366
2021-06-20 08:15:02 | INFO | train_inner | epoch 004: 2893 / 3002 loss=2.723, ppl=6.6, wps=5848.8, ups=0.09, wpb=64766, bsz=128, num_updates=11828, lr=9.99134e-05, gnorm=2.082, loss_scale=1, train_wall=11, gb_free=2.8, wall=135377
2021-06-20 08:15:13 | INFO | train_inner | epoch 004: 2894 / 3002 loss=2.485, ppl=5.6, wps=5888.9, ups=0.09, wpb=64798, bsz=128, num_updates=11829, lr=9.99134e-05, gnorm=1.918, loss_scale=1, train_wall=11, gb_free=2.8, wall=135388
2021-06-20 08:15:24 | INFO | train_inner | epoch 004: 2895 / 3002 loss=2.59, ppl=6.02, wps=5829, ups=0.09, wpb=64796, bsz=128, num_updates=11830, lr=9.99134e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=135399
2021-06-20 08:15:36 | INFO | train_inner | epoch 004: 2896 / 3002 loss=2.375, ppl=5.19, wps=5714.4, ups=0.09, wpb=64722, bsz=128, num_updates=11831, lr=9.99133e-05, gnorm=1.943, loss_scale=1, train_wall=11, gb_free=2.8, wall=135410
2021-06-20 08:15:47 | INFO | train_inner | epoch 004: 2897 / 3002 loss=2.433, ppl=5.4, wps=5943, ups=0.09, wpb=64803, bsz=128, num_updates=11832, lr=9.99133e-05, gnorm=1.986, loss_scale=1, train_wall=10, gb_free=2.8, wall=135421
2021-06-20 08:15:58 | INFO | train_inner | epoch 004: 2898 / 3002 loss=2.46, ppl=5.5, wps=5868.1, ups=0.09, wpb=64786, bsz=128, num_updates=11833, lr=9.99133e-05, gnorm=2.004, loss_scale=1, train_wall=11, gb_free=2.8, wall=135432
2021-06-20 08:16:09 | INFO | train_inner | epoch 004: 2899 / 3002 loss=2.684, ppl=6.43, wps=5900, ups=0.09, wpb=64862, bsz=128, num_updates=11834, lr=9.99133e-05, gnorm=2.087, loss_scale=1, train_wall=11, gb_free=2.8, wall=135443
2021-06-20 08:16:20 | INFO | train_inner | epoch 004: 2900 / 3002 loss=2.657, ppl=6.31, wps=5818.2, ups=0.09, wpb=64838, bsz=128, num_updates=11835, lr=9.99133e-05, gnorm=2.104, loss_scale=1, train_wall=11, gb_free=2.8, wall=135454
2021-06-20 08:16:31 | INFO | train_inner | epoch 004: 2901 / 3002 loss=2.537, ppl=5.8, wps=5935.9, ups=0.09, wpb=64860, bsz=128, num_updates=11836, lr=9.99133e-05, gnorm=1.999, loss_scale=1, train_wall=10, gb_free=2.8, wall=135465
2021-06-20 08:16:42 | INFO | train_inner | epoch 004: 2902 / 3002 loss=2.433, ppl=5.4, wps=5736.5, ups=0.09, wpb=64892, bsz=128, num_updates=11837, lr=9.99133e-05, gnorm=2.016, loss_scale=1, train_wall=11, gb_free=2.8, wall=135476
2021-06-20 08:16:53 | INFO | train_inner | epoch 004: 2903 / 3002 loss=2.549, ppl=5.85, wps=5837.2, ups=0.09, wpb=64860, bsz=128, num_updates=11838, lr=9.99133e-05, gnorm=2.055, loss_scale=1, train_wall=11, gb_free=2.8, wall=135488
2021-06-20 08:17:04 | INFO | train_inner | epoch 004: 2904 / 3002 loss=2.456, ppl=5.49, wps=5783.2, ups=0.09, wpb=64828, bsz=128, num_updates=11839, lr=9.99133e-05, gnorm=2.038, loss_scale=1, train_wall=11, gb_free=2.8, wall=135499
2021-06-20 08:17:15 | INFO | train_inner | epoch 004: 2905 / 3002 loss=2.463, ppl=5.51, wps=5908, ups=0.09, wpb=64861, bsz=128, num_updates=11840, lr=9.99133e-05, gnorm=2.063, loss_scale=1, train_wall=10, gb_free=2.8, wall=135510
2021-06-20 08:17:26 | INFO | train_inner | epoch 004: 2906 / 3002 loss=2.732, ppl=6.64, wps=5822.3, ups=0.09, wpb=64811, bsz=128, num_updates=11841, lr=9.99133e-05, gnorm=2.096, loss_scale=1, train_wall=11, gb_free=2.8, wall=135521
2021-06-20 08:17:38 | INFO | train_inner | epoch 004: 2907 / 3002 loss=2.584, ppl=5.99, wps=5875.9, ups=0.09, wpb=64842, bsz=128, num_updates=11842, lr=9.99133e-05, gnorm=1.916, loss_scale=1, train_wall=11, gb_free=2.8, wall=135532
2021-06-20 08:17:49 | INFO | train_inner | epoch 004: 2908 / 3002 loss=2.474, ppl=5.56, wps=5816, ups=0.09, wpb=64842, bsz=128, num_updates=11843, lr=9.99132e-05, gnorm=1.908, loss_scale=1, train_wall=11, gb_free=2.8, wall=135543
2021-06-20 08:18:00 | INFO | train_inner | epoch 004: 2909 / 3002 loss=2.54, ppl=5.81, wps=5763.9, ups=0.09, wpb=64864, bsz=128, num_updates=11844, lr=9.99132e-05, gnorm=2.071, loss_scale=1, train_wall=11, gb_free=2.8, wall=135554
2021-06-20 08:18:11 | INFO | train_inner | epoch 004: 2910 / 3002 loss=2.632, ppl=6.2, wps=5789, ups=0.09, wpb=64794, bsz=128, num_updates=11845, lr=9.99132e-05, gnorm=3.831, loss_scale=1, train_wall=11, gb_free=2.8, wall=135565
2021-06-20 08:18:22 | INFO | train_inner | epoch 004: 2911 / 3002 loss=2.537, ppl=5.8, wps=5793.3, ups=0.09, wpb=64833, bsz=128, num_updates=11846, lr=9.99132e-05, gnorm=2.02, loss_scale=1, train_wall=11, gb_free=2.8, wall=135577
2021-06-20 08:18:33 | INFO | train_inner | epoch 004: 2912 / 3002 loss=2.502, ppl=5.66, wps=5884.1, ups=0.09, wpb=64867, bsz=128, num_updates=11847, lr=9.99132e-05, gnorm=2.023, loss_scale=1, train_wall=11, gb_free=2.8, wall=135588
2021-06-20 08:18:44 | INFO | train_inner | epoch 004: 2913 / 3002 loss=2.622, ppl=6.15, wps=5810.7, ups=0.09, wpb=64855, bsz=128, num_updates=11848, lr=9.99132e-05, gnorm=2.158, loss_scale=1, train_wall=11, gb_free=2.8, wall=135599
2021-06-20 08:18:56 | INFO | train_inner | epoch 004: 2914 / 3002 loss=2.458, ppl=5.49, wps=5885.9, ups=0.09, wpb=64834, bsz=128, num_updates=11849, lr=9.99132e-05, gnorm=2.029, loss_scale=1, train_wall=11, gb_free=2.8, wall=135610
2021-06-20 08:19:07 | INFO | train_inner | epoch 004: 2915 / 3002 loss=2.672, ppl=6.37, wps=5892.8, ups=0.09, wpb=64843, bsz=128, num_updates=11850, lr=9.99132e-05, gnorm=2.054, loss_scale=1, train_wall=11, gb_free=2.8, wall=135621
2021-06-20 08:19:18 | INFO | train_inner | epoch 004: 2916 / 3002 loss=2.595, ppl=6.04, wps=5846.8, ups=0.09, wpb=64880, bsz=128, num_updates=11851, lr=9.99132e-05, gnorm=1.989, loss_scale=1, train_wall=11, gb_free=2.8, wall=135632
2021-06-20 08:19:29 | INFO | train_inner | epoch 004: 2917 / 3002 loss=2.544, ppl=5.83, wps=5900.2, ups=0.09, wpb=64798, bsz=128, num_updates=11852, lr=9.99132e-05, gnorm=2.028, loss_scale=1, train_wall=11, gb_free=2.8, wall=135643
2021-06-20 08:19:39 | INFO | train_inner | epoch 004: 2918 / 3002 loss=2.476, ppl=5.56, wps=6017.6, ups=0.09, wpb=64833, bsz=128, num_updates=11853, lr=9.99132e-05, gnorm=1.933, loss_scale=1, train_wall=10, gb_free=2.8, wall=135654
2021-06-20 08:19:50 | INFO | train_inner | epoch 004: 2919 / 3002 loss=2.472, ppl=5.55, wps=5903.6, ups=0.09, wpb=64818, bsz=128, num_updates=11854, lr=9.99132e-05, gnorm=1.933, loss_scale=1, train_wall=11, gb_free=2.8, wall=135665
2021-06-20 08:20:01 | INFO | train_inner | epoch 004: 2920 / 3002 loss=2.593, ppl=6.03, wps=5862.5, ups=0.09, wpb=64800, bsz=128, num_updates=11855, lr=9.99132e-05, gnorm=2.15, loss_scale=2, train_wall=11, gb_free=2.8, wall=135676
2021-06-20 08:20:12 | INFO | train_inner | epoch 004: 2921 / 3002 loss=2.578, ppl=5.97, wps=5881.7, ups=0.09, wpb=64803, bsz=128, num_updates=11856, lr=9.99131e-05, gnorm=1.934, loss_scale=2, train_wall=11, gb_free=2.8, wall=135687
2021-06-20 08:20:24 | INFO | train_inner | epoch 004: 2922 / 3002 loss=2.595, ppl=6.04, wps=5839.7, ups=0.09, wpb=64858, bsz=128, num_updates=11857, lr=9.99131e-05, gnorm=1.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=135698
2021-06-20 08:20:35 | INFO | train_inner | epoch 004: 2923 / 3002 loss=2.477, ppl=5.57, wps=5838.7, ups=0.09, wpb=64837, bsz=128, num_updates=11858, lr=9.99131e-05, gnorm=1.968, loss_scale=2, train_wall=11, gb_free=2.8, wall=135709
2021-06-20 08:20:46 | INFO | train_inner | epoch 004: 2924 / 3002 loss=2.631, ppl=6.2, wps=5819.5, ups=0.09, wpb=64695, bsz=128, num_updates=11859, lr=9.99131e-05, gnorm=2.082, loss_scale=2, train_wall=11, gb_free=2.8, wall=135720
2021-06-20 08:20:57 | INFO | train_inner | epoch 004: 2925 / 3002 loss=2.562, ppl=5.91, wps=5930.7, ups=0.09, wpb=64901, bsz=128, num_updates=11860, lr=9.99131e-05, gnorm=2.053, loss_scale=2, train_wall=10, gb_free=2.8, wall=135731
2021-06-20 08:21:08 | INFO | train_inner | epoch 004: 2926 / 3002 loss=2.587, ppl=6.01, wps=5888.8, ups=0.09, wpb=64744, bsz=128, num_updates=11861, lr=9.99131e-05, gnorm=2.067, loss_scale=2, train_wall=11, gb_free=2.8, wall=135742
2021-06-20 08:21:19 | INFO | train_inner | epoch 004: 2927 / 3002 loss=2.675, ppl=6.39, wps=5898.3, ups=0.09, wpb=64849, bsz=128, num_updates=11862, lr=9.99131e-05, gnorm=2.577, loss_scale=2, train_wall=11, gb_free=2.8, wall=135753
2021-06-20 08:21:30 | INFO | train_inner | epoch 004: 2928 / 3002 loss=2.54, ppl=5.82, wps=5949.7, ups=0.09, wpb=64824, bsz=128, num_updates=11863, lr=9.99131e-05, gnorm=1.937, loss_scale=2, train_wall=10, gb_free=2.8, wall=135764
2021-06-20 08:21:41 | INFO | train_inner | epoch 004: 2929 / 3002 loss=2.71, ppl=6.55, wps=5784.9, ups=0.09, wpb=64717, bsz=128, num_updates=11864, lr=9.99131e-05, gnorm=1.985, loss_scale=2, train_wall=11, gb_free=2.8, wall=135775
2021-06-20 08:21:52 | INFO | train_inner | epoch 004: 2930 / 3002 loss=2.578, ppl=5.97, wps=5765.3, ups=0.09, wpb=64778, bsz=128, num_updates=11865, lr=9.99131e-05, gnorm=1.981, loss_scale=2, train_wall=11, gb_free=2.8, wall=135786
2021-06-20 08:22:03 | INFO | train_inner | epoch 004: 2931 / 3002 loss=2.614, ppl=6.12, wps=5931.3, ups=0.09, wpb=64864, bsz=128, num_updates=11866, lr=9.99131e-05, gnorm=1.979, loss_scale=2, train_wall=10, gb_free=2.8, wall=135797
2021-06-20 08:22:14 | INFO | train_inner | epoch 004: 2932 / 3002 loss=2.557, ppl=5.89, wps=5831.1, ups=0.09, wpb=64817, bsz=128, num_updates=11867, lr=9.99131e-05, gnorm=2.025, loss_scale=2, train_wall=11, gb_free=2.8, wall=135808
2021-06-20 08:22:25 | INFO | train_inner | epoch 004: 2933 / 3002 loss=2.741, ppl=6.69, wps=5917.1, ups=0.09, wpb=64825, bsz=128, num_updates=11868, lr=9.9913e-05, gnorm=2.014, loss_scale=2, train_wall=11, gb_free=2.8, wall=135819
2021-06-20 08:22:36 | INFO | train_inner | epoch 004: 2934 / 3002 loss=2.481, ppl=5.58, wps=5995.7, ups=0.09, wpb=64832, bsz=128, num_updates=11869, lr=9.9913e-05, gnorm=1.967, loss_scale=2, train_wall=10, gb_free=2.8, wall=135830
2021-06-20 08:22:47 | INFO | train_inner | epoch 004: 2935 / 3002 loss=2.577, ppl=5.97, wps=5733.3, ups=0.09, wpb=64792, bsz=128, num_updates=11870, lr=9.9913e-05, gnorm=1.992, loss_scale=2, train_wall=11, gb_free=2.8, wall=135841
2021-06-20 08:22:58 | INFO | train_inner | epoch 004: 2936 / 3002 loss=2.474, ppl=5.56, wps=6018.3, ups=0.09, wpb=64870, bsz=128, num_updates=11871, lr=9.9913e-05, gnorm=1.951, loss_scale=2, train_wall=10, gb_free=2.8, wall=135852
2021-06-20 08:23:09 | INFO | train_inner | epoch 004: 2937 / 3002 loss=2.517, ppl=5.72, wps=5955.4, ups=0.09, wpb=64837, bsz=128, num_updates=11872, lr=9.9913e-05, gnorm=1.986, loss_scale=2, train_wall=10, gb_free=2.8, wall=135863
2021-06-20 08:23:20 | INFO | train_inner | epoch 004: 2938 / 3002 loss=2.385, ppl=5.22, wps=5788.9, ups=0.09, wpb=64811, bsz=128, num_updates=11873, lr=9.9913e-05, gnorm=2.007, loss_scale=2, train_wall=11, gb_free=2.8, wall=135874
2021-06-20 08:23:31 | INFO | train_inner | epoch 004: 2939 / 3002 loss=2.624, ppl=6.17, wps=5845.9, ups=0.09, wpb=64825, bsz=128, num_updates=11874, lr=9.9913e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=135885
2021-06-20 08:23:42 | INFO | train_inner | epoch 004: 2940 / 3002 loss=2.642, ppl=6.24, wps=5848.8, ups=0.09, wpb=64829, bsz=128, num_updates=11875, lr=9.9913e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=135896
2021-06-20 08:23:53 | INFO | train_inner | epoch 004: 2941 / 3002 loss=2.649, ppl=6.27, wps=5792.2, ups=0.09, wpb=64740, bsz=128, num_updates=11876, lr=9.9913e-05, gnorm=1.959, loss_scale=2, train_wall=11, gb_free=2.8, wall=135908
2021-06-20 08:24:04 | INFO | train_inner | epoch 004: 2942 / 3002 loss=2.442, ppl=5.43, wps=5900.4, ups=0.09, wpb=64814, bsz=128, num_updates=11877, lr=9.9913e-05, gnorm=1.934, loss_scale=2, train_wall=11, gb_free=2.8, wall=135919
2021-06-20 08:24:15 | INFO | train_inner | epoch 004: 2943 / 3002 loss=2.584, ppl=6, wps=5885.3, ups=0.09, wpb=64897, bsz=128, num_updates=11878, lr=9.9913e-05, gnorm=2.027, loss_scale=2, train_wall=11, gb_free=2.8, wall=135930
2021-06-20 08:24:26 | INFO | train_inner | epoch 004: 2944 / 3002 loss=2.514, ppl=5.71, wps=5838.4, ups=0.09, wpb=64779, bsz=128, num_updates=11879, lr=9.9913e-05, gnorm=6.123, loss_scale=2, train_wall=11, gb_free=2.8, wall=135941
2021-06-20 08:24:38 | INFO | train_inner | epoch 004: 2945 / 3002 loss=2.801, ppl=6.97, wps=5774, ups=0.09, wpb=64770, bsz=128, num_updates=11880, lr=9.9913e-05, gnorm=2.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=135952
2021-06-20 08:24:49 | INFO | train_inner | epoch 004: 2946 / 3002 loss=2.417, ppl=5.34, wps=5899.9, ups=0.09, wpb=64792, bsz=128, num_updates=11881, lr=9.99129e-05, gnorm=2.034, loss_scale=2, train_wall=11, gb_free=2.8, wall=135963
2021-06-20 08:25:00 | INFO | train_inner | epoch 004: 2947 / 3002 loss=2.578, ppl=5.97, wps=5915.9, ups=0.09, wpb=64778, bsz=128, num_updates=11882, lr=9.99129e-05, gnorm=2.35, loss_scale=2, train_wall=10, gb_free=2.8, wall=135974
2021-06-20 08:25:11 | INFO | train_inner | epoch 004: 2948 / 3002 loss=2.718, ppl=6.58, wps=5907.5, ups=0.09, wpb=64841, bsz=128, num_updates=11883, lr=9.99129e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=135985
2021-06-20 08:25:22 | INFO | train_inner | epoch 004: 2949 / 3002 loss=2.562, ppl=5.9, wps=5796.9, ups=0.09, wpb=64816, bsz=128, num_updates=11884, lr=9.99129e-05, gnorm=1.985, loss_scale=2, train_wall=11, gb_free=2.8, wall=135996
2021-06-20 08:25:33 | INFO | train_inner | epoch 004: 2950 / 3002 loss=2.491, ppl=5.62, wps=5819.1, ups=0.09, wpb=64844, bsz=128, num_updates=11885, lr=9.99129e-05, gnorm=1.928, loss_scale=2, train_wall=11, gb_free=2.8, wall=136007
2021-06-20 08:25:44 | INFO | train_inner | epoch 004: 2951 / 3002 loss=2.632, ppl=6.2, wps=5810.4, ups=0.09, wpb=64818, bsz=128, num_updates=11886, lr=9.99129e-05, gnorm=1.939, loss_scale=2, train_wall=11, gb_free=2.8, wall=136018
2021-06-20 08:25:55 | INFO | train_inner | epoch 004: 2952 / 3002 loss=2.575, ppl=5.96, wps=5861.4, ups=0.09, wpb=64810, bsz=128, num_updates=11887, lr=9.99129e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=136029
2021-06-20 08:26:06 | INFO | train_inner | epoch 004: 2953 / 3002 loss=2.645, ppl=6.25, wps=5910.4, ups=0.09, wpb=64863, bsz=128, num_updates=11888, lr=9.99129e-05, gnorm=2.097, loss_scale=2, train_wall=11, gb_free=2.8, wall=136040
2021-06-20 08:26:17 | INFO | train_inner | epoch 004: 2954 / 3002 loss=2.475, ppl=5.56, wps=5920.2, ups=0.09, wpb=64820, bsz=128, num_updates=11889, lr=9.99129e-05, gnorm=2.002, loss_scale=2, train_wall=10, gb_free=2.8, wall=136051
2021-06-20 08:26:28 | INFO | train_inner | epoch 004: 2955 / 3002 loss=2.643, ppl=6.25, wps=5841.6, ups=0.09, wpb=64845, bsz=128, num_updates=11890, lr=9.99129e-05, gnorm=2.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=136062
2021-06-20 08:26:39 | INFO | train_inner | epoch 004: 2956 / 3002 loss=2.509, ppl=5.69, wps=5846.3, ups=0.09, wpb=64881, bsz=128, num_updates=11891, lr=9.99129e-05, gnorm=2.039, loss_scale=2, train_wall=11, gb_free=2.8, wall=136074
2021-06-20 08:26:50 | INFO | train_inner | epoch 004: 2957 / 3002 loss=2.608, ppl=6.1, wps=5974.2, ups=0.09, wpb=64789, bsz=128, num_updates=11892, lr=9.99129e-05, gnorm=1.991, loss_scale=2, train_wall=10, gb_free=2.8, wall=136084
2021-06-20 08:27:01 | INFO | train_inner | epoch 004: 2958 / 3002 loss=2.603, ppl=6.08, wps=5862.7, ups=0.09, wpb=64816, bsz=128, num_updates=11893, lr=9.99128e-05, gnorm=2.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=136095
2021-06-20 08:27:12 | INFO | train_inner | epoch 004: 2959 / 3002 loss=2.499, ppl=5.65, wps=5772.9, ups=0.09, wpb=64858, bsz=128, num_updates=11894, lr=9.99128e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=136107
2021-06-20 08:27:23 | INFO | train_inner | epoch 004: 2960 / 3002 loss=2.515, ppl=5.71, wps=5957.1, ups=0.09, wpb=64841, bsz=128, num_updates=11895, lr=9.99128e-05, gnorm=2.053, loss_scale=2, train_wall=10, gb_free=2.8, wall=136118
2021-06-20 08:27:34 | INFO | train_inner | epoch 004: 2961 / 3002 loss=2.328, ppl=5.02, wps=5830.1, ups=0.09, wpb=64844, bsz=128, num_updates=11896, lr=9.99128e-05, gnorm=1.958, loss_scale=2, train_wall=11, gb_free=2.8, wall=136129
2021-06-20 08:27:45 | INFO | train_inner | epoch 004: 2962 / 3002 loss=2.626, ppl=6.17, wps=5870.2, ups=0.09, wpb=64899, bsz=128, num_updates=11897, lr=9.99128e-05, gnorm=1.941, loss_scale=2, train_wall=11, gb_free=2.8, wall=136140
2021-06-20 08:27:56 | INFO | train_inner | epoch 004: 2963 / 3002 loss=2.543, ppl=5.83, wps=5895, ups=0.09, wpb=64788, bsz=128, num_updates=11898, lr=9.99128e-05, gnorm=1.955, loss_scale=2, train_wall=10, gb_free=2.8, wall=136151
2021-06-20 08:28:08 | INFO | train_inner | epoch 004: 2964 / 3002 loss=2.592, ppl=6.03, wps=5831.3, ups=0.09, wpb=64851, bsz=128, num_updates=11899, lr=9.99128e-05, gnorm=2.169, loss_scale=2, train_wall=11, gb_free=2.8, wall=136162
2021-06-20 08:28:18 | INFO | train_inner | epoch 004: 2965 / 3002 loss=2.634, ppl=6.21, wps=5935.9, ups=0.09, wpb=64934, bsz=128, num_updates=11900, lr=9.99128e-05, gnorm=1.993, loss_scale=2, train_wall=10, gb_free=2.8, wall=136173
2021-06-20 08:28:30 | INFO | train_inner | epoch 004: 2966 / 3002 loss=2.517, ppl=5.72, wps=5829.3, ups=0.09, wpb=64829, bsz=128, num_updates=11901, lr=9.99128e-05, gnorm=2.129, loss_scale=2, train_wall=11, gb_free=2.8, wall=136184
2021-06-20 08:28:41 | INFO | train_inner | epoch 004: 2967 / 3002 loss=2.539, ppl=5.81, wps=5820.2, ups=0.09, wpb=64903, bsz=128, num_updates=11902, lr=9.99128e-05, gnorm=1.972, loss_scale=2, train_wall=11, gb_free=2.8, wall=136195
2021-06-20 08:28:52 | INFO | train_inner | epoch 004: 2968 / 3002 loss=2.532, ppl=5.78, wps=5783.4, ups=0.09, wpb=64773, bsz=128, num_updates=11903, lr=9.99128e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=136206
2021-06-20 08:29:03 | INFO | train_inner | epoch 004: 2969 / 3002 loss=2.453, ppl=5.48, wps=5902.1, ups=0.09, wpb=64913, bsz=128, num_updates=11904, lr=9.99128e-05, gnorm=2.06, loss_scale=2, train_wall=11, gb_free=2.8, wall=136217
2021-06-20 08:29:14 | INFO | train_inner | epoch 004: 2970 / 3002 loss=2.589, ppl=6.02, wps=5896.8, ups=0.09, wpb=64836, bsz=128, num_updates=11905, lr=9.99128e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=136228
2021-06-20 08:29:25 | INFO | train_inner | epoch 004: 2971 / 3002 loss=2.589, ppl=6.02, wps=5869.6, ups=0.09, wpb=64786, bsz=128, num_updates=11906, lr=9.99127e-05, gnorm=1.984, loss_scale=2, train_wall=11, gb_free=2.8, wall=136239
2021-06-20 08:29:36 | INFO | train_inner | epoch 004: 2972 / 3002 loss=2.439, ppl=5.42, wps=5890.8, ups=0.09, wpb=64876, bsz=128, num_updates=11907, lr=9.99127e-05, gnorm=1.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=136250
2021-06-20 08:29:47 | INFO | train_inner | epoch 004: 2973 / 3002 loss=2.538, ppl=5.81, wps=6055.4, ups=0.09, wpb=64868, bsz=128, num_updates=11908, lr=9.99127e-05, gnorm=2.079, loss_scale=2, train_wall=10, gb_free=2.8, wall=136261
2021-06-20 08:29:58 | INFO | train_inner | epoch 004: 2974 / 3002 loss=2.475, ppl=5.56, wps=5934.6, ups=0.09, wpb=64850, bsz=128, num_updates=11909, lr=9.99127e-05, gnorm=1.977, loss_scale=2, train_wall=10, gb_free=2.8, wall=136272
2021-06-20 08:30:09 | INFO | train_inner | epoch 004: 2975 / 3002 loss=2.708, ppl=6.53, wps=5917.8, ups=0.09, wpb=64757, bsz=128, num_updates=11910, lr=9.99127e-05, gnorm=2.076, loss_scale=2, train_wall=10, gb_free=2.8, wall=136283
2021-06-20 08:30:20 | INFO | train_inner | epoch 004: 2976 / 3002 loss=2.549, ppl=5.85, wps=5930, ups=0.09, wpb=64799, bsz=128, num_updates=11911, lr=9.99127e-05, gnorm=2.064, loss_scale=2, train_wall=10, gb_free=2.8, wall=136294
2021-06-20 08:30:30 | INFO | train_inner | epoch 004: 2977 / 3002 loss=2.516, ppl=5.72, wps=5935.3, ups=0.09, wpb=64771, bsz=128, num_updates=11912, lr=9.99127e-05, gnorm=1.963, loss_scale=2, train_wall=10, gb_free=2.8, wall=136305
2021-06-20 08:30:42 | INFO | train_inner | epoch 004: 2978 / 3002 loss=2.678, ppl=6.4, wps=5754.1, ups=0.09, wpb=64832, bsz=128, num_updates=11913, lr=9.99127e-05, gnorm=1.999, loss_scale=2, train_wall=11, gb_free=2.8, wall=136316
2021-06-20 08:30:53 | INFO | train_inner | epoch 004: 2979 / 3002 loss=2.593, ppl=6.03, wps=5848.1, ups=0.09, wpb=64799, bsz=128, num_updates=11914, lr=9.99127e-05, gnorm=2.157, loss_scale=2, train_wall=11, gb_free=2.8, wall=136327
2021-06-20 08:31:04 | INFO | train_inner | epoch 004: 2980 / 3002 loss=2.612, ppl=6.11, wps=5852, ups=0.09, wpb=64857, bsz=128, num_updates=11915, lr=9.99127e-05, gnorm=1.978, loss_scale=2, train_wall=11, gb_free=2.8, wall=136338
2021-06-20 08:31:15 | INFO | train_inner | epoch 004: 2981 / 3002 loss=2.585, ppl=6, wps=5860.6, ups=0.09, wpb=64859, bsz=128, num_updates=11916, lr=9.99127e-05, gnorm=1.927, loss_scale=2, train_wall=11, gb_free=2.8, wall=136349
2021-06-20 08:31:26 | INFO | train_inner | epoch 004: 2982 / 3002 loss=2.478, ppl=5.57, wps=5847.3, ups=0.09, wpb=64793, bsz=128, num_updates=11917, lr=9.99127e-05, gnorm=2.042, loss_scale=2, train_wall=11, gb_free=2.8, wall=136360
2021-06-20 08:31:37 | INFO | train_inner | epoch 004: 2983 / 3002 loss=2.529, ppl=5.77, wps=5823.8, ups=0.09, wpb=64843, bsz=128, num_updates=11918, lr=9.99126e-05, gnorm=1.911, loss_scale=2, train_wall=11, gb_free=2.8, wall=136371
2021-06-20 08:31:48 | INFO | train_inner | epoch 004: 2984 / 3002 loss=2.451, ppl=5.47, wps=5888.5, ups=0.09, wpb=64900, bsz=128, num_updates=11919, lr=9.99126e-05, gnorm=1.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=136382
2021-06-20 08:31:59 | INFO | train_inner | epoch 004: 2985 / 3002 loss=2.61, ppl=6.11, wps=5804.9, ups=0.09, wpb=64782, bsz=128, num_updates=11920, lr=9.99126e-05, gnorm=2.031, loss_scale=2, train_wall=11, gb_free=2.8, wall=136394
2021-06-20 08:32:10 | INFO | train_inner | epoch 004: 2986 / 3002 loss=2.469, ppl=5.54, wps=5918.1, ups=0.09, wpb=64774, bsz=128, num_updates=11921, lr=9.99126e-05, gnorm=2.038, loss_scale=2, train_wall=10, gb_free=2.8, wall=136405
2021-06-20 08:32:22 | INFO | train_inner | epoch 004: 2987 / 3002 loss=2.596, ppl=6.04, wps=5738.8, ups=0.09, wpb=64773, bsz=128, num_updates=11922, lr=9.99126e-05, gnorm=2.252, loss_scale=2, train_wall=11, gb_free=2.8, wall=136416
2021-06-20 08:32:32 | INFO | train_inner | epoch 004: 2988 / 3002 loss=2.706, ppl=6.53, wps=5944.8, ups=0.09, wpb=64824, bsz=128, num_updates=11923, lr=9.99126e-05, gnorm=2.039, loss_scale=2, train_wall=10, gb_free=2.8, wall=136427
2021-06-20 08:32:43 | INFO | train_inner | epoch 004: 2989 / 3002 loss=2.608, ppl=6.1, wps=5894, ups=0.09, wpb=64878, bsz=128, num_updates=11924, lr=9.99126e-05, gnorm=2.062, loss_scale=2, train_wall=11, gb_free=2.8, wall=136438
2021-06-20 08:32:55 | INFO | train_inner | epoch 004: 2990 / 3002 loss=2.583, ppl=5.99, wps=5839.6, ups=0.09, wpb=64844, bsz=128, num_updates=11925, lr=9.99126e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=136449
2021-06-20 08:33:06 | INFO | train_inner | epoch 004: 2991 / 3002 loss=2.57, ppl=5.94, wps=5739, ups=0.09, wpb=64772, bsz=128, num_updates=11926, lr=9.99126e-05, gnorm=2.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=136460
2021-06-20 08:33:17 | INFO | train_inner | epoch 004: 2992 / 3002 loss=2.446, ppl=5.45, wps=5835.4, ups=0.09, wpb=64773, bsz=128, num_updates=11927, lr=9.99126e-05, gnorm=1.932, loss_scale=2, train_wall=11, gb_free=2.8, wall=136471
2021-06-20 08:33:28 | INFO | train_inner | epoch 004: 2993 / 3002 loss=2.431, ppl=5.39, wps=5818.2, ups=0.09, wpb=64830, bsz=128, num_updates=11928, lr=9.99126e-05, gnorm=1.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=136482
2021-06-20 08:33:39 | INFO | train_inner | epoch 004: 2994 / 3002 loss=2.571, ppl=5.94, wps=5919.2, ups=0.09, wpb=64823, bsz=128, num_updates=11929, lr=9.99126e-05, gnorm=2.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=136493
2021-06-20 08:33:50 | INFO | train_inner | epoch 004: 2995 / 3002 loss=2.415, ppl=5.33, wps=5868.1, ups=0.09, wpb=64840, bsz=128, num_updates=11930, lr=9.99126e-05, gnorm=2.305, loss_scale=2, train_wall=11, gb_free=2.8, wall=136504
2021-06-20 08:34:01 | INFO | train_inner | epoch 004: 2996 / 3002 loss=2.555, ppl=5.88, wps=5921.5, ups=0.09, wpb=64798, bsz=128, num_updates=11931, lr=9.99125e-05, gnorm=2.065, loss_scale=2, train_wall=10, gb_free=2.8, wall=136515
2021-06-20 08:34:12 | INFO | train_inner | epoch 004: 2997 / 3002 loss=2.585, ppl=6, wps=5883.6, ups=0.09, wpb=64897, bsz=128, num_updates=11932, lr=9.99125e-05, gnorm=2.079, loss_scale=2, train_wall=11, gb_free=2.8, wall=136526
2021-06-20 08:34:23 | INFO | train_inner | epoch 004: 2998 / 3002 loss=2.469, ppl=5.54, wps=5887.8, ups=0.09, wpb=64854, bsz=128, num_updates=11933, lr=9.99125e-05, gnorm=2.15, loss_scale=2, train_wall=11, gb_free=2.8, wall=136537
2021-06-20 08:34:34 | INFO | train_inner | epoch 004: 2999 / 3002 loss=2.476, ppl=5.56, wps=5907.8, ups=0.09, wpb=64832, bsz=128, num_updates=11934, lr=9.99125e-05, gnorm=1.925, loss_scale=2, train_wall=11, gb_free=2.8, wall=136548
2021-06-20 08:34:45 | INFO | train_inner | epoch 004: 3000 / 3002 loss=2.497, ppl=5.64, wps=5837.1, ups=0.09, wpb=64749, bsz=128, num_updates=11935, lr=9.99125e-05, gnorm=1.949, loss_scale=2, train_wall=11, gb_free=2.8, wall=136559
2021-06-20 08:34:56 | INFO | train_inner | epoch 004: 3001 / 3002 loss=2.416, ppl=5.34, wps=5797.1, ups=0.09, wpb=64824, bsz=128, num_updates=11936, lr=9.99125e-05, gnorm=1.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=136571
2021-06-20 08:35:03 | INFO | train_inner | epoch 004: 3002 / 3002 loss=2.395, ppl=5.26, wps=5755.2, ups=0.16, wpb=36396, bsz=72, num_updates=11937, lr=9.99125e-05, gnorm=2.61, loss_scale=2, train_wall=6, gb_free=2.8, wall=136577
2021-06-20 08:35:03 | INFO | fairseq_cli.train | begin validation on "valid" subset
2021-06-20 08:49:54 | INFO | valid | epoch 004 | valid on 'valid' subset | loss 2.405 | ppl 5.3 | wps 19798.9 | wpb 506.5 | bsz 1 | num_updates 11937 | best_loss 2.405
2021-06-20 08:49:54 | INFO | fairseq.checkpoint_utils | Preparing to save checkpoint for epoch 4 @ 11937 updates
2021-06-20 08:49:54 | INFO | fairseq.trainer | Saving checkpoint to checkpoints/checkpoint4.pt
2021-06-20 08:50:06 | INFO | fairseq.trainer | Finished saving checkpoint to checkpoints/checkpoint4.pt
2021-06-20 08:56:00 | INFO | fairseq.checkpoint_utils | Saved checkpoint checkpoints/checkpoint4.pt (epoch 4 @ 11937 updates, score 2.405) (writing took 366.0893832309812 seconds)
2021-06-20 08:56:05 | INFO | fairseq_cli.train | end of epoch 4 (average epoch stats below)
2021-06-20 08:56:05 | INFO | train | epoch 004 | loss 2.584 | ppl 6 | wps 5614.7 | ups 0.09 | wpb 64819.4 | bsz 128 | num_updates 11937 | lr 9.99125e-05 | gnorm 2.584 | loss_scale 2 | train_wall 31853 | gb_free 2.8 | wall 137839
2021-06-20 08:56:05 | INFO | fairseq.trainer | begin training epoch 5
2021-06-20 08:56:05 | INFO | fairseq_cli.train | Start iterating over samples
2021-06-20 08:56:16 | INFO | train_inner | epoch 005: 1 / 3002 loss=2.48, ppl=5.58, wps=50.9, ups=0, wpb=64737, bsz=128, num_updates=11938, lr=9.99125e-05, gnorm=2.045, loss_scale=2, train_wall=10, gb_free=2.8, wall=137850
2021-06-20 08:56:26 | INFO | train_inner | epoch 005: 2 / 3002 loss=2.668, ppl=6.35, wps=6195.2, ups=0.1, wpb=64796, bsz=128, num_updates=11939, lr=9.99125e-05, gnorm=2.046, loss_scale=2, train_wall=10, gb_free=2.8, wall=137861
2021-06-20 08:56:37 | INFO | train_inner | epoch 005: 3 / 3002 loss=2.558, ppl=5.89, wps=6234.1, ups=0.1, wpb=64911, bsz=128, num_updates=11940, lr=9.99125e-05, gnorm=2.097, loss_scale=2, train_wall=10, gb_free=2.8, wall=137871
2021-06-20 08:56:47 | INFO | train_inner | epoch 005: 4 / 3002 loss=2.412, ppl=5.32, wps=6313.1, ups=0.1, wpb=64855, bsz=128, num_updates=11941, lr=9.99125e-05, gnorm=1.938, loss_scale=2, train_wall=10, gb_free=2.8, wall=137881
2021-06-20 08:56:57 | INFO | train_inner | epoch 005: 5 / 3002 loss=2.434, ppl=5.4, wps=6257, ups=0.1, wpb=64869, bsz=128, num_updates=11942, lr=9.99125e-05, gnorm=1.996, loss_scale=2, train_wall=10, gb_free=2.8, wall=137892
2021-06-20 08:57:08 | INFO | train_inner | epoch 005: 6 / 3002 loss=2.461, ppl=5.5, wps=6194.2, ups=0.1, wpb=64805, bsz=128, num_updates=11943, lr=9.99124e-05, gnorm=2.074, loss_scale=2, train_wall=10, gb_free=2.8, wall=137902
2021-06-20 08:57:18 | INFO | train_inner | epoch 005: 7 / 3002 loss=2.403, ppl=5.29, wps=6071.5, ups=0.09, wpb=64854, bsz=128, num_updates=11944, lr=9.99124e-05, gnorm=2.019, loss_scale=2, train_wall=10, gb_free=2.8, wall=137913
2021-06-20 08:57:29 | INFO | train_inner | epoch 005: 8 / 3002 loss=2.508, ppl=5.69, wps=5988.3, ups=0.09, wpb=64883, bsz=128, num_updates=11945, lr=9.99124e-05, gnorm=2.024, loss_scale=2, train_wall=10, gb_free=2.8, wall=137924
2021-06-20 08:57:40 | INFO | train_inner | epoch 005: 9 / 3002 loss=2.441, ppl=5.43, wps=5876.4, ups=0.09, wpb=64864, bsz=128, num_updates=11946, lr=9.99124e-05, gnorm=1.975, loss_scale=2, train_wall=11, gb_free=2.8, wall=137935
2021-06-20 08:57:51 | INFO | train_inner | epoch 005: 10 / 3002 loss=2.49, ppl=5.62, wps=5908.4, ups=0.09, wpb=64797, bsz=128, num_updates=11947, lr=9.99124e-05, gnorm=1.936, loss_scale=2, train_wall=11, gb_free=2.8, wall=137946
2021-06-20 08:58:02 | INFO | train_inner | epoch 005: 11 / 3002 loss=2.404, ppl=5.29, wps=5825.7, ups=0.09, wpb=64782, bsz=128, num_updates=11948, lr=9.99124e-05, gnorm=1.974, loss_scale=2, train_wall=11, gb_free=2.8, wall=137957
2021-06-20 08:58:13 | INFO | train_inner | epoch 005: 12 / 3002 loss=2.525, ppl=5.75, wps=6032, ups=0.09, wpb=64828, bsz=128, num_updates=11949, lr=9.99124e-05, gnorm=2.08, loss_scale=2, train_wall=10, gb_free=2.8, wall=137967
2021-06-20 08:58:24 | INFO | train_inner | epoch 005: 13 / 3002 loss=2.615, ppl=6.13, wps=5809.3, ups=0.09, wpb=64772, bsz=128, num_updates=11950, lr=9.99124e-05, gnorm=2.069, loss_scale=2, train_wall=11, gb_free=2.8, wall=137979
2021-06-20 08:58:35 | INFO | train_inner | epoch 005: 14 / 3002 loss=2.355, ppl=5.12, wps=6020.8, ups=0.09, wpb=64855, bsz=128, num_updates=11951, lr=9.99124e-05, gnorm=1.997, loss_scale=2, train_wall=10, gb_free=2.8, wall=137989
2021-06-20 08:58:46 | INFO | train_inner | epoch 005: 15 / 3002 loss=2.475, ppl=5.56, wps=5949.7, ups=0.09, wpb=64878, bsz=128, num_updates=11952, lr=9.99124e-05, gnorm=1.986, loss_scale=2, train_wall=10, gb_free=2.8, wall=138000
2021-06-20 08:58:57 | INFO | train_inner | epoch 005: 16 / 3002 loss=2.464, ppl=5.52, wps=5989.8, ups=0.09, wpb=64852, bsz=128, num_updates=11953, lr=9.99124e-05, gnorm=2.262, loss_scale=2, train_wall=10, gb_free=2.8, wall=138011
2021-06-20 08:59:08 | INFO | train_inner | epoch 005: 17 / 3002 loss=2.456, ppl=5.49, wps=5756, ups=0.09, wpb=64848, bsz=128, num_updates=11954, lr=9.99124e-05, gnorm=2.039, loss_scale=2, train_wall=11, gb_free=2.8, wall=138022
2021-06-20 08:59:19 | INFO | train_inner | epoch 005: 18 / 3002 loss=2.538, ppl=5.81, wps=5768, ups=0.09, wpb=64835, bsz=128, num_updates=11955, lr=9.99124e-05, gnorm=2.073, loss_scale=2, train_wall=11, gb_free=2.8, wall=138034
2021-06-20 08:59:30 | INFO | train_inner | epoch 005: 19 / 3002 loss=2.55, ppl=5.85, wps=5797.9, ups=0.09, wpb=64835, bsz=128, num_updates=11956, lr=9.99123e-05, gnorm=1.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=138045
2021-06-20 08:59:42 | INFO | train_inner | epoch 005: 20 / 3002 loss=2.534, ppl=5.79, wps=5725.5, ups=0.09, wpb=64891, bsz=128, num_updates=11957, lr=9.99123e-05, gnorm=2.014, loss_scale=2, train_wall=11, gb_free=2.8, wall=138056
2021-06-20 08:59:53 | INFO | train_inner | epoch 005: 21 / 3002 loss=2.448, ppl=5.46, wps=5971.8, ups=0.09, wpb=64839, bsz=128, num_updates=11958, lr=9.99123e-05, gnorm=2.005, loss_scale=2, train_wall=10, gb_free=2.8, wall=138067
2021-06-20 09:00:04 | INFO | train_inner | epoch 005: 22 / 3002 loss=2.596, ppl=6.05, wps=5883.8, ups=0.09, wpb=64758, bsz=128, num_updates=11959, lr=9.99123e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=138078
2021-06-20 09:00:15 | INFO | train_inner | epoch 005: 23 / 3002 loss=2.709, ppl=6.54, wps=5773.8, ups=0.09, wpb=64829, bsz=128, num_updates=11960, lr=9.99123e-05, gnorm=2.105, loss_scale=2, train_wall=11, gb_free=2.8, wall=138089
2021-06-20 09:00:26 | INFO | train_inner | epoch 005: 24 / 3002 loss=2.598, ppl=6.06, wps=5852, ups=0.09, wpb=64810, bsz=128, num_updates=11961, lr=9.99123e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=138100
2021-06-20 09:00:37 | INFO | train_inner | epoch 005: 25 / 3002 loss=2.506, ppl=5.68, wps=5793.7, ups=0.09, wpb=64792, bsz=128, num_updates=11962, lr=9.99123e-05, gnorm=1.947, loss_scale=2, train_wall=11, gb_free=2.8, wall=138111
2021-06-20 09:00:48 | INFO | train_inner | epoch 005: 26 / 3002 loss=2.467, ppl=5.53, wps=5957.2, ups=0.09, wpb=64899, bsz=128, num_updates=11963, lr=9.99123e-05, gnorm=1.987, loss_scale=2, train_wall=10, gb_free=2.8, wall=138122
2021-06-20 09:00:59 | INFO | train_inner | epoch 005: 27 / 3002 loss=2.524, ppl=5.75, wps=5851.8, ups=0.09, wpb=64758, bsz=128, num_updates=11964, lr=9.99123e-05, gnorm=1.958, loss_scale=2, train_wall=11, gb_free=2.8, wall=138133
2021-06-20 09:01:10 | INFO | train_inner | epoch 005: 28 / 3002 loss=2.587, ppl=6.01, wps=6031.5, ups=0.09, wpb=64783, bsz=128, num_updates=11965, lr=9.99123e-05, gnorm=2.062, loss_scale=2, train_wall=10, gb_free=2.8, wall=138144
2021-06-20 09:01:21 | INFO | train_inner | epoch 005: 29 / 3002 loss=2.442, ppl=5.43, wps=5986.3, ups=0.09, wpb=64850, bsz=128, num_updates=11966, lr=9.99123e-05, gnorm=2.009, loss_scale=2, train_wall=10, gb_free=2.8, wall=138155
2021-06-20 09:01:32 | INFO | train_inner | epoch 005: 30 / 3002 loss=2.419, ppl=5.35, wps=5868.7, ups=0.09, wpb=64785, bsz=128, num_updates=11967, lr=9.99123e-05, gnorm=1.981, loss_scale=2, train_wall=11, gb_free=2.8, wall=138166
2021-06-20 09:01:43 | INFO | train_inner | epoch 005: 31 / 3002 loss=2.598, ppl=6.05, wps=5778.6, ups=0.09, wpb=64777, bsz=128, num_updates=11968, lr=9.99122e-05, gnorm=2.063, loss_scale=2, train_wall=11, gb_free=2.8, wall=138177
2021-06-20 09:01:54 | INFO | train_inner | epoch 005: 32 / 3002 loss=2.401, ppl=5.28, wps=5763.2, ups=0.09, wpb=64844, bsz=128, num_updates=11969, lr=9.99122e-05, gnorm=2.17, loss_scale=2, train_wall=11, gb_free=2.8, wall=138189
2021-06-20 09:02:05 | INFO | train_inner | epoch 005: 33 / 3002 loss=2.624, ppl=6.17, wps=5827.2, ups=0.09, wpb=64781, bsz=128, num_updates=11970, lr=9.99122e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=138200
2021-06-20 09:02:16 | INFO | train_inner | epoch 005: 34 / 3002 loss=2.575, ppl=5.96, wps=5852.6, ups=0.09, wpb=64822, bsz=128, num_updates=11971, lr=9.99122e-05, gnorm=2.046, loss_scale=2, train_wall=11, gb_free=2.8, wall=138211
2021-06-20 09:02:27 | INFO | train_inner | epoch 005: 35 / 3002 loss=2.632, ppl=6.2, wps=5858.6, ups=0.09, wpb=64827, bsz=128, num_updates=11972, lr=9.99122e-05, gnorm=2.035, loss_scale=2, train_wall=11, gb_free=2.8, wall=138222
2021-06-20 09:02:38 | INFO | train_inner | epoch 005: 36 / 3002 loss=2.489, ppl=5.61, wps=5902.8, ups=0.09, wpb=64793, bsz=128, num_updates=11973, lr=9.99122e-05, gnorm=1.922, loss_scale=2, train_wall=11, gb_free=2.8, wall=138233
2021-06-20 09:02:50 | INFO | train_inner | epoch 005: 37 / 3002 loss=2.503, ppl=5.67, wps=5828.2, ups=0.09, wpb=64842, bsz=128, num_updates=11974, lr=9.99122e-05, gnorm=1.951, loss_scale=2, train_wall=11, gb_free=2.8, wall=138244
2021-06-20 09:03:01 | INFO | train_inner | epoch 005: 38 / 3002 loss=2.44, ppl=5.43, wps=5896.8, ups=0.09, wpb=64769, bsz=128, num_updates=11975, lr=9.99122e-05, gnorm=2.137, loss_scale=2, train_wall=11, gb_free=2.8, wall=138255
2021-06-20 09:03:12 | INFO | train_inner | epoch 005: 39 / 3002 loss=2.615, ppl=6.13, wps=5769.7, ups=0.09, wpb=64805, bsz=128, num_updates=11976, lr=9.99122e-05, gnorm=1.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=138266
2021-06-20 09:03:23 | INFO | train_inner | epoch 005: 40 / 3002 loss=2.501, ppl=5.66, wps=5749.9, ups=0.09, wpb=64912, bsz=128, num_updates=11977, lr=9.99122e-05, gnorm=1.984, loss_scale=2, train_wall=11, gb_free=2.8, wall=138277
2021-06-20 09:03:34 | INFO | train_inner | epoch 005: 41 / 3002 loss=2.56, ppl=5.9, wps=5889.2, ups=0.09, wpb=64841, bsz=128, num_updates=11978, lr=9.99122e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=138288
2021-06-20 09:03:45 | INFO | train_inner | epoch 005: 42 / 3002 loss=2.688, ppl=6.44, wps=5883.9, ups=0.09, wpb=64837, bsz=128, num_updates=11979, lr=9.99122e-05, gnorm=2.018, loss_scale=2, train_wall=11, gb_free=2.8, wall=138299
2021-06-20 09:03:56 | INFO | train_inner | epoch 005: 43 / 3002 loss=2.551, ppl=5.86, wps=5774.3, ups=0.09, wpb=64757, bsz=128, num_updates=11980, lr=9.99122e-05, gnorm=1.98, loss_scale=2, train_wall=11, gb_free=2.8, wall=138311
2021-06-20 09:04:08 | INFO | train_inner | epoch 005: 44 / 3002 loss=2.469, ppl=5.54, wps=5775.4, ups=0.09, wpb=64880, bsz=128, num_updates=11981, lr=9.99121e-05, gnorm=2.003, loss_scale=2, train_wall=11, gb_free=2.8, wall=138322
2021-06-20 09:04:19 | INFO | train_inner | epoch 005: 45 / 3002 loss=2.514, ppl=5.71, wps=5739.4, ups=0.09, wpb=64824, bsz=128, num_updates=11982, lr=9.99121e-05, gnorm=2.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=138333
2021-06-20 09:04:30 | INFO | train_inner | epoch 005: 46 / 3002 loss=2.548, ppl=5.85, wps=5887.3, ups=0.09, wpb=64801, bsz=128, num_updates=11983, lr=9.99121e-05, gnorm=2.148, loss_scale=4, train_wall=11, gb_free=2.8, wall=138344
2021-06-20 09:04:41 | INFO | train_inner | epoch 005: 47 / 3002 loss=2.519, ppl=5.73, wps=5751.9, ups=0.09, wpb=64862, bsz=128, num_updates=11984, lr=9.99121e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=138355
2021-06-20 09:04:52 | INFO | train_inner | epoch 005: 48 / 3002 loss=2.635, ppl=6.21, wps=5803.9, ups=0.09, wpb=64770, bsz=128, num_updates=11985, lr=9.99121e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=138367
2021-06-20 09:05:03 | INFO | train_inner | epoch 005: 49 / 3002 loss=2.541, ppl=5.82, wps=5801.2, ups=0.09, wpb=64763, bsz=128, num_updates=11986, lr=9.99121e-05, gnorm=1.95, loss_scale=4, train_wall=11, gb_free=2.8, wall=138378
2021-06-20 09:05:15 | INFO | train_inner | epoch 005: 50 / 3002 loss=2.701, ppl=6.5, wps=5851.4, ups=0.09, wpb=64830, bsz=128, num_updates=11987, lr=9.99121e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=138389
2021-06-20 09:05:26 | INFO | train_inner | epoch 005: 51 / 3002 loss=2.41, ppl=5.31, wps=5894.2, ups=0.09, wpb=64925, bsz=128, num_updates=11988, lr=9.99121e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=138400
2021-06-20 09:05:37 | INFO | train_inner | epoch 005: 52 / 3002 loss=2.558, ppl=5.89, wps=5805.8, ups=0.09, wpb=64759, bsz=128, num_updates=11989, lr=9.99121e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=138411
2021-06-20 09:05:48 | INFO | train_inner | epoch 005: 53 / 3002 loss=2.407, ppl=5.31, wps=5838.2, ups=0.09, wpb=64805, bsz=128, num_updates=11990, lr=9.99121e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=138422
2021-06-20 09:05:59 | INFO | train_inner | epoch 005: 54 / 3002 loss=2.526, ppl=5.76, wps=5724.9, ups=0.09, wpb=64770, bsz=128, num_updates=11991, lr=9.99121e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=138433
2021-06-20 09:06:10 | INFO | train_inner | epoch 005: 55 / 3002 loss=2.489, ppl=5.61, wps=5859.4, ups=0.09, wpb=64817, bsz=128, num_updates=11992, lr=9.99121e-05, gnorm=2.041, loss_scale=4, train_wall=11, gb_free=2.8, wall=138444
2021-06-20 09:06:21 | INFO | train_inner | epoch 005: 56 / 3002 loss=2.456, ppl=5.49, wps=5957.3, ups=0.09, wpb=64785, bsz=128, num_updates=11993, lr=9.9912e-05, gnorm=2.012, loss_scale=4, train_wall=10, gb_free=2.8, wall=138455
2021-06-20 09:06:32 | INFO | train_inner | epoch 005: 57 / 3002 loss=2.352, ppl=5.11, wps=5866.4, ups=0.09, wpb=64828, bsz=128, num_updates=11994, lr=9.9912e-05, gnorm=1.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=138466
2021-06-20 09:06:43 | INFO | train_inner | epoch 005: 58 / 3002 loss=2.552, ppl=5.86, wps=5939.1, ups=0.09, wpb=64777, bsz=128, num_updates=11995, lr=9.9912e-05, gnorm=1.943, loss_scale=4, train_wall=10, gb_free=2.8, wall=138477
2021-06-20 09:06:54 | INFO | train_inner | epoch 005: 59 / 3002 loss=2.609, ppl=6.1, wps=5775.3, ups=0.09, wpb=64826, bsz=128, num_updates=11996, lr=9.9912e-05, gnorm=2.016, loss_scale=4, train_wall=11, gb_free=2.8, wall=138489
2021-06-20 09:07:05 | INFO | train_inner | epoch 005: 60 / 3002 loss=2.733, ppl=6.65, wps=5926.2, ups=0.09, wpb=64866, bsz=128, num_updates=11997, lr=9.9912e-05, gnorm=2.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=138499
2021-06-20 09:07:16 | INFO | train_inner | epoch 005: 61 / 3002 loss=2.557, ppl=5.89, wps=5981.4, ups=0.09, wpb=64842, bsz=128, num_updates=11998, lr=9.9912e-05, gnorm=2.001, loss_scale=4, train_wall=10, gb_free=2.8, wall=138510
2021-06-20 09:07:27 | INFO | train_inner | epoch 005: 62 / 3002 loss=2.493, ppl=5.63, wps=5886.4, ups=0.09, wpb=64850, bsz=128, num_updates=11999, lr=9.9912e-05, gnorm=1.956, loss_scale=4, train_wall=11, gb_free=2.8, wall=138521
2021-06-20 09:07:38 | INFO | train_inner | epoch 005: 63 / 3002 loss=2.563, ppl=5.91, wps=5913.9, ups=0.09, wpb=64812, bsz=128, num_updates=12000, lr=9.9912e-05, gnorm=2.012, loss_scale=4, train_wall=11, gb_free=2.8, wall=138532
2021-06-20 09:07:49 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-20 09:08:00 | INFO | train_inner | epoch 005: 65 / 3002 loss=2.482, ppl=5.59, wps=2927.1, ups=0.05, wpb=64859, bsz=128, num_updates=12001, lr=9.9912e-05, gnorm=2.001, loss_scale=2, train_wall=21, gb_free=2.8, wall=138554
2021-06-20 09:08:11 | INFO | train_inner | epoch 005: 66 / 3002 loss=2.491, ppl=5.62, wps=5838, ups=0.09, wpb=64806, bsz=128, num_updates=12002, lr=9.9912e-05, gnorm=1.924, loss_scale=2, train_wall=11, gb_free=2.8, wall=138566
2021-06-20 09:08:22 | INFO | train_inner | epoch 005: 67 / 3002 loss=2.548, ppl=5.85, wps=6005.4, ups=0.09, wpb=64828, bsz=128, num_updates=12003, lr=9.9912e-05, gnorm=2.048, loss_scale=2, train_wall=10, gb_free=2.8, wall=138576
2021-06-20 09:08:33 | INFO | train_inner | epoch 005: 68 / 3002 loss=2.617, ppl=6.13, wps=5785.6, ups=0.09, wpb=64788, bsz=128, num_updates=12004, lr=9.9912e-05, gnorm=2.035, loss_scale=2, train_wall=11, gb_free=2.8, wall=138588
2021-06-20 09:08:44 | INFO | train_inner | epoch 005: 69 / 3002 loss=2.442, ppl=5.43, wps=5810.7, ups=0.09, wpb=64758, bsz=128, num_updates=12005, lr=9.9912e-05, gnorm=1.96, loss_scale=2, train_wall=11, gb_free=2.8, wall=138599
2021-06-20 09:08:55 | INFO | train_inner | epoch 005: 70 / 3002 loss=2.461, ppl=5.51, wps=5896.5, ups=0.09, wpb=64870, bsz=128, num_updates=12006, lr=9.99119e-05, gnorm=1.929, loss_scale=2, train_wall=11, gb_free=2.8, wall=138610
2021-06-20 09:09:06 | INFO | train_inner | epoch 005: 71 / 3002 loss=2.589, ppl=6.02, wps=5857.7, ups=0.09, wpb=64860, bsz=128, num_updates=12007, lr=9.99119e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=138621
2021-06-20 09:09:18 | INFO | train_inner | epoch 005: 72 / 3002 loss=2.543, ppl=5.83, wps=5804.8, ups=0.09, wpb=64935, bsz=128, num_updates=12008, lr=9.99119e-05, gnorm=1.966, loss_scale=2, train_wall=11, gb_free=2.8, wall=138632
2021-06-20 09:09:29 | INFO | train_inner | epoch 005: 73 / 3002 loss=2.502, ppl=5.66, wps=5869.8, ups=0.09, wpb=64869, bsz=128, num_updates=12009, lr=9.99119e-05, gnorm=1.995, loss_scale=2, train_wall=11, gb_free=2.8, wall=138643
2021-06-20 09:09:40 | INFO | train_inner | epoch 005: 74 / 3002 loss=2.555, ppl=5.88, wps=5864.5, ups=0.09, wpb=64760, bsz=128, num_updates=12010, lr=9.99119e-05, gnorm=2.198, loss_scale=2, train_wall=11, gb_free=2.8, wall=138654
2021-06-20 09:09:51 | INFO | train_inner | epoch 005: 75 / 3002 loss=2.514, ppl=5.71, wps=5963.4, ups=0.09, wpb=64866, bsz=128, num_updates=12011, lr=9.99119e-05, gnorm=1.985, loss_scale=2, train_wall=10, gb_free=2.8, wall=138665
2021-06-20 09:10:02 | INFO | train_inner | epoch 005: 76 / 3002 loss=2.697, ppl=6.48, wps=5891.5, ups=0.09, wpb=64835, bsz=128, num_updates=12012, lr=9.99119e-05, gnorm=2.048, loss_scale=2, train_wall=11, gb_free=2.8, wall=138676
2021-06-20 09:10:13 | INFO | train_inner | epoch 005: 77 / 3002 loss=2.614, ppl=6.12, wps=5819.3, ups=0.09, wpb=64850, bsz=128, num_updates=12013, lr=9.99119e-05, gnorm=2.062, loss_scale=2, train_wall=11, gb_free=2.8, wall=138687
2021-06-20 09:10:24 | INFO | train_inner | epoch 005: 78 / 3002 loss=2.7, ppl=6.5, wps=5925.9, ups=0.09, wpb=64743, bsz=128, num_updates=12014, lr=9.99119e-05, gnorm=2.022, loss_scale=2, train_wall=10, gb_free=2.8, wall=138698
2021-06-20 09:10:35 | INFO | train_inner | epoch 005: 79 / 3002 loss=2.467, ppl=5.53, wps=5828.1, ups=0.09, wpb=64744, bsz=128, num_updates=12015, lr=9.99119e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=138709
2021-06-20 09:10:46 | INFO | train_inner | epoch 005: 80 / 3002 loss=2.431, ppl=5.39, wps=5837.5, ups=0.09, wpb=64817, bsz=128, num_updates=12016, lr=9.99119e-05, gnorm=1.984, loss_scale=2, train_wall=11, gb_free=2.8, wall=138720
2021-06-20 09:10:57 | INFO | train_inner | epoch 005: 81 / 3002 loss=2.509, ppl=5.69, wps=5903.5, ups=0.09, wpb=64821, bsz=128, num_updates=12017, lr=9.99119e-05, gnorm=1.997, loss_scale=2, train_wall=11, gb_free=2.8, wall=138731
2021-06-20 09:11:08 | INFO | train_inner | epoch 005: 82 / 3002 loss=2.583, ppl=5.99, wps=5869.5, ups=0.09, wpb=64839, bsz=128, num_updates=12018, lr=9.99118e-05, gnorm=1.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=138742
2021-06-20 09:11:19 | INFO | train_inner | epoch 005: 83 / 3002 loss=2.374, ppl=5.19, wps=5849.1, ups=0.09, wpb=64828, bsz=128, num_updates=12019, lr=9.99118e-05, gnorm=1.992, loss_scale=2, train_wall=11, gb_free=2.8, wall=138753
2021-06-20 09:11:30 | INFO | train_inner | epoch 005: 84 / 3002 loss=2.539, ppl=5.81, wps=5848.5, ups=0.09, wpb=64939, bsz=128, num_updates=12020, lr=9.99118e-05, gnorm=2.118, loss_scale=2, train_wall=11, gb_free=2.8, wall=138764
2021-06-20 09:11:41 | INFO | train_inner | epoch 005: 85 / 3002 loss=2.618, ppl=6.14, wps=5851.8, ups=0.09, wpb=64869, bsz=128, num_updates=12021, lr=9.99118e-05, gnorm=4.312, loss_scale=2, train_wall=11, gb_free=2.8, wall=138776
2021-06-20 09:11:52 | INFO | train_inner | epoch 005: 86 / 3002 loss=2.576, ppl=5.96, wps=5800.7, ups=0.09, wpb=64845, bsz=128, num_updates=12022, lr=9.99118e-05, gnorm=1.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=138787
2021-06-20 09:12:04 | INFO | train_inner | epoch 005: 87 / 3002 loss=2.479, ppl=5.58, wps=5798.3, ups=0.09, wpb=64853, bsz=128, num_updates=12023, lr=9.99118e-05, gnorm=1.919, loss_scale=2, train_wall=11, gb_free=2.8, wall=138798
2021-06-20 09:12:15 | INFO | train_inner | epoch 005: 88 / 3002 loss=2.545, ppl=5.84, wps=5779, ups=0.09, wpb=64792, bsz=128, num_updates=12024, lr=9.99118e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=138809
2021-06-20 09:12:26 | INFO | train_inner | epoch 005: 89 / 3002 loss=2.624, ppl=6.17, wps=5908.3, ups=0.09, wpb=64817, bsz=128, num_updates=12025, lr=9.99118e-05, gnorm=2.763, loss_scale=2, train_wall=10, gb_free=2.8, wall=138820
2021-06-20 09:12:37 | INFO | train_inner | epoch 005: 90 / 3002 loss=2.479, ppl=5.58, wps=5901, ups=0.09, wpb=64842, bsz=128, num_updates=12026, lr=9.99118e-05, gnorm=2.02, loss_scale=2, train_wall=11, gb_free=2.8, wall=138831
2021-06-20 09:12:48 | INFO | train_inner | epoch 005: 91 / 3002 loss=2.641, ppl=6.24, wps=5719.6, ups=0.09, wpb=64748, bsz=128, num_updates=12027, lr=9.99118e-05, gnorm=2.119, loss_scale=2, train_wall=11, gb_free=2.8, wall=138842
2021-06-20 09:12:59 | INFO | train_inner | epoch 005: 92 / 3002 loss=2.594, ppl=6.04, wps=5865.5, ups=0.09, wpb=64753, bsz=128, num_updates=12028, lr=9.99118e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=138853
2021-06-20 09:13:10 | INFO | train_inner | epoch 005: 93 / 3002 loss=2.527, ppl=5.76, wps=5970.4, ups=0.09, wpb=64871, bsz=128, num_updates=12029, lr=9.99118e-05, gnorm=2.017, loss_scale=2, train_wall=10, gb_free=2.8, wall=138864
2021-06-20 09:13:21 | INFO | train_inner | epoch 005: 94 / 3002 loss=2.51, ppl=5.7, wps=5768.1, ups=0.09, wpb=64833, bsz=128, num_updates=12030, lr=9.99118e-05, gnorm=1.952, loss_scale=2, train_wall=11, gb_free=2.8, wall=138876
2021-06-20 09:13:32 | INFO | train_inner | epoch 005: 95 / 3002 loss=2.575, ppl=5.96, wps=5897.5, ups=0.09, wpb=64824, bsz=128, num_updates=12031, lr=9.99117e-05, gnorm=2.055, loss_scale=2, train_wall=11, gb_free=2.8, wall=138887
2021-06-20 09:13:43 | INFO | train_inner | epoch 005: 96 / 3002 loss=2.537, ppl=5.8, wps=5811.9, ups=0.09, wpb=64753, bsz=128, num_updates=12032, lr=9.99117e-05, gnorm=2.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=138898
2021-06-20 09:13:55 | INFO | train_inner | epoch 005: 97 / 3002 loss=2.425, ppl=5.37, wps=5800.7, ups=0.09, wpb=64866, bsz=128, num_updates=12033, lr=9.99117e-05, gnorm=1.94, loss_scale=2, train_wall=11, gb_free=2.8, wall=138909
2021-06-20 09:14:06 | INFO | train_inner | epoch 005: 98 / 3002 loss=2.489, ppl=5.61, wps=5851.6, ups=0.09, wpb=64892, bsz=128, num_updates=12034, lr=9.99117e-05, gnorm=1.912, loss_scale=2, train_wall=11, gb_free=2.8, wall=138920
2021-06-20 09:14:17 | INFO | train_inner | epoch 005: 99 / 3002 loss=2.629, ppl=6.19, wps=5814.7, ups=0.09, wpb=64828, bsz=128, num_updates=12035, lr=9.99117e-05, gnorm=1.978, loss_scale=2, train_wall=11, gb_free=2.8, wall=138931
2021-06-20 09:14:28 | INFO | train_inner | epoch 005: 100 / 3002 loss=2.485, ppl=5.6, wps=5839.6, ups=0.09, wpb=64850, bsz=128, num_updates=12036, lr=9.99117e-05, gnorm=1.998, loss_scale=2, train_wall=11, gb_free=2.8, wall=138942
2021-06-20 09:14:39 | INFO | train_inner | epoch 005: 101 / 3002 loss=2.397, ppl=5.27, wps=5844, ups=0.09, wpb=64846, bsz=128, num_updates=12037, lr=9.99117e-05, gnorm=1.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=138953
2021-06-20 09:14:50 | INFO | train_inner | epoch 005: 102 / 3002 loss=2.571, ppl=5.94, wps=5885.3, ups=0.09, wpb=64771, bsz=128, num_updates=12038, lr=9.99117e-05, gnorm=2.009, loss_scale=2, train_wall=11, gb_free=2.8, wall=138964
2021-06-20 09:15:01 | INFO | train_inner | epoch 005: 103 / 3002 loss=2.416, ppl=5.34, wps=5861.4, ups=0.09, wpb=64808, bsz=128, num_updates=12039, lr=9.99117e-05, gnorm=1.972, loss_scale=2, train_wall=11, gb_free=2.8, wall=138975
2021-06-20 09:15:12 | INFO | train_inner | epoch 005: 104 / 3002 loss=2.619, ppl=6.14, wps=5884, ups=0.09, wpb=64911, bsz=128, num_updates=12040, lr=9.99117e-05, gnorm=2.076, loss_scale=2, train_wall=11, gb_free=2.8, wall=138986
2021-06-20 09:15:23 | INFO | train_inner | epoch 005: 105 / 3002 loss=2.477, ppl=5.57, wps=5794.8, ups=0.09, wpb=64839, bsz=128, num_updates=12041, lr=9.99117e-05, gnorm=1.973, loss_scale=2, train_wall=11, gb_free=2.8, wall=138998
2021-06-20 09:15:35 | INFO | train_inner | epoch 005: 106 / 3002 loss=2.675, ppl=6.39, wps=5699.6, ups=0.09, wpb=64761, bsz=128, num_updates=12042, lr=9.99117e-05, gnorm=2.06, loss_scale=2, train_wall=11, gb_free=2.8, wall=139009
2021-06-20 09:15:46 | INFO | train_inner | epoch 005: 107 / 3002 loss=2.591, ppl=6.03, wps=5866.4, ups=0.09, wpb=64770, bsz=128, num_updates=12043, lr=9.99116e-05, gnorm=1.987, loss_scale=2, train_wall=11, gb_free=2.8, wall=139020
2021-06-20 09:15:57 | INFO | train_inner | epoch 005: 108 / 3002 loss=2.482, ppl=5.59, wps=5807, ups=0.09, wpb=64825, bsz=128, num_updates=12044, lr=9.99116e-05, gnorm=1.937, loss_scale=2, train_wall=11, gb_free=2.8, wall=139031
2021-06-20 09:16:08 | INFO | train_inner | epoch 005: 109 / 3002 loss=2.491, ppl=5.62, wps=5806.2, ups=0.09, wpb=64809, bsz=128, num_updates=12045, lr=9.99116e-05, gnorm=1.949, loss_scale=2, train_wall=11, gb_free=2.8, wall=139042
2021-06-20 09:16:19 | INFO | train_inner | epoch 005: 110 / 3002 loss=2.558, ppl=5.89, wps=5831.7, ups=0.09, wpb=64812, bsz=128, num_updates=12046, lr=9.99116e-05, gnorm=2.012, loss_scale=2, train_wall=11, gb_free=2.8, wall=139053
2021-06-20 09:16:30 | INFO | train_inner | epoch 005: 111 / 3002 loss=2.509, ppl=5.69, wps=5935.5, ups=0.09, wpb=64796, bsz=128, num_updates=12047, lr=9.99116e-05, gnorm=2.013, loss_scale=2, train_wall=10, gb_free=2.8, wall=139064
2021-06-20 09:16:41 | INFO | train_inner | epoch 005: 112 / 3002 loss=2.449, ppl=5.46, wps=5903.1, ups=0.09, wpb=64852, bsz=128, num_updates=12048, lr=9.99116e-05, gnorm=1.997, loss_scale=2, train_wall=11, gb_free=2.8, wall=139075
2021-06-20 09:16:52 | INFO | train_inner | epoch 005: 113 / 3002 loss=2.44, ppl=5.43, wps=5856.3, ups=0.09, wpb=64813, bsz=128, num_updates=12049, lr=9.99116e-05, gnorm=2.015, loss_scale=2, train_wall=11, gb_free=2.8, wall=139086
2021-06-20 09:17:03 | INFO | train_inner | epoch 005: 114 / 3002 loss=2.541, ppl=5.82, wps=5877.7, ups=0.09, wpb=64913, bsz=128, num_updates=12050, lr=9.99116e-05, gnorm=1.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=139097
2021-06-20 09:17:14 | INFO | train_inner | epoch 005: 115 / 3002 loss=2.397, ppl=5.27, wps=5899.1, ups=0.09, wpb=64892, bsz=128, num_updates=12051, lr=9.99116e-05, gnorm=5.467, loss_scale=2, train_wall=11, gb_free=2.8, wall=139108
2021-06-20 09:17:25 | INFO | train_inner | epoch 005: 116 / 3002 loss=2.452, ppl=5.47, wps=5898.7, ups=0.09, wpb=64886, bsz=128, num_updates=12052, lr=9.99116e-05, gnorm=1.994, loss_scale=2, train_wall=11, gb_free=2.8, wall=139119
2021-06-20 09:17:36 | INFO | train_inner | epoch 005: 117 / 3002 loss=2.485, ppl=5.6, wps=5740.7, ups=0.09, wpb=64907, bsz=128, num_updates=12053, lr=9.99116e-05, gnorm=1.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=139131
2021-06-20 09:17:47 | INFO | train_inner | epoch 005: 118 / 3002 loss=2.406, ppl=5.3, wps=5859.6, ups=0.09, wpb=64822, bsz=128, num_updates=12054, lr=9.99116e-05, gnorm=2.056, loss_scale=2, train_wall=11, gb_free=2.8, wall=139142
2021-06-20 09:17:59 | INFO | train_inner | epoch 005: 119 / 3002 loss=2.601, ppl=6.07, wps=5761.1, ups=0.09, wpb=64712, bsz=128, num_updates=12055, lr=9.99116e-05, gnorm=2.058, loss_scale=2, train_wall=11, gb_free=2.8, wall=139153
2021-06-20 09:18:10 | INFO | train_inner | epoch 005: 120 / 3002 loss=2.59, ppl=6.02, wps=5749.2, ups=0.09, wpb=64795, bsz=128, num_updates=12056, lr=9.99115e-05, gnorm=2.144, loss_scale=2, train_wall=11, gb_free=2.8, wall=139164
2021-06-20 09:18:21 | INFO | train_inner | epoch 005: 121 / 3002 loss=2.625, ppl=6.17, wps=5857.5, ups=0.09, wpb=64742, bsz=128, num_updates=12057, lr=9.99115e-05, gnorm=2.06, loss_scale=2, train_wall=11, gb_free=2.8, wall=139175
2021-06-20 09:18:32 | INFO | train_inner | epoch 005: 122 / 3002 loss=2.456, ppl=5.49, wps=5769.5, ups=0.09, wpb=64794, bsz=128, num_updates=12058, lr=9.99115e-05, gnorm=1.867, loss_scale=2, train_wall=11, gb_free=2.8, wall=139187
2021-06-20 09:18:43 | INFO | train_inner | epoch 005: 123 / 3002 loss=2.636, ppl=6.22, wps=5910.6, ups=0.09, wpb=64835, bsz=128, num_updates=12059, lr=9.99115e-05, gnorm=1.974, loss_scale=2, train_wall=11, gb_free=2.8, wall=139198
2021-06-20 09:18:55 | INFO | train_inner | epoch 005: 124 / 3002 loss=2.624, ppl=6.16, wps=5715.7, ups=0.09, wpb=64790, bsz=128, num_updates=12060, lr=9.99115e-05, gnorm=15.192, loss_scale=2, train_wall=11, gb_free=2.8, wall=139209
2021-06-20 09:19:06 | INFO | train_inner | epoch 005: 125 / 3002 loss=2.529, ppl=5.77, wps=5900, ups=0.09, wpb=64846, bsz=128, num_updates=12061, lr=9.99115e-05, gnorm=1.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=139220
2021-06-20 09:19:17 | INFO | train_inner | epoch 005: 126 / 3002 loss=2.507, ppl=5.68, wps=5809.1, ups=0.09, wpb=64851, bsz=128, num_updates=12062, lr=9.99115e-05, gnorm=2.019, loss_scale=2, train_wall=11, gb_free=2.8, wall=139231
2021-06-20 09:19:28 | INFO | train_inner | epoch 005: 127 / 3002 loss=2.502, ppl=5.66, wps=5817.8, ups=0.09, wpb=64802, bsz=128, num_updates=12063, lr=9.99115e-05, gnorm=2.034, loss_scale=2, train_wall=11, gb_free=2.8, wall=139242
2021-06-20 09:19:39 | INFO | train_inner | epoch 005: 128 / 3002 loss=2.595, ppl=6.04, wps=5967.6, ups=0.09, wpb=64829, bsz=128, num_updates=12064, lr=9.99115e-05, gnorm=2.085, loss_scale=2, train_wall=10, gb_free=2.8, wall=139253
2021-06-20 09:19:50 | INFO | train_inner | epoch 005: 129 / 3002 loss=2.518, ppl=5.73, wps=5724.8, ups=0.09, wpb=64831, bsz=128, num_updates=12065, lr=9.99115e-05, gnorm=2.22, loss_scale=2, train_wall=11, gb_free=2.8, wall=139264
2021-06-20 09:20:01 | INFO | train_inner | epoch 005: 130 / 3002 loss=2.427, ppl=5.38, wps=5807.5, ups=0.09, wpb=64819, bsz=128, num_updates=12066, lr=9.99115e-05, gnorm=2.05, loss_scale=2, train_wall=11, gb_free=2.8, wall=139276
2021-06-20 09:20:12 | INFO | train_inner | epoch 005: 131 / 3002 loss=2.478, ppl=5.57, wps=5965.3, ups=0.09, wpb=64822, bsz=128, num_updates=12067, lr=9.99115e-05, gnorm=2.137, loss_scale=2, train_wall=10, gb_free=2.8, wall=139286
2021-06-20 09:20:23 | INFO | train_inner | epoch 005: 132 / 3002 loss=2.449, ppl=5.46, wps=5831.9, ups=0.09, wpb=64890, bsz=128, num_updates=12068, lr=9.99114e-05, gnorm=2.026, loss_scale=2, train_wall=11, gb_free=2.8, wall=139298
2021-06-20 09:20:34 | INFO | train_inner | epoch 005: 133 / 3002 loss=2.532, ppl=5.78, wps=5803.2, ups=0.09, wpb=64784, bsz=128, num_updates=12069, lr=9.99114e-05, gnorm=2.012, loss_scale=2, train_wall=11, gb_free=2.8, wall=139309
2021-06-20 09:20:45 | INFO | train_inner | epoch 005: 134 / 3002 loss=2.537, ppl=5.8, wps=5849.3, ups=0.09, wpb=64876, bsz=128, num_updates=12070, lr=9.99114e-05, gnorm=1.97, loss_scale=2, train_wall=11, gb_free=2.8, wall=139320
2021-06-20 09:20:57 | INFO | train_inner | epoch 005: 135 / 3002 loss=2.448, ppl=5.46, wps=5856.5, ups=0.09, wpb=64865, bsz=128, num_updates=12071, lr=9.99114e-05, gnorm=1.996, loss_scale=2, train_wall=11, gb_free=2.8, wall=139331
2021-06-20 09:21:08 | INFO | train_inner | epoch 005: 136 / 3002 loss=2.399, ppl=5.27, wps=5886.5, ups=0.09, wpb=64846, bsz=128, num_updates=12072, lr=9.99114e-05, gnorm=1.936, loss_scale=2, train_wall=11, gb_free=2.8, wall=139342
2021-06-20 09:21:19 | INFO | train_inner | epoch 005: 137 / 3002 loss=2.48, ppl=5.58, wps=5924.4, ups=0.09, wpb=64858, bsz=128, num_updates=12073, lr=9.99114e-05, gnorm=1.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=139353
2021-06-20 09:21:29 | INFO | train_inner | epoch 005: 138 / 3002 loss=2.467, ppl=5.53, wps=5931.7, ups=0.09, wpb=64859, bsz=128, num_updates=12074, lr=9.99114e-05, gnorm=1.945, loss_scale=2, train_wall=11, gb_free=2.8, wall=139364
2021-06-20 09:21:41 | INFO | train_inner | epoch 005: 139 / 3002 loss=2.462, ppl=5.51, wps=5826.6, ups=0.09, wpb=64788, bsz=128, num_updates=12075, lr=9.99114e-05, gnorm=1.955, loss_scale=2, train_wall=11, gb_free=2.8, wall=139375
2021-06-20 09:21:52 | INFO | train_inner | epoch 005: 140 / 3002 loss=2.509, ppl=5.69, wps=5755.2, ups=0.09, wpb=64833, bsz=128, num_updates=12076, lr=9.99114e-05, gnorm=2.363, loss_scale=2, train_wall=11, gb_free=2.8, wall=139386
2021-06-20 09:22:03 | INFO | train_inner | epoch 005: 141 / 3002 loss=2.447, ppl=5.45, wps=5843.8, ups=0.09, wpb=64869, bsz=128, num_updates=12077, lr=9.99114e-05, gnorm=1.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=139397
2021-06-20 09:22:14 | INFO | train_inner | epoch 005: 142 / 3002 loss=2.611, ppl=6.11, wps=5882.6, ups=0.09, wpb=64840, bsz=128, num_updates=12078, lr=9.99114e-05, gnorm=2.089, loss_scale=2, train_wall=11, gb_free=2.8, wall=139408
2021-06-20 09:22:25 | INFO | train_inner | epoch 005: 143 / 3002 loss=2.549, ppl=5.85, wps=5824.7, ups=0.09, wpb=64853, bsz=128, num_updates=12079, lr=9.99114e-05, gnorm=2.025, loss_scale=2, train_wall=11, gb_free=2.8, wall=139419
2021-06-20 09:22:36 | INFO | train_inner | epoch 005: 144 / 3002 loss=2.457, ppl=5.49, wps=5823.6, ups=0.09, wpb=64769, bsz=128, num_updates=12080, lr=9.99114e-05, gnorm=2.027, loss_scale=2, train_wall=11, gb_free=2.8, wall=139431
2021-06-20 09:22:47 | INFO | train_inner | epoch 005: 145 / 3002 loss=2.656, ppl=6.3, wps=5834.2, ups=0.09, wpb=64820, bsz=128, num_updates=12081, lr=9.99113e-05, gnorm=2.179, loss_scale=2, train_wall=11, gb_free=2.8, wall=139442
2021-06-20 09:22:58 | INFO | train_inner | epoch 005: 146 / 3002 loss=2.597, ppl=6.05, wps=5901.1, ups=0.09, wpb=64849, bsz=128, num_updates=12082, lr=9.99113e-05, gnorm=2.418, loss_scale=2, train_wall=11, gb_free=2.8, wall=139453
2021-06-20 09:23:09 | INFO | train_inner | epoch 005: 147 / 3002 loss=2.516, ppl=5.72, wps=6007.2, ups=0.09, wpb=64850, bsz=128, num_updates=12083, lr=9.99113e-05, gnorm=2.009, loss_scale=2, train_wall=10, gb_free=2.8, wall=139463
2021-06-20 09:23:20 | INFO | train_inner | epoch 005: 148 / 3002 loss=2.427, ppl=5.38, wps=5928.6, ups=0.09, wpb=64860, bsz=128, num_updates=12084, lr=9.99113e-05, gnorm=2.007, loss_scale=2, train_wall=10, gb_free=2.8, wall=139474
2021-06-20 09:23:31 | INFO | train_inner | epoch 005: 149 / 3002 loss=2.513, ppl=5.71, wps=5895.3, ups=0.09, wpb=64902, bsz=128, num_updates=12085, lr=9.99113e-05, gnorm=1.978, loss_scale=2, train_wall=11, gb_free=2.8, wall=139485
2021-06-20 09:23:42 | INFO | train_inner | epoch 005: 150 / 3002 loss=2.494, ppl=5.63, wps=5720.6, ups=0.09, wpb=64775, bsz=128, num_updates=12086, lr=9.99113e-05, gnorm=1.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=139497
2021-06-20 09:23:53 | INFO | train_inner | epoch 005: 151 / 3002 loss=2.684, ppl=6.43, wps=5857.9, ups=0.09, wpb=64813, bsz=128, num_updates=12087, lr=9.99113e-05, gnorm=2.106, loss_scale=2, train_wall=11, gb_free=2.8, wall=139508
2021-06-20 09:24:05 | INFO | train_inner | epoch 005: 152 / 3002 loss=2.472, ppl=5.55, wps=5758.7, ups=0.09, wpb=64840, bsz=128, num_updates=12088, lr=9.99113e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=139519
2021-06-20 09:24:16 | INFO | train_inner | epoch 005: 153 / 3002 loss=2.286, ppl=4.88, wps=5677.2, ups=0.09, wpb=64780, bsz=128, num_updates=12089, lr=9.99113e-05, gnorm=1.956, loss_scale=2, train_wall=11, gb_free=2.8, wall=139530
2021-06-20 09:24:27 | INFO | train_inner | epoch 005: 154 / 3002 loss=2.529, ppl=5.77, wps=5773, ups=0.09, wpb=64831, bsz=128, num_updates=12090, lr=9.99113e-05, gnorm=2.01, loss_scale=2, train_wall=11, gb_free=2.8, wall=139542
2021-06-20 09:24:38 | INFO | train_inner | epoch 005: 155 / 3002 loss=2.568, ppl=5.93, wps=5820.3, ups=0.09, wpb=64857, bsz=128, num_updates=12091, lr=9.99113e-05, gnorm=2.102, loss_scale=2, train_wall=11, gb_free=2.8, wall=139553
2021-06-20 09:24:49 | INFO | train_inner | epoch 005: 156 / 3002 loss=2.593, ppl=6.03, wps=5938.1, ups=0.09, wpb=64852, bsz=128, num_updates=12092, lr=9.99113e-05, gnorm=2.198, loss_scale=2, train_wall=10, gb_free=2.8, wall=139564
2021-06-20 09:25:01 | INFO | train_inner | epoch 005: 157 / 3002 loss=2.483, ppl=5.59, wps=5794.6, ups=0.09, wpb=64820, bsz=128, num_updates=12093, lr=9.99112e-05, gnorm=2.177, loss_scale=2, train_wall=11, gb_free=2.8, wall=139575
2021-06-20 09:25:12 | INFO | train_inner | epoch 005: 158 / 3002 loss=2.647, ppl=6.26, wps=5751.5, ups=0.09, wpb=64860, bsz=128, num_updates=12094, lr=9.99112e-05, gnorm=5.268, loss_scale=2, train_wall=11, gb_free=2.8, wall=139586
2021-06-20 09:25:23 | INFO | train_inner | epoch 005: 159 / 3002 loss=2.615, ppl=6.12, wps=5800.9, ups=0.09, wpb=64810, bsz=128, num_updates=12095, lr=9.99112e-05, gnorm=2.104, loss_scale=2, train_wall=11, gb_free=2.8, wall=139597
2021-06-20 09:25:34 | INFO | train_inner | epoch 005: 160 / 3002 loss=2.491, ppl=5.62, wps=5779.5, ups=0.09, wpb=64783, bsz=128, num_updates=12096, lr=9.99112e-05, gnorm=1.998, loss_scale=2, train_wall=11, gb_free=2.8, wall=139609
2021-06-20 09:25:45 | INFO | train_inner | epoch 005: 161 / 3002 loss=2.417, ppl=5.34, wps=5844.8, ups=0.09, wpb=64776, bsz=128, num_updates=12097, lr=9.99112e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=139620
2021-06-20 09:25:56 | INFO | train_inner | epoch 005: 162 / 3002 loss=2.624, ppl=6.16, wps=5823.7, ups=0.09, wpb=64777, bsz=128, num_updates=12098, lr=9.99112e-05, gnorm=2.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=139631
2021-06-20 09:26:08 | INFO | train_inner | epoch 005: 163 / 3002 loss=2.448, ppl=5.46, wps=5794.7, ups=0.09, wpb=64791, bsz=128, num_updates=12099, lr=9.99112e-05, gnorm=2.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=139642
2021-06-20 09:26:19 | INFO | train_inner | epoch 005: 164 / 3002 loss=2.59, ppl=6.02, wps=5899.5, ups=0.09, wpb=64875, bsz=128, num_updates=12100, lr=9.99112e-05, gnorm=2.177, loss_scale=2, train_wall=11, gb_free=2.8, wall=139653
2021-06-20 09:26:30 | INFO | train_inner | epoch 005: 165 / 3002 loss=2.44, ppl=5.43, wps=5841, ups=0.09, wpb=64758, bsz=128, num_updates=12101, lr=9.99112e-05, gnorm=2.12, loss_scale=2, train_wall=11, gb_free=2.8, wall=139664
2021-06-20 09:26:41 | INFO | train_inner | epoch 005: 166 / 3002 loss=2.462, ppl=5.51, wps=5750, ups=0.09, wpb=64868, bsz=128, num_updates=12102, lr=9.99112e-05, gnorm=2.286, loss_scale=2, train_wall=11, gb_free=2.8, wall=139675
2021-06-20 09:26:52 | INFO | train_inner | epoch 005: 167 / 3002 loss=2.477, ppl=5.57, wps=5808.6, ups=0.09, wpb=64874, bsz=128, num_updates=12103, lr=9.99112e-05, gnorm=2.101, loss_scale=2, train_wall=11, gb_free=2.8, wall=139687
2021-06-20 09:27:03 | INFO | train_inner | epoch 005: 168 / 3002 loss=2.541, ppl=5.82, wps=5817.6, ups=0.09, wpb=64827, bsz=128, num_updates=12104, lr=9.99112e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=139698
2021-06-20 09:27:14 | INFO | train_inner | epoch 005: 169 / 3002 loss=2.541, ppl=5.82, wps=5825.7, ups=0.09, wpb=64760, bsz=128, num_updates=12105, lr=9.99112e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=139709
2021-06-20 09:27:26 | INFO | train_inner | epoch 005: 170 / 3002 loss=2.709, ppl=6.54, wps=5784, ups=0.09, wpb=64790, bsz=128, num_updates=12106, lr=9.99111e-05, gnorm=2.143, loss_scale=2, train_wall=11, gb_free=2.8, wall=139720
2021-06-20 09:27:37 | INFO | train_inner | epoch 005: 171 / 3002 loss=2.654, ppl=6.29, wps=5845.9, ups=0.09, wpb=64827, bsz=128, num_updates=12107, lr=9.99111e-05, gnorm=3.846, loss_scale=2, train_wall=11, gb_free=2.8, wall=139731
2021-06-20 09:27:48 | INFO | train_inner | epoch 005: 172 / 3002 loss=2.58, ppl=5.98, wps=5849.7, ups=0.09, wpb=64866, bsz=128, num_updates=12108, lr=9.99111e-05, gnorm=1.97, loss_scale=2, train_wall=11, gb_free=2.8, wall=139742
2021-06-20 09:27:59 | INFO | train_inner | epoch 005: 173 / 3002 loss=2.482, ppl=5.59, wps=5794.1, ups=0.09, wpb=64795, bsz=128, num_updates=12109, lr=9.99111e-05, gnorm=2.039, loss_scale=2, train_wall=11, gb_free=2.8, wall=139753
2021-06-20 09:28:10 | INFO | train_inner | epoch 005: 174 / 3002 loss=2.527, ppl=5.76, wps=5894.6, ups=0.09, wpb=64764, bsz=128, num_updates=12110, lr=9.99111e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=139764
2021-06-20 09:28:21 | INFO | train_inner | epoch 005: 175 / 3002 loss=2.607, ppl=6.09, wps=5939, ups=0.09, wpb=64864, bsz=128, num_updates=12111, lr=9.99111e-05, gnorm=2.008, loss_scale=2, train_wall=10, gb_free=2.8, wall=139775
2021-06-20 09:28:32 | INFO | train_inner | epoch 005: 176 / 3002 loss=2.52, ppl=5.73, wps=5736.4, ups=0.09, wpb=64813, bsz=128, num_updates=12112, lr=9.99111e-05, gnorm=2.054, loss_scale=2, train_wall=11, gb_free=2.8, wall=139787
2021-06-20 09:28:43 | INFO | train_inner | epoch 005: 177 / 3002 loss=2.498, ppl=5.65, wps=5806.5, ups=0.09, wpb=64828, bsz=128, num_updates=12113, lr=9.99111e-05, gnorm=2.266, loss_scale=2, train_wall=11, gb_free=2.8, wall=139798
2021-06-20 09:28:54 | INFO | train_inner | epoch 005: 178 / 3002 loss=2.459, ppl=5.5, wps=5872.2, ups=0.09, wpb=64828, bsz=128, num_updates=12114, lr=9.99111e-05, gnorm=2.275, loss_scale=2, train_wall=11, gb_free=2.8, wall=139809
2021-06-20 09:29:06 | INFO | train_inner | epoch 005: 179 / 3002 loss=2.544, ppl=5.83, wps=5719.6, ups=0.09, wpb=64844, bsz=128, num_updates=12115, lr=9.99111e-05, gnorm=2.024, loss_scale=2, train_wall=11, gb_free=2.8, wall=139820
2021-06-20 09:29:17 | INFO | train_inner | epoch 005: 180 / 3002 loss=2.577, ppl=5.97, wps=5821.2, ups=0.09, wpb=64730, bsz=128, num_updates=12116, lr=9.99111e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=139831
2021-06-20 09:29:28 | INFO | train_inner | epoch 005: 181 / 3002 loss=2.423, ppl=5.36, wps=5843.3, ups=0.09, wpb=64787, bsz=128, num_updates=12117, lr=9.99111e-05, gnorm=2.065, loss_scale=2, train_wall=11, gb_free=2.8, wall=139842
2021-06-20 09:29:39 | INFO | train_inner | epoch 005: 182 / 3002 loss=2.619, ppl=6.14, wps=5838.6, ups=0.09, wpb=64831, bsz=128, num_updates=12118, lr=9.9911e-05, gnorm=2.041, loss_scale=2, train_wall=11, gb_free=2.8, wall=139853
2021-06-20 09:29:50 | INFO | train_inner | epoch 005: 183 / 3002 loss=2.455, ppl=5.48, wps=5874, ups=0.09, wpb=64898, bsz=128, num_updates=12119, lr=9.9911e-05, gnorm=1.978, loss_scale=2, train_wall=11, gb_free=2.8, wall=139864
2021-06-20 09:30:01 | INFO | train_inner | epoch 005: 184 / 3002 loss=2.568, ppl=5.93, wps=5691.7, ups=0.09, wpb=64803, bsz=128, num_updates=12120, lr=9.9911e-05, gnorm=2.086, loss_scale=2, train_wall=11, gb_free=2.8, wall=139876
2021-06-20 09:30:13 | INFO | train_inner | epoch 005: 185 / 3002 loss=2.437, ppl=5.42, wps=5837, ups=0.09, wpb=64916, bsz=128, num_updates=12121, lr=9.9911e-05, gnorm=2.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=139887
2021-06-20 09:30:24 | INFO | train_inner | epoch 005: 186 / 3002 loss=2.612, ppl=6.11, wps=5804.5, ups=0.09, wpb=64816, bsz=128, num_updates=12122, lr=9.9911e-05, gnorm=2.003, loss_scale=2, train_wall=11, gb_free=2.8, wall=139898
2021-06-20 09:30:35 | INFO | train_inner | epoch 005: 187 / 3002 loss=2.457, ppl=5.49, wps=5795.5, ups=0.09, wpb=64801, bsz=128, num_updates=12123, lr=9.9911e-05, gnorm=2.119, loss_scale=2, train_wall=11, gb_free=2.8, wall=139909
2021-06-20 09:30:46 | INFO | train_inner | epoch 005: 188 / 3002 loss=2.66, ppl=6.32, wps=5820.2, ups=0.09, wpb=64777, bsz=128, num_updates=12124, lr=9.9911e-05, gnorm=2.112, loss_scale=2, train_wall=11, gb_free=2.8, wall=139920
2021-06-20 09:30:57 | INFO | train_inner | epoch 005: 189 / 3002 loss=2.456, ppl=5.49, wps=5826.7, ups=0.09, wpb=64879, bsz=128, num_updates=12125, lr=9.9911e-05, gnorm=2.053, loss_scale=2, train_wall=11, gb_free=2.8, wall=139932
2021-06-20 09:31:08 | INFO | train_inner | epoch 005: 190 / 3002 loss=2.589, ppl=6.02, wps=5796.3, ups=0.09, wpb=64739, bsz=128, num_updates=12126, lr=9.9911e-05, gnorm=2.275, loss_scale=2, train_wall=11, gb_free=2.8, wall=139943
2021-06-20 09:31:19 | INFO | train_inner | epoch 005: 191 / 3002 loss=2.616, ppl=6.13, wps=5907.9, ups=0.09, wpb=64816, bsz=128, num_updates=12127, lr=9.9911e-05, gnorm=2.094, loss_scale=2, train_wall=11, gb_free=2.8, wall=139954
2021-06-20 09:31:31 | INFO | train_inner | epoch 005: 192 / 3002 loss=2.502, ppl=5.67, wps=5811.8, ups=0.09, wpb=64826, bsz=128, num_updates=12128, lr=9.9911e-05, gnorm=2.053, loss_scale=4, train_wall=11, gb_free=2.8, wall=139965
2021-06-20 09:31:42 | INFO | train_inner | epoch 005: 193 / 3002 loss=2.532, ppl=5.78, wps=5855.8, ups=0.09, wpb=64839, bsz=128, num_updates=12129, lr=9.9911e-05, gnorm=2.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=139976
2021-06-20 09:31:53 | INFO | train_inner | epoch 005: 194 / 3002 loss=2.49, ppl=5.62, wps=5941.8, ups=0.09, wpb=64901, bsz=128, num_updates=12130, lr=9.9911e-05, gnorm=1.971, loss_scale=4, train_wall=10, gb_free=2.8, wall=139987
2021-06-20 09:32:04 | INFO | train_inner | epoch 005: 195 / 3002 loss=2.67, ppl=6.36, wps=5847.3, ups=0.09, wpb=64820, bsz=128, num_updates=12131, lr=9.99109e-05, gnorm=2.048, loss_scale=4, train_wall=11, gb_free=2.8, wall=139998
2021-06-20 09:32:15 | INFO | train_inner | epoch 005: 196 / 3002 loss=2.529, ppl=5.77, wps=5870.2, ups=0.09, wpb=64892, bsz=128, num_updates=12132, lr=9.99109e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=140009
2021-06-20 09:32:26 | INFO | train_inner | epoch 005: 197 / 3002 loss=2.659, ppl=6.32, wps=5869.5, ups=0.09, wpb=64781, bsz=128, num_updates=12133, lr=9.99109e-05, gnorm=2.008, loss_scale=4, train_wall=11, gb_free=2.8, wall=140020
2021-06-20 09:32:37 | INFO | train_inner | epoch 005: 198 / 3002 loss=2.423, ppl=5.36, wps=5876, ups=0.09, wpb=64806, bsz=128, num_updates=12134, lr=9.99109e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=140031
2021-06-20 09:32:48 | INFO | train_inner | epoch 005: 199 / 3002 loss=2.58, ppl=5.98, wps=5854.7, ups=0.09, wpb=64866, bsz=128, num_updates=12135, lr=9.99109e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=140042
2021-06-20 09:32:59 | INFO | train_inner | epoch 005: 200 / 3002 loss=2.507, ppl=5.68, wps=5936.1, ups=0.09, wpb=64813, bsz=128, num_updates=12136, lr=9.99109e-05, gnorm=1.906, loss_scale=4, train_wall=10, gb_free=2.8, wall=140053
2021-06-20 09:33:10 | INFO | train_inner | epoch 005: 201 / 3002 loss=2.483, ppl=5.59, wps=5862.8, ups=0.09, wpb=64857, bsz=128, num_updates=12137, lr=9.99109e-05, gnorm=4.701, loss_scale=4, train_wall=11, gb_free=2.8, wall=140064
2021-06-20 09:33:21 | INFO | train_inner | epoch 005: 202 / 3002 loss=2.53, ppl=5.77, wps=5856.8, ups=0.09, wpb=64822, bsz=128, num_updates=12138, lr=9.99109e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=140075
2021-06-20 09:33:32 | INFO | train_inner | epoch 005: 203 / 3002 loss=2.531, ppl=5.78, wps=5739.5, ups=0.09, wpb=64906, bsz=128, num_updates=12139, lr=9.99109e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=140086
2021-06-20 09:33:43 | INFO | train_inner | epoch 005: 204 / 3002 loss=2.606, ppl=6.09, wps=5840.2, ups=0.09, wpb=64833, bsz=128, num_updates=12140, lr=9.99109e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=140098
2021-06-20 09:33:54 | INFO | train_inner | epoch 005: 205 / 3002 loss=2.523, ppl=5.75, wps=5924.6, ups=0.09, wpb=64915, bsz=128, num_updates=12141, lr=9.99109e-05, gnorm=2.062, loss_scale=4, train_wall=10, gb_free=2.8, wall=140109
2021-06-20 09:34:05 | INFO | train_inner | epoch 005: 206 / 3002 loss=2.556, ppl=5.88, wps=5766.8, ups=0.09, wpb=64739, bsz=128, num_updates=12142, lr=9.99109e-05, gnorm=2.041, loss_scale=4, train_wall=11, gb_free=2.8, wall=140120
2021-06-20 09:34:17 | INFO | train_inner | epoch 005: 207 / 3002 loss=2.441, ppl=5.43, wps=5808.9, ups=0.09, wpb=64807, bsz=128, num_updates=12143, lr=9.99108e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=140131
2021-06-20 09:34:28 | INFO | train_inner | epoch 005: 208 / 3002 loss=2.593, ppl=6.03, wps=5734.1, ups=0.09, wpb=64813, bsz=128, num_updates=12144, lr=9.99108e-05, gnorm=2.088, loss_scale=4, train_wall=11, gb_free=2.8, wall=140142
2021-06-20 09:34:39 | INFO | train_inner | epoch 005: 209 / 3002 loss=2.589, ppl=6.02, wps=5902.4, ups=0.09, wpb=64830, bsz=128, num_updates=12145, lr=9.99108e-05, gnorm=2.085, loss_scale=4, train_wall=11, gb_free=2.8, wall=140153
2021-06-20 09:34:50 | INFO | train_inner | epoch 005: 210 / 3002 loss=2.534, ppl=5.79, wps=5964.6, ups=0.09, wpb=64853, bsz=128, num_updates=12146, lr=9.99108e-05, gnorm=1.942, loss_scale=4, train_wall=10, gb_free=2.8, wall=140164
2021-06-20 09:35:01 | INFO | train_inner | epoch 005: 211 / 3002 loss=2.524, ppl=5.75, wps=5830.3, ups=0.09, wpb=64753, bsz=128, num_updates=12147, lr=9.99108e-05, gnorm=1.968, loss_scale=4, train_wall=11, gb_free=2.8, wall=140175
2021-06-20 09:35:12 | INFO | train_inner | epoch 005: 212 / 3002 loss=2.387, ppl=5.23, wps=5910.7, ups=0.09, wpb=64777, bsz=128, num_updates=12148, lr=9.99108e-05, gnorm=2.698, loss_scale=4, train_wall=11, gb_free=2.8, wall=140186
2021-06-20 09:35:23 | INFO | train_inner | epoch 005: 213 / 3002 loss=2.626, ppl=6.17, wps=5869.3, ups=0.09, wpb=64860, bsz=128, num_updates=12149, lr=9.99108e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=140197
2021-06-20 09:35:34 | INFO | train_inner | epoch 005: 214 / 3002 loss=2.581, ppl=5.98, wps=5852.6, ups=0.09, wpb=64805, bsz=128, num_updates=12150, lr=9.99108e-05, gnorm=2.045, loss_scale=4, train_wall=11, gb_free=2.8, wall=140208
2021-06-20 09:35:45 | INFO | train_inner | epoch 005: 215 / 3002 loss=2.513, ppl=5.71, wps=5701.3, ups=0.09, wpb=64808, bsz=128, num_updates=12151, lr=9.99108e-05, gnorm=2.085, loss_scale=4, train_wall=11, gb_free=2.8, wall=140220
2021-06-20 09:35:57 | INFO | train_inner | epoch 005: 216 / 3002 loss=2.521, ppl=5.74, wps=5799.5, ups=0.09, wpb=64819, bsz=128, num_updates=12152, lr=9.99108e-05, gnorm=2.138, loss_scale=4, train_wall=11, gb_free=2.8, wall=140231
2021-06-20 09:36:07 | INFO | train_inner | epoch 005: 217 / 3002 loss=2.582, ppl=5.99, wps=5933.5, ups=0.09, wpb=64731, bsz=128, num_updates=12153, lr=9.99108e-05, gnorm=2.04, loss_scale=4, train_wall=10, gb_free=2.8, wall=140242
2021-06-20 09:36:18 | INFO | train_inner | epoch 005: 218 / 3002 loss=2.433, ppl=5.4, wps=5850.9, ups=0.09, wpb=64875, bsz=128, num_updates=12154, lr=9.99108e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=140253
2021-06-20 09:36:30 | INFO | train_inner | epoch 005: 219 / 3002 loss=2.544, ppl=5.83, wps=5893.6, ups=0.09, wpb=64963, bsz=128, num_updates=12155, lr=9.99108e-05, gnorm=2.109, loss_scale=4, train_wall=11, gb_free=2.8, wall=140264
2021-06-20 09:36:41 | INFO | train_inner | epoch 005: 220 / 3002 loss=2.561, ppl=5.9, wps=5904, ups=0.09, wpb=64898, bsz=128, num_updates=12156, lr=9.99107e-05, gnorm=1.981, loss_scale=4, train_wall=11, gb_free=2.8, wall=140275
2021-06-20 09:36:51 | INFO | train_inner | epoch 005: 221 / 3002 loss=2.509, ppl=5.69, wps=5912.8, ups=0.09, wpb=64769, bsz=128, num_updates=12157, lr=9.99107e-05, gnorm=2.001, loss_scale=4, train_wall=10, gb_free=2.8, wall=140286
2021-06-20 09:37:03 | INFO | train_inner | epoch 005: 222 / 3002 loss=2.401, ppl=5.28, wps=5776.2, ups=0.09, wpb=64860, bsz=128, num_updates=12158, lr=9.99107e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=140297
2021-06-20 09:37:14 | INFO | train_inner | epoch 005: 223 / 3002 loss=2.634, ppl=6.21, wps=5749.8, ups=0.09, wpb=64866, bsz=128, num_updates=12159, lr=9.99107e-05, gnorm=2.031, loss_scale=4, train_wall=11, gb_free=2.8, wall=140308
2021-06-20 09:37:25 | INFO | train_inner | epoch 005: 224 / 3002 loss=2.465, ppl=5.52, wps=5892.1, ups=0.09, wpb=64856, bsz=128, num_updates=12160, lr=9.99107e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=140319
2021-06-20 09:37:36 | INFO | train_inner | epoch 005: 225 / 3002 loss=2.488, ppl=5.61, wps=5788.2, ups=0.09, wpb=64802, bsz=128, num_updates=12161, lr=9.99107e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=140331
2021-06-20 09:37:47 | INFO | train_inner | epoch 005: 226 / 3002 loss=2.551, ppl=5.86, wps=5811.2, ups=0.09, wpb=64835, bsz=128, num_updates=12162, lr=9.99107e-05, gnorm=2.015, loss_scale=4, train_wall=11, gb_free=2.8, wall=140342
2021-06-20 09:37:59 | INFO | train_inner | epoch 005: 227 / 3002 loss=2.56, ppl=5.9, wps=5777.7, ups=0.09, wpb=64812, bsz=128, num_updates=12163, lr=9.99107e-05, gnorm=2.014, loss_scale=4, train_wall=11, gb_free=2.8, wall=140353
2021-06-20 09:38:09 | INFO | train_inner | epoch 005: 228 / 3002 loss=2.526, ppl=5.76, wps=5943.8, ups=0.09, wpb=64835, bsz=128, num_updates=12164, lr=9.99107e-05, gnorm=2.112, loss_scale=4, train_wall=10, gb_free=2.8, wall=140364
2021-06-20 09:38:21 | INFO | train_inner | epoch 005: 229 / 3002 loss=2.681, ppl=6.41, wps=5747.5, ups=0.09, wpb=64834, bsz=128, num_updates=12165, lr=9.99107e-05, gnorm=2.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=140375
2021-06-20 09:38:32 | INFO | train_inner | epoch 005: 230 / 3002 loss=2.578, ppl=5.97, wps=5898.5, ups=0.09, wpb=64857, bsz=128, num_updates=12166, lr=9.99107e-05, gnorm=2.065, loss_scale=4, train_wall=11, gb_free=2.8, wall=140386
2021-06-20 09:38:43 | INFO | train_inner | epoch 005: 231 / 3002 loss=2.416, ppl=5.34, wps=5854.5, ups=0.09, wpb=64794, bsz=128, num_updates=12167, lr=9.99107e-05, gnorm=1.906, loss_scale=4, train_wall=11, gb_free=2.8, wall=140397
2021-06-20 09:38:54 | INFO | train_inner | epoch 005: 232 / 3002 loss=2.635, ppl=6.21, wps=5836.9, ups=0.09, wpb=64759, bsz=128, num_updates=12168, lr=9.99106e-05, gnorm=2.226, loss_scale=4, train_wall=11, gb_free=2.8, wall=140408
2021-06-20 09:39:05 | INFO | train_inner | epoch 005: 233 / 3002 loss=2.418, ppl=5.34, wps=5857.8, ups=0.09, wpb=64936, bsz=128, num_updates=12169, lr=9.99106e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=140419
2021-06-20 09:39:16 | INFO | train_inner | epoch 005: 234 / 3002 loss=2.561, ppl=5.9, wps=5896.4, ups=0.09, wpb=64787, bsz=128, num_updates=12170, lr=9.99106e-05, gnorm=2.096, loss_scale=4, train_wall=11, gb_free=2.8, wall=140430
2021-06-20 09:39:27 | INFO | train_inner | epoch 005: 235 / 3002 loss=2.597, ppl=6.05, wps=5903.6, ups=0.09, wpb=64813, bsz=128, num_updates=12171, lr=9.99106e-05, gnorm=2.059, loss_scale=4, train_wall=10, gb_free=2.8, wall=140441
2021-06-20 09:39:38 | INFO | train_inner | epoch 005: 236 / 3002 loss=2.509, ppl=5.69, wps=5872.2, ups=0.09, wpb=64835, bsz=128, num_updates=12172, lr=9.99106e-05, gnorm=2.046, loss_scale=4, train_wall=11, gb_free=2.8, wall=140452
2021-06-20 09:39:49 | INFO | train_inner | epoch 005: 237 / 3002 loss=2.487, ppl=5.61, wps=5793.3, ups=0.09, wpb=64814, bsz=128, num_updates=12173, lr=9.99106e-05, gnorm=2.014, loss_scale=4, train_wall=11, gb_free=2.8, wall=140464
2021-06-20 09:40:00 | INFO | train_inner | epoch 005: 238 / 3002 loss=2.412, ppl=5.32, wps=6040.2, ups=0.09, wpb=64923, bsz=128, num_updates=12174, lr=9.99106e-05, gnorm=2.145, loss_scale=4, train_wall=10, gb_free=2.8, wall=140474
2021-06-20 09:40:11 | INFO | train_inner | epoch 005: 239 / 3002 loss=2.539, ppl=5.81, wps=5874.7, ups=0.09, wpb=64893, bsz=128, num_updates=12175, lr=9.99106e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=140485
2021-06-20 09:40:22 | INFO | train_inner | epoch 005: 240 / 3002 loss=2.428, ppl=5.38, wps=5955.9, ups=0.09, wpb=64869, bsz=128, num_updates=12176, lr=9.99106e-05, gnorm=2.217, loss_scale=4, train_wall=10, gb_free=2.8, wall=140496
2021-06-20 09:40:33 | INFO | train_inner | epoch 005: 241 / 3002 loss=2.477, ppl=5.57, wps=5765.7, ups=0.09, wpb=64708, bsz=128, num_updates=12177, lr=9.99106e-05, gnorm=2.089, loss_scale=4, train_wall=11, gb_free=2.8, wall=140507
2021-06-20 09:40:44 | INFO | train_inner | epoch 005: 242 / 3002 loss=2.598, ppl=6.06, wps=5943, ups=0.09, wpb=64792, bsz=128, num_updates=12178, lr=9.99106e-05, gnorm=1.935, loss_scale=4, train_wall=10, gb_free=2.8, wall=140518
2021-06-20 09:40:55 | INFO | train_inner | epoch 005: 243 / 3002 loss=2.512, ppl=5.7, wps=5899.4, ups=0.09, wpb=64822, bsz=128, num_updates=12179, lr=9.99106e-05, gnorm=5.656, loss_scale=4, train_wall=11, gb_free=2.8, wall=140529
2021-06-20 09:41:06 | INFO | train_inner | epoch 005: 244 / 3002 loss=2.459, ppl=5.5, wps=5778, ups=0.09, wpb=64836, bsz=128, num_updates=12180, lr=9.99106e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=140541
2021-06-20 09:41:17 | INFO | train_inner | epoch 005: 245 / 3002 loss=2.642, ppl=6.24, wps=5851.5, ups=0.09, wpb=64886, bsz=128, num_updates=12181, lr=9.99105e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=140552
2021-06-20 09:41:28 | INFO | train_inner | epoch 005: 246 / 3002 loss=2.678, ppl=6.4, wps=5833.7, ups=0.09, wpb=64827, bsz=128, num_updates=12182, lr=9.99105e-05, gnorm=2.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=140563
2021-06-20 09:41:39 | INFO | train_inner | epoch 005: 247 / 3002 loss=2.638, ppl=6.23, wps=5884.1, ups=0.09, wpb=64794, bsz=128, num_updates=12183, lr=9.99105e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=140574
2021-06-20 09:41:50 | INFO | train_inner | epoch 005: 248 / 3002 loss=2.707, ppl=6.53, wps=5964.6, ups=0.09, wpb=64768, bsz=128, num_updates=12184, lr=9.99105e-05, gnorm=1.994, loss_scale=4, train_wall=10, gb_free=2.8, wall=140585
2021-06-20 09:42:01 | INFO | train_inner | epoch 005: 249 / 3002 loss=2.496, ppl=5.64, wps=5897.5, ups=0.09, wpb=64905, bsz=128, num_updates=12185, lr=9.99105e-05, gnorm=2.015, loss_scale=4, train_wall=11, gb_free=2.8, wall=140596
2021-06-20 09:42:13 | INFO | train_inner | epoch 005: 250 / 3002 loss=2.451, ppl=5.47, wps=5731.7, ups=0.09, wpb=64793, bsz=128, num_updates=12186, lr=9.99105e-05, gnorm=2.043, loss_scale=4, train_wall=11, gb_free=2.8, wall=140607
2021-06-20 09:42:23 | INFO | train_inner | epoch 005: 251 / 3002 loss=2.555, ppl=5.88, wps=5978.8, ups=0.09, wpb=64874, bsz=128, num_updates=12187, lr=9.99105e-05, gnorm=2.128, loss_scale=4, train_wall=10, gb_free=2.8, wall=140618
2021-06-20 09:42:35 | INFO | train_inner | epoch 005: 252 / 3002 loss=2.518, ppl=5.73, wps=5825.8, ups=0.09, wpb=64798, bsz=128, num_updates=12188, lr=9.99105e-05, gnorm=2.039, loss_scale=4, train_wall=11, gb_free=2.8, wall=140629
2021-06-20 09:42:46 | INFO | train_inner | epoch 005: 253 / 3002 loss=2.723, ppl=6.6, wps=5878.8, ups=0.09, wpb=64858, bsz=128, num_updates=12189, lr=9.99105e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=140640
2021-06-20 09:42:57 | INFO | train_inner | epoch 005: 254 / 3002 loss=2.646, ppl=6.26, wps=5913.5, ups=0.09, wpb=64750, bsz=128, num_updates=12190, lr=9.99105e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=140651
2021-06-20 09:43:08 | INFO | train_inner | epoch 005: 255 / 3002 loss=2.504, ppl=5.67, wps=5849.2, ups=0.09, wpb=64794, bsz=128, num_updates=12191, lr=9.99105e-05, gnorm=1.936, loss_scale=4, train_wall=11, gb_free=2.8, wall=140662
2021-06-20 09:43:19 | INFO | train_inner | epoch 005: 256 / 3002 loss=2.473, ppl=5.55, wps=5811.8, ups=0.09, wpb=64840, bsz=128, num_updates=12192, lr=9.99105e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=140673
2021-06-20 09:43:30 | INFO | train_inner | epoch 005: 257 / 3002 loss=2.701, ppl=6.5, wps=5814.6, ups=0.09, wpb=64873, bsz=128, num_updates=12193, lr=9.99104e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=140684
2021-06-20 09:43:41 | INFO | train_inner | epoch 005: 258 / 3002 loss=2.671, ppl=6.37, wps=5780.6, ups=0.09, wpb=64779, bsz=128, num_updates=12194, lr=9.99104e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=140695
2021-06-20 09:43:52 | INFO | train_inner | epoch 005: 259 / 3002 loss=2.536, ppl=5.8, wps=5930.4, ups=0.09, wpb=64847, bsz=128, num_updates=12195, lr=9.99104e-05, gnorm=1.99, loss_scale=4, train_wall=10, gb_free=2.8, wall=140706
2021-06-20 09:44:03 | INFO | train_inner | epoch 005: 260 / 3002 loss=2.462, ppl=5.51, wps=5782.4, ups=0.09, wpb=64819, bsz=128, num_updates=12196, lr=9.99104e-05, gnorm=2.046, loss_scale=4, train_wall=11, gb_free=2.8, wall=140718
2021-06-20 09:44:14 | INFO | train_inner | epoch 005: 261 / 3002 loss=2.428, ppl=5.38, wps=5821.9, ups=0.09, wpb=64790, bsz=128, num_updates=12197, lr=9.99104e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=140729
2021-06-20 09:44:25 | INFO | train_inner | epoch 005: 262 / 3002 loss=2.479, ppl=5.57, wps=5860.1, ups=0.09, wpb=64788, bsz=128, num_updates=12198, lr=9.99104e-05, gnorm=2.021, loss_scale=4, train_wall=11, gb_free=2.8, wall=140740
2021-06-20 09:44:37 | INFO | train_inner | epoch 005: 263 / 3002 loss=2.664, ppl=6.34, wps=5844.2, ups=0.09, wpb=64792, bsz=128, num_updates=12199, lr=9.99104e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=140751
2021-06-20 09:44:48 | INFO | train_inner | epoch 005: 264 / 3002 loss=2.505, ppl=5.68, wps=5851.4, ups=0.09, wpb=64823, bsz=128, num_updates=12200, lr=9.99104e-05, gnorm=2.386, loss_scale=4, train_wall=11, gb_free=2.8, wall=140762
2021-06-20 09:44:59 | INFO | train_inner | epoch 005: 265 / 3002 loss=2.511, ppl=5.7, wps=5884.3, ups=0.09, wpb=64735, bsz=128, num_updates=12201, lr=9.99104e-05, gnorm=2.072, loss_scale=4, train_wall=11, gb_free=2.8, wall=140773
2021-06-20 09:45:10 | INFO | train_inner | epoch 005: 266 / 3002 loss=2.476, ppl=5.56, wps=5953, ups=0.09, wpb=64883, bsz=128, num_updates=12202, lr=9.99104e-05, gnorm=1.962, loss_scale=4, train_wall=10, gb_free=2.8, wall=140784
2021-06-20 09:45:21 | INFO | train_inner | epoch 005: 267 / 3002 loss=2.485, ppl=5.6, wps=5771.9, ups=0.09, wpb=64750, bsz=128, num_updates=12203, lr=9.99104e-05, gnorm=2.821, loss_scale=4, train_wall=11, gb_free=2.8, wall=140795
2021-06-20 09:45:32 | INFO | train_inner | epoch 005: 268 / 3002 loss=2.532, ppl=5.79, wps=5999, ups=0.09, wpb=64842, bsz=128, num_updates=12204, lr=9.99104e-05, gnorm=2.097, loss_scale=4, train_wall=10, gb_free=2.8, wall=140806
2021-06-20 09:45:43 | INFO | train_inner | epoch 005: 269 / 3002 loss=2.48, ppl=5.58, wps=5838.6, ups=0.09, wpb=64882, bsz=128, num_updates=12205, lr=9.99104e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=140817
2021-06-20 09:45:54 | INFO | train_inner | epoch 005: 270 / 3002 loss=2.576, ppl=5.96, wps=5775.7, ups=0.09, wpb=64795, bsz=128, num_updates=12206, lr=9.99103e-05, gnorm=2.183, loss_scale=4, train_wall=11, gb_free=2.8, wall=140828
2021-06-20 09:46:05 | INFO | train_inner | epoch 005: 271 / 3002 loss=2.537, ppl=5.8, wps=5885.5, ups=0.09, wpb=64851, bsz=128, num_updates=12207, lr=9.99103e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=140839
2021-06-20 09:46:16 | INFO | train_inner | epoch 005: 272 / 3002 loss=2.67, ppl=6.37, wps=5801.7, ups=0.09, wpb=64794, bsz=128, num_updates=12208, lr=9.99103e-05, gnorm=3.224, loss_scale=4, train_wall=11, gb_free=2.8, wall=140850
2021-06-20 09:46:27 | INFO | train_inner | epoch 005: 273 / 3002 loss=2.524, ppl=5.75, wps=5820, ups=0.09, wpb=64878, bsz=128, num_updates=12209, lr=9.99103e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=140862
2021-06-20 09:46:38 | INFO | train_inner | epoch 005: 274 / 3002 loss=2.601, ppl=6.07, wps=5889.8, ups=0.09, wpb=64910, bsz=128, num_updates=12210, lr=9.99103e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=140873
2021-06-20 09:46:49 | INFO | train_inner | epoch 005: 275 / 3002 loss=2.432, ppl=5.4, wps=5894.6, ups=0.09, wpb=64738, bsz=128, num_updates=12211, lr=9.99103e-05, gnorm=2.124, loss_scale=4, train_wall=11, gb_free=2.8, wall=140884
2021-06-20 09:47:00 | INFO | train_inner | epoch 005: 276 / 3002 loss=2.618, ppl=6.14, wps=6022.3, ups=0.09, wpb=64925, bsz=128, num_updates=12212, lr=9.99103e-05, gnorm=2.018, loss_scale=4, train_wall=10, gb_free=2.8, wall=140894
2021-06-20 09:47:11 | INFO | train_inner | epoch 005: 277 / 3002 loss=2.688, ppl=6.44, wps=5726.8, ups=0.09, wpb=64734, bsz=128, num_updates=12213, lr=9.99103e-05, gnorm=2.601, loss_scale=4, train_wall=11, gb_free=2.8, wall=140906
2021-06-20 09:47:23 | INFO | train_inner | epoch 005: 278 / 3002 loss=2.61, ppl=6.1, wps=5781.1, ups=0.09, wpb=64805, bsz=128, num_updates=12214, lr=9.99103e-05, gnorm=2.054, loss_scale=4, train_wall=11, gb_free=2.8, wall=140917
2021-06-20 09:47:34 | INFO | train_inner | epoch 005: 279 / 3002 loss=2.616, ppl=6.13, wps=5713.1, ups=0.09, wpb=64854, bsz=128, num_updates=12215, lr=9.99103e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=140928
2021-06-20 09:47:45 | INFO | train_inner | epoch 005: 280 / 3002 loss=2.435, ppl=5.41, wps=5781, ups=0.09, wpb=64807, bsz=128, num_updates=12216, lr=9.99103e-05, gnorm=2.012, loss_scale=4, train_wall=11, gb_free=2.8, wall=140939
2021-06-20 09:47:56 | INFO | train_inner | epoch 005: 281 / 3002 loss=2.59, ppl=6.02, wps=5868.9, ups=0.09, wpb=64827, bsz=128, num_updates=12217, lr=9.99103e-05, gnorm=2.669, loss_scale=4, train_wall=11, gb_free=2.8, wall=140950
2021-06-20 09:48:07 | INFO | train_inner | epoch 005: 282 / 3002 loss=2.547, ppl=5.84, wps=5891.5, ups=0.09, wpb=64813, bsz=128, num_updates=12218, lr=9.99102e-05, gnorm=2.114, loss_scale=4, train_wall=11, gb_free=2.8, wall=140961
2021-06-20 09:48:18 | INFO | train_inner | epoch 005: 283 / 3002 loss=2.642, ppl=6.24, wps=5855.6, ups=0.09, wpb=64809, bsz=128, num_updates=12219, lr=9.99102e-05, gnorm=2.1, loss_scale=4, train_wall=11, gb_free=2.8, wall=140973
2021-06-20 09:48:29 | INFO | train_inner | epoch 005: 284 / 3002 loss=2.624, ppl=6.17, wps=5877.7, ups=0.09, wpb=64831, bsz=128, num_updates=12220, lr=9.99102e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=140984
2021-06-20 09:48:40 | INFO | train_inner | epoch 005: 285 / 3002 loss=2.469, ppl=5.54, wps=5909.2, ups=0.09, wpb=64928, bsz=128, num_updates=12221, lr=9.99102e-05, gnorm=2.057, loss_scale=4, train_wall=11, gb_free=2.8, wall=140995
2021-06-20 09:48:51 | INFO | train_inner | epoch 005: 286 / 3002 loss=2.568, ppl=5.93, wps=5889.6, ups=0.09, wpb=64794, bsz=128, num_updates=12222, lr=9.99102e-05, gnorm=1.972, loss_scale=4, train_wall=11, gb_free=2.8, wall=141006
2021-06-20 09:49:02 | INFO | train_inner | epoch 005: 287 / 3002 loss=2.341, ppl=5.07, wps=5827.4, ups=0.09, wpb=64835, bsz=128, num_updates=12223, lr=9.99102e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=141017
2021-06-20 09:49:13 | INFO | train_inner | epoch 005: 288 / 3002 loss=2.372, ppl=5.17, wps=5829, ups=0.09, wpb=64811, bsz=128, num_updates=12224, lr=9.99102e-05, gnorm=1.991, loss_scale=4, train_wall=11, gb_free=2.8, wall=141028
2021-06-20 09:49:25 | INFO | train_inner | epoch 005: 289 / 3002 loss=2.669, ppl=6.36, wps=5783.9, ups=0.09, wpb=64813, bsz=128, num_updates=12225, lr=9.99102e-05, gnorm=2.082, loss_scale=4, train_wall=11, gb_free=2.8, wall=141039
2021-06-20 09:49:36 | INFO | train_inner | epoch 005: 290 / 3002 loss=2.505, ppl=5.68, wps=5968.6, ups=0.09, wpb=64839, bsz=128, num_updates=12226, lr=9.99102e-05, gnorm=1.979, loss_scale=4, train_wall=10, gb_free=2.8, wall=141050
2021-06-20 09:49:47 | INFO | train_inner | epoch 005: 291 / 3002 loss=2.482, ppl=5.59, wps=5883.2, ups=0.09, wpb=64837, bsz=128, num_updates=12227, lr=9.99102e-05, gnorm=2.085, loss_scale=4, train_wall=11, gb_free=2.8, wall=141061
2021-06-20 09:49:58 | INFO | train_inner | epoch 005: 292 / 3002 loss=2.572, ppl=5.95, wps=5824, ups=0.09, wpb=64835, bsz=128, num_updates=12228, lr=9.99102e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=141072
2021-06-20 09:50:08 | INFO | train_inner | epoch 005: 293 / 3002 loss=2.563, ppl=5.91, wps=6025.8, ups=0.09, wpb=64843, bsz=128, num_updates=12229, lr=9.99102e-05, gnorm=1.973, loss_scale=4, train_wall=10, gb_free=2.8, wall=141083
2021-06-20 09:50:20 | INFO | train_inner | epoch 005: 294 / 3002 loss=2.419, ppl=5.35, wps=5841.6, ups=0.09, wpb=64767, bsz=128, num_updates=12230, lr=9.99102e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=141094
2021-06-20 09:50:31 | INFO | train_inner | epoch 005: 295 / 3002 loss=2.651, ppl=6.28, wps=5792.1, ups=0.09, wpb=64839, bsz=128, num_updates=12231, lr=9.99101e-05, gnorm=1.997, loss_scale=4, train_wall=11, gb_free=2.8, wall=141105
2021-06-20 09:50:42 | INFO | train_inner | epoch 005: 296 / 3002 loss=2.417, ppl=5.34, wps=5898.3, ups=0.09, wpb=64892, bsz=128, num_updates=12232, lr=9.99101e-05, gnorm=1.886, loss_scale=4, train_wall=11, gb_free=2.8, wall=141116
2021-06-20 09:50:53 | INFO | train_inner | epoch 005: 297 / 3002 loss=2.448, ppl=5.46, wps=5778.1, ups=0.09, wpb=64891, bsz=128, num_updates=12233, lr=9.99101e-05, gnorm=2.097, loss_scale=4, train_wall=11, gb_free=2.8, wall=141127
2021-06-20 09:51:04 | INFO | train_inner | epoch 005: 298 / 3002 loss=2.404, ppl=5.29, wps=5853.1, ups=0.09, wpb=64777, bsz=128, num_updates=12234, lr=9.99101e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=141138
2021-06-20 09:51:15 | INFO | train_inner | epoch 005: 299 / 3002 loss=2.473, ppl=5.55, wps=5687.9, ups=0.09, wpb=64812, bsz=128, num_updates=12235, lr=9.99101e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=141150
2021-06-20 09:51:26 | INFO | train_inner | epoch 005: 300 / 3002 loss=2.515, ppl=5.72, wps=5867.3, ups=0.09, wpb=64742, bsz=128, num_updates=12236, lr=9.99101e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=141161
2021-06-20 09:51:37 | INFO | train_inner | epoch 005: 301 / 3002 loss=2.421, ppl=5.36, wps=5952.8, ups=0.09, wpb=64858, bsz=128, num_updates=12237, lr=9.99101e-05, gnorm=1.998, loss_scale=4, train_wall=10, gb_free=2.8, wall=141172
2021-06-20 09:51:48 | INFO | train_inner | epoch 005: 302 / 3002 loss=2.546, ppl=5.84, wps=5905.6, ups=0.09, wpb=64773, bsz=128, num_updates=12238, lr=9.99101e-05, gnorm=1.969, loss_scale=4, train_wall=10, gb_free=2.8, wall=141183
2021-06-20 09:51:59 | INFO | train_inner | epoch 005: 303 / 3002 loss=2.5, ppl=5.66, wps=5894.7, ups=0.09, wpb=64846, bsz=128, num_updates=12239, lr=9.99101e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=141194
2021-06-20 09:52:10 | INFO | train_inner | epoch 005: 304 / 3002 loss=2.446, ppl=5.45, wps=5884.9, ups=0.09, wpb=64834, bsz=128, num_updates=12240, lr=9.99101e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=141205
2021-06-20 09:52:21 | INFO | train_inner | epoch 005: 305 / 3002 loss=2.451, ppl=5.47, wps=5913.7, ups=0.09, wpb=64835, bsz=128, num_updates=12241, lr=9.99101e-05, gnorm=2.385, loss_scale=4, train_wall=10, gb_free=2.8, wall=141216
2021-06-20 09:52:33 | INFO | train_inner | epoch 005: 306 / 3002 loss=2.532, ppl=5.78, wps=5730, ups=0.09, wpb=64851, bsz=128, num_updates=12242, lr=9.99101e-05, gnorm=2.044, loss_scale=4, train_wall=11, gb_free=2.8, wall=141227
2021-06-20 09:52:44 | INFO | train_inner | epoch 005: 307 / 3002 loss=2.454, ppl=5.48, wps=5891.2, ups=0.09, wpb=64837, bsz=128, num_updates=12243, lr=9.991e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=141238
2021-06-20 09:52:54 | INFO | train_inner | epoch 005: 308 / 3002 loss=2.503, ppl=5.67, wps=6013.5, ups=0.09, wpb=64798, bsz=128, num_updates=12244, lr=9.991e-05, gnorm=2.89, loss_scale=4, train_wall=10, gb_free=2.8, wall=141249
2021-06-20 09:53:05 | INFO | train_inner | epoch 005: 309 / 3002 loss=2.556, ppl=5.88, wps=5858.5, ups=0.09, wpb=64768, bsz=128, num_updates=12245, lr=9.991e-05, gnorm=1.993, loss_scale=4, train_wall=11, gb_free=2.8, wall=141260
2021-06-20 09:53:17 | INFO | train_inner | epoch 005: 310 / 3002 loss=2.383, ppl=5.22, wps=5852.3, ups=0.09, wpb=64802, bsz=128, num_updates=12246, lr=9.991e-05, gnorm=1.897, loss_scale=4, train_wall=11, gb_free=2.8, wall=141271
2021-06-20 09:53:28 | INFO | train_inner | epoch 005: 311 / 3002 loss=2.444, ppl=5.44, wps=5872, ups=0.09, wpb=64873, bsz=128, num_updates=12247, lr=9.991e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=141282
2021-06-20 09:53:39 | INFO | train_inner | epoch 005: 312 / 3002 loss=2.511, ppl=5.7, wps=5821.3, ups=0.09, wpb=64797, bsz=128, num_updates=12248, lr=9.991e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=141293
2021-06-20 09:53:50 | INFO | train_inner | epoch 005: 313 / 3002 loss=2.645, ppl=6.26, wps=5852.5, ups=0.09, wpb=64796, bsz=128, num_updates=12249, lr=9.991e-05, gnorm=2.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=141304
2021-06-20 09:54:01 | INFO | train_inner | epoch 005: 314 / 3002 loss=2.469, ppl=5.54, wps=5863.2, ups=0.09, wpb=64823, bsz=128, num_updates=12250, lr=9.991e-05, gnorm=2.158, loss_scale=4, train_wall=11, gb_free=2.8, wall=141315
2021-06-20 09:54:12 | INFO | train_inner | epoch 005: 315 / 3002 loss=2.581, ppl=5.98, wps=5769.7, ups=0.09, wpb=64769, bsz=128, num_updates=12251, lr=9.991e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=141326
2021-06-20 09:54:23 | INFO | train_inner | epoch 005: 316 / 3002 loss=2.6, ppl=6.06, wps=5801.6, ups=0.09, wpb=64834, bsz=128, num_updates=12252, lr=9.991e-05, gnorm=1.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=141338
2021-06-20 09:54:34 | INFO | train_inner | epoch 005: 317 / 3002 loss=2.522, ppl=5.74, wps=5795.1, ups=0.09, wpb=64833, bsz=128, num_updates=12253, lr=9.991e-05, gnorm=1.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=141349
2021-06-20 09:54:45 | INFO | train_inner | epoch 005: 318 / 3002 loss=2.646, ppl=6.26, wps=5871.4, ups=0.09, wpb=64860, bsz=128, num_updates=12254, lr=9.991e-05, gnorm=2.056, loss_scale=4, train_wall=11, gb_free=2.8, wall=141360
2021-06-20 09:54:57 | INFO | train_inner | epoch 005: 319 / 3002 loss=2.403, ppl=5.29, wps=5858.6, ups=0.09, wpb=64899, bsz=128, num_updates=12255, lr=9.991e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=141371
2021-06-20 09:55:08 | INFO | train_inner | epoch 005: 320 / 3002 loss=2.625, ppl=6.17, wps=5836.7, ups=0.09, wpb=64781, bsz=128, num_updates=12256, lr=9.99099e-05, gnorm=2.769, loss_scale=8, train_wall=11, gb_free=2.8, wall=141382
2021-06-20 09:55:19 | INFO | train_inner | epoch 005: 321 / 3002 loss=2.692, ppl=6.46, wps=5790.3, ups=0.09, wpb=64852, bsz=128, num_updates=12257, lr=9.99099e-05, gnorm=2.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=141393
2021-06-20 09:55:30 | INFO | train_inner | epoch 005: 322 / 3002 loss=2.592, ppl=6.03, wps=5878.6, ups=0.09, wpb=64918, bsz=128, num_updates=12258, lr=9.99099e-05, gnorm=2.085, loss_scale=8, train_wall=11, gb_free=2.8, wall=141404
2021-06-20 09:55:41 | INFO | train_inner | epoch 005: 323 / 3002 loss=2.53, ppl=5.78, wps=5764.8, ups=0.09, wpb=64811, bsz=128, num_updates=12259, lr=9.99099e-05, gnorm=2.544, loss_scale=8, train_wall=11, gb_free=2.8, wall=141415
2021-06-20 09:55:52 | INFO | train_inner | epoch 005: 324 / 3002 loss=2.47, ppl=5.54, wps=5841.3, ups=0.09, wpb=64715, bsz=128, num_updates=12260, lr=9.99099e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=141427
2021-06-20 09:56:03 | INFO | train_inner | epoch 005: 325 / 3002 loss=2.563, ppl=5.91, wps=5785.1, ups=0.09, wpb=64803, bsz=128, num_updates=12261, lr=9.99099e-05, gnorm=2.042, loss_scale=8, train_wall=11, gb_free=2.8, wall=141438
2021-06-20 09:56:15 | INFO | train_inner | epoch 005: 326 / 3002 loss=2.512, ppl=5.7, wps=5751.3, ups=0.09, wpb=64869, bsz=128, num_updates=12262, lr=9.99099e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=141449
2021-06-20 09:56:26 | INFO | train_inner | epoch 005: 327 / 3002 loss=2.655, ppl=6.3, wps=5813.6, ups=0.09, wpb=64822, bsz=128, num_updates=12263, lr=9.99099e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=141460
2021-06-20 09:56:37 | INFO | train_inner | epoch 005: 328 / 3002 loss=2.373, ppl=5.18, wps=5817.5, ups=0.09, wpb=64833, bsz=128, num_updates=12264, lr=9.99099e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=141471
2021-06-20 09:56:48 | INFO | train_inner | epoch 005: 329 / 3002 loss=2.374, ppl=5.18, wps=5884.9, ups=0.09, wpb=64875, bsz=128, num_updates=12265, lr=9.99099e-05, gnorm=2.063, loss_scale=8, train_wall=11, gb_free=2.8, wall=141482
2021-06-20 09:56:59 | INFO | train_inner | epoch 005: 330 / 3002 loss=2.547, ppl=5.84, wps=5899, ups=0.09, wpb=64863, bsz=128, num_updates=12266, lr=9.99099e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=141493
2021-06-20 09:57:10 | INFO | train_inner | epoch 005: 331 / 3002 loss=2.586, ppl=6.01, wps=5802.5, ups=0.09, wpb=64858, bsz=128, num_updates=12267, lr=9.99099e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=141505
2021-06-20 09:57:21 | INFO | train_inner | epoch 005: 332 / 3002 loss=2.653, ppl=6.29, wps=5840.1, ups=0.09, wpb=64864, bsz=128, num_updates=12268, lr=9.99098e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=141516
2021-06-20 09:57:33 | INFO | train_inner | epoch 005: 333 / 3002 loss=2.417, ppl=5.34, wps=5784.4, ups=0.09, wpb=64836, bsz=128, num_updates=12269, lr=9.99098e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=141527
2021-06-20 09:57:44 | INFO | train_inner | epoch 005: 334 / 3002 loss=2.452, ppl=5.47, wps=5876.5, ups=0.09, wpb=64859, bsz=128, num_updates=12270, lr=9.99098e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=141538
2021-06-20 09:57:55 | INFO | train_inner | epoch 005: 335 / 3002 loss=2.413, ppl=5.33, wps=5826.4, ups=0.09, wpb=64817, bsz=128, num_updates=12271, lr=9.99098e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=141549
2021-06-20 09:58:06 | INFO | train_inner | epoch 005: 336 / 3002 loss=2.689, ppl=6.45, wps=5873, ups=0.09, wpb=64800, bsz=128, num_updates=12272, lr=9.99098e-05, gnorm=2.066, loss_scale=8, train_wall=11, gb_free=2.8, wall=141560
2021-06-20 09:58:17 | INFO | train_inner | epoch 005: 337 / 3002 loss=2.552, ppl=5.86, wps=5761.6, ups=0.09, wpb=64815, bsz=128, num_updates=12273, lr=9.99098e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=141571
2021-06-20 09:58:28 | INFO | train_inner | epoch 005: 338 / 3002 loss=2.493, ppl=5.63, wps=5918, ups=0.09, wpb=64883, bsz=128, num_updates=12274, lr=9.99098e-05, gnorm=2.029, loss_scale=8, train_wall=11, gb_free=2.8, wall=141582
2021-06-20 09:58:39 | INFO | train_inner | epoch 005: 339 / 3002 loss=2.472, ppl=5.55, wps=5821.2, ups=0.09, wpb=64848, bsz=128, num_updates=12275, lr=9.99098e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=141593
2021-06-20 09:58:50 | INFO | train_inner | epoch 005: 340 / 3002 loss=2.3, ppl=4.92, wps=5878.5, ups=0.09, wpb=64898, bsz=128, num_updates=12276, lr=9.99098e-05, gnorm=2.092, loss_scale=8, train_wall=11, gb_free=2.8, wall=141604
2021-06-20 09:59:01 | INFO | train_inner | epoch 005: 341 / 3002 loss=2.538, ppl=5.81, wps=5835.4, ups=0.09, wpb=64842, bsz=128, num_updates=12277, lr=9.99098e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=141616
2021-06-20 09:59:12 | INFO | train_inner | epoch 005: 342 / 3002 loss=2.474, ppl=5.55, wps=5812.4, ups=0.09, wpb=64849, bsz=128, num_updates=12278, lr=9.99098e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=141627
2021-06-20 09:59:24 | INFO | train_inner | epoch 005: 343 / 3002 loss=2.682, ppl=6.42, wps=5787.1, ups=0.09, wpb=64849, bsz=128, num_updates=12279, lr=9.99098e-05, gnorm=2.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=141638
2021-06-20 09:59:34 | INFO | train_inner | epoch 005: 344 / 3002 loss=2.519, ppl=5.73, wps=5989.5, ups=0.09, wpb=64808, bsz=128, num_updates=12280, lr=9.99098e-05, gnorm=2.043, loss_scale=8, train_wall=10, gb_free=2.8, wall=141649
2021-06-20 09:59:46 | INFO | train_inner | epoch 005: 345 / 3002 loss=2.705, ppl=6.52, wps=5820.8, ups=0.09, wpb=64814, bsz=128, num_updates=12281, lr=9.99097e-05, gnorm=2.159, loss_scale=8, train_wall=11, gb_free=2.8, wall=141660
2021-06-20 09:59:56 | INFO | train_inner | epoch 005: 346 / 3002 loss=2.743, ppl=6.7, wps=5948.6, ups=0.09, wpb=64738, bsz=128, num_updates=12282, lr=9.99097e-05, gnorm=2.081, loss_scale=8, train_wall=10, gb_free=2.8, wall=141671
2021-06-20 10:00:07 | INFO | train_inner | epoch 005: 347 / 3002 loss=2.598, ppl=6.06, wps=5943.2, ups=0.09, wpb=64855, bsz=128, num_updates=12283, lr=9.99097e-05, gnorm=1.973, loss_scale=8, train_wall=10, gb_free=2.8, wall=141682
2021-06-20 10:00:18 | INFO | train_inner | epoch 005: 348 / 3002 loss=2.528, ppl=5.77, wps=5881.6, ups=0.09, wpb=64878, bsz=128, num_updates=12284, lr=9.99097e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=141693
2021-06-20 10:00:30 | INFO | train_inner | epoch 005: 349 / 3002 loss=2.627, ppl=6.18, wps=5781.1, ups=0.09, wpb=64840, bsz=128, num_updates=12285, lr=9.99097e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=141704
2021-06-20 10:00:41 | INFO | train_inner | epoch 005: 350 / 3002 loss=2.505, ppl=5.68, wps=5671.7, ups=0.09, wpb=64791, bsz=128, num_updates=12286, lr=9.99097e-05, gnorm=2.097, loss_scale=8, train_wall=11, gb_free=2.8, wall=141715
2021-06-20 10:00:52 | INFO | train_inner | epoch 005: 351 / 3002 loss=2.447, ppl=5.45, wps=5856.9, ups=0.09, wpb=64832, bsz=128, num_updates=12287, lr=9.99097e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=141726
2021-06-20 10:01:03 | INFO | train_inner | epoch 005: 352 / 3002 loss=2.448, ppl=5.46, wps=5849, ups=0.09, wpb=64815, bsz=128, num_updates=12288, lr=9.99097e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=141738
2021-06-20 10:01:14 | INFO | train_inner | epoch 005: 353 / 3002 loss=2.479, ppl=5.57, wps=5759.2, ups=0.09, wpb=64758, bsz=128, num_updates=12289, lr=9.99097e-05, gnorm=2.107, loss_scale=8, train_wall=11, gb_free=2.8, wall=141749
2021-06-20 10:01:25 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-20 10:01:36 | INFO | train_inner | epoch 005: 355 / 3002 loss=2.665, ppl=6.34, wps=2957.5, ups=0.05, wpb=64845, bsz=128, num_updates=12290, lr=9.99097e-05, gnorm=4.009, loss_scale=4, train_wall=21, gb_free=2.8, wall=141771
2021-06-20 10:01:47 | INFO | train_inner | epoch 005: 356 / 3002 loss=2.625, ppl=6.17, wps=5957.9, ups=0.09, wpb=64834, bsz=128, num_updates=12291, lr=9.99097e-05, gnorm=2.008, loss_scale=4, train_wall=10, gb_free=2.8, wall=141782
2021-06-20 10:01:58 | INFO | train_inner | epoch 005: 357 / 3002 loss=2.429, ppl=5.38, wps=5864.9, ups=0.09, wpb=64873, bsz=128, num_updates=12292, lr=9.99097e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=141793
2021-06-20 10:02:10 | INFO | train_inner | epoch 005: 358 / 3002 loss=2.351, ppl=5.1, wps=5773.3, ups=0.09, wpb=64886, bsz=128, num_updates=12293, lr=9.99096e-05, gnorm=2.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=141804
2021-06-20 10:02:21 | INFO | train_inner | epoch 005: 359 / 3002 loss=2.39, ppl=5.24, wps=5783.6, ups=0.09, wpb=64786, bsz=128, num_updates=12294, lr=9.99096e-05, gnorm=1.956, loss_scale=4, train_wall=11, gb_free=2.8, wall=141815
2021-06-20 10:02:32 | INFO | train_inner | epoch 005: 360 / 3002 loss=2.524, ppl=5.75, wps=5996.5, ups=0.09, wpb=64756, bsz=128, num_updates=12295, lr=9.99096e-05, gnorm=1.986, loss_scale=4, train_wall=10, gb_free=2.8, wall=141826
2021-06-20 10:02:42 | INFO | train_inner | epoch 005: 361 / 3002 loss=2.462, ppl=5.51, wps=5947, ups=0.09, wpb=64798, bsz=128, num_updates=12296, lr=9.99096e-05, gnorm=1.975, loss_scale=4, train_wall=10, gb_free=2.8, wall=141837
2021-06-20 10:02:53 | INFO | train_inner | epoch 005: 362 / 3002 loss=2.597, ppl=6.05, wps=5894.9, ups=0.09, wpb=64842, bsz=128, num_updates=12297, lr=9.99096e-05, gnorm=2.333, loss_scale=4, train_wall=11, gb_free=2.8, wall=141848
2021-06-20 10:03:04 | INFO | train_inner | epoch 005: 363 / 3002 loss=2.534, ppl=5.79, wps=5883.5, ups=0.09, wpb=64808, bsz=128, num_updates=12298, lr=9.99096e-05, gnorm=1.973, loss_scale=4, train_wall=11, gb_free=2.8, wall=141859
2021-06-20 10:03:16 | INFO | train_inner | epoch 005: 364 / 3002 loss=2.361, ppl=5.14, wps=5802.5, ups=0.09, wpb=64785, bsz=128, num_updates=12299, lr=9.99096e-05, gnorm=1.968, loss_scale=4, train_wall=11, gb_free=2.8, wall=141870
2021-06-20 10:03:27 | INFO | train_inner | epoch 005: 365 / 3002 loss=2.455, ppl=5.48, wps=5735.3, ups=0.09, wpb=64853, bsz=128, num_updates=12300, lr=9.99096e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=141881
2021-06-20 10:03:38 | INFO | train_inner | epoch 005: 366 / 3002 loss=2.413, ppl=5.33, wps=5824.5, ups=0.09, wpb=64813, bsz=128, num_updates=12301, lr=9.99096e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=141892
2021-06-20 10:03:49 | INFO | train_inner | epoch 005: 367 / 3002 loss=2.36, ppl=5.13, wps=5837.7, ups=0.09, wpb=64844, bsz=128, num_updates=12302, lr=9.99096e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=141903
2021-06-20 10:04:00 | INFO | train_inner | epoch 005: 368 / 3002 loss=2.45, ppl=5.46, wps=5880.3, ups=0.09, wpb=64850, bsz=128, num_updates=12303, lr=9.99096e-05, gnorm=1.913, loss_scale=4, train_wall=11, gb_free=2.8, wall=141915
2021-06-20 10:04:11 | INFO | train_inner | epoch 005: 369 / 3002 loss=2.521, ppl=5.74, wps=5796.1, ups=0.09, wpb=64790, bsz=128, num_updates=12304, lr=9.99096e-05, gnorm=1.988, loss_scale=4, train_wall=11, gb_free=2.8, wall=141926
2021-06-20 10:04:23 | INFO | train_inner | epoch 005: 370 / 3002 loss=2.462, ppl=5.51, wps=5812.4, ups=0.09, wpb=64922, bsz=128, num_updates=12305, lr=9.99096e-05, gnorm=2.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=141937
2021-06-20 10:04:34 | INFO | train_inner | epoch 005: 371 / 3002 loss=2.646, ppl=6.26, wps=5756.7, ups=0.09, wpb=64773, bsz=128, num_updates=12306, lr=9.99095e-05, gnorm=5.288, loss_scale=4, train_wall=11, gb_free=2.8, wall=141948
2021-06-20 10:04:45 | INFO | train_inner | epoch 005: 372 / 3002 loss=2.532, ppl=5.78, wps=5977, ups=0.09, wpb=64857, bsz=128, num_updates=12307, lr=9.99095e-05, gnorm=2.058, loss_scale=4, train_wall=10, gb_free=2.8, wall=141959
2021-06-20 10:04:56 | INFO | train_inner | epoch 005: 373 / 3002 loss=2.461, ppl=5.51, wps=5775, ups=0.09, wpb=64833, bsz=128, num_updates=12308, lr=9.99095e-05, gnorm=2.402, loss_scale=4, train_wall=11, gb_free=2.8, wall=141970
2021-06-20 10:05:07 | INFO | train_inner | epoch 005: 374 / 3002 loss=2.396, ppl=5.26, wps=5761.1, ups=0.09, wpb=64838, bsz=128, num_updates=12309, lr=9.99095e-05, gnorm=2.06, loss_scale=4, train_wall=11, gb_free=2.8, wall=141981
2021-06-20 10:05:18 | INFO | train_inner | epoch 005: 375 / 3002 loss=2.591, ppl=6.03, wps=5800.9, ups=0.09, wpb=64845, bsz=128, num_updates=12310, lr=9.99095e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=141993
2021-06-20 10:05:29 | INFO | train_inner | epoch 005: 376 / 3002 loss=2.619, ppl=6.14, wps=5838.7, ups=0.09, wpb=64877, bsz=128, num_updates=12311, lr=9.99095e-05, gnorm=1.956, loss_scale=4, train_wall=11, gb_free=2.8, wall=142004
2021-06-20 10:05:40 | INFO | train_inner | epoch 005: 377 / 3002 loss=2.407, ppl=5.31, wps=5913, ups=0.09, wpb=64870, bsz=128, num_updates=12312, lr=9.99095e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=142015
2021-06-20 10:05:51 | INFO | train_inner | epoch 005: 378 / 3002 loss=2.431, ppl=5.39, wps=5861.1, ups=0.09, wpb=64860, bsz=128, num_updates=12313, lr=9.99095e-05, gnorm=1.975, loss_scale=4, train_wall=11, gb_free=2.8, wall=142026
2021-06-20 10:06:02 | INFO | train_inner | epoch 005: 379 / 3002 loss=2.53, ppl=5.77, wps=5913.3, ups=0.09, wpb=64872, bsz=128, num_updates=12314, lr=9.99095e-05, gnorm=3.168, loss_scale=4, train_wall=10, gb_free=2.8, wall=142037
2021-06-20 10:06:13 | INFO | train_inner | epoch 005: 380 / 3002 loss=2.58, ppl=5.98, wps=5853.5, ups=0.09, wpb=64810, bsz=128, num_updates=12315, lr=9.99095e-05, gnorm=2.056, loss_scale=4, train_wall=11, gb_free=2.8, wall=142048
2021-06-20 10:06:25 | INFO | train_inner | epoch 005: 381 / 3002 loss=2.633, ppl=6.2, wps=5856.9, ups=0.09, wpb=64769, bsz=128, num_updates=12316, lr=9.99095e-05, gnorm=2.057, loss_scale=4, train_wall=11, gb_free=2.8, wall=142059
2021-06-20 10:06:35 | INFO | train_inner | epoch 005: 382 / 3002 loss=2.556, ppl=5.88, wps=5966.8, ups=0.09, wpb=64868, bsz=128, num_updates=12317, lr=9.99095e-05, gnorm=2.08, loss_scale=4, train_wall=10, gb_free=2.8, wall=142070
2021-06-20 10:06:47 | INFO | train_inner | epoch 005: 383 / 3002 loss=2.347, ppl=5.09, wps=5798.4, ups=0.09, wpb=64772, bsz=128, num_updates=12318, lr=9.99094e-05, gnorm=2.298, loss_scale=4, train_wall=11, gb_free=2.8, wall=142081
2021-06-20 10:06:58 | INFO | train_inner | epoch 005: 384 / 3002 loss=2.486, ppl=5.6, wps=5811.2, ups=0.09, wpb=64805, bsz=128, num_updates=12319, lr=9.99094e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=142092
2021-06-20 10:07:09 | INFO | train_inner | epoch 005: 385 / 3002 loss=2.514, ppl=5.71, wps=5804.7, ups=0.09, wpb=64787, bsz=128, num_updates=12320, lr=9.99094e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=142103
2021-06-20 10:07:20 | INFO | train_inner | epoch 005: 386 / 3002 loss=2.665, ppl=6.34, wps=5797.5, ups=0.09, wpb=64787, bsz=128, num_updates=12321, lr=9.99094e-05, gnorm=2.153, loss_scale=4, train_wall=11, gb_free=2.8, wall=142114
2021-06-20 10:07:31 | INFO | train_inner | epoch 005: 387 / 3002 loss=2.528, ppl=5.77, wps=5709.6, ups=0.09, wpb=64849, bsz=128, num_updates=12322, lr=9.99094e-05, gnorm=2.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=142126
2021-06-20 10:07:42 | INFO | train_inner | epoch 005: 388 / 3002 loss=2.54, ppl=5.82, wps=5945.7, ups=0.09, wpb=64870, bsz=128, num_updates=12323, lr=9.99094e-05, gnorm=1.963, loss_scale=4, train_wall=10, gb_free=2.8, wall=142137
2021-06-20 10:07:53 | INFO | train_inner | epoch 005: 389 / 3002 loss=2.511, ppl=5.7, wps=6036.2, ups=0.09, wpb=64892, bsz=128, num_updates=12324, lr=9.99094e-05, gnorm=1.913, loss_scale=4, train_wall=10, gb_free=2.8, wall=142147
2021-06-20 10:08:04 | INFO | train_inner | epoch 005: 390 / 3002 loss=2.648, ppl=6.27, wps=5830.8, ups=0.09, wpb=64851, bsz=128, num_updates=12325, lr=9.99094e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=142159
2021-06-20 10:08:15 | INFO | train_inner | epoch 005: 391 / 3002 loss=2.601, ppl=6.07, wps=5799.1, ups=0.09, wpb=64879, bsz=128, num_updates=12326, lr=9.99094e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=142170
2021-06-20 10:08:27 | INFO | train_inner | epoch 005: 392 / 3002 loss=2.651, ppl=6.28, wps=5827.3, ups=0.09, wpb=64789, bsz=128, num_updates=12327, lr=9.99094e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=142181
2021-06-20 10:08:38 | INFO | train_inner | epoch 005: 393 / 3002 loss=2.477, ppl=5.57, wps=5831.9, ups=0.09, wpb=64886, bsz=128, num_updates=12328, lr=9.99094e-05, gnorm=2.02, loss_scale=4, train_wall=11, gb_free=2.8, wall=142192
2021-06-20 10:08:49 | INFO | train_inner | epoch 005: 394 / 3002 loss=2.651, ppl=6.28, wps=5852.5, ups=0.09, wpb=64816, bsz=128, num_updates=12329, lr=9.99094e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=142203
2021-06-20 10:09:00 | INFO | train_inner | epoch 005: 395 / 3002 loss=2.432, ppl=5.4, wps=5865.5, ups=0.09, wpb=64844, bsz=128, num_updates=12330, lr=9.99094e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=142214
2021-06-20 10:09:11 | INFO | train_inner | epoch 005: 396 / 3002 loss=2.712, ppl=6.55, wps=5703.7, ups=0.09, wpb=64789, bsz=128, num_updates=12331, lr=9.99093e-05, gnorm=2.058, loss_scale=4, train_wall=11, gb_free=2.8, wall=142225
2021-06-20 10:09:22 | INFO | train_inner | epoch 005: 397 / 3002 loss=2.587, ppl=6.01, wps=5933.1, ups=0.09, wpb=64843, bsz=128, num_updates=12332, lr=9.99093e-05, gnorm=2.053, loss_scale=4, train_wall=10, gb_free=2.8, wall=142236
2021-06-20 10:09:33 | INFO | train_inner | epoch 005: 398 / 3002 loss=2.423, ppl=5.36, wps=5933.3, ups=0.09, wpb=64834, bsz=128, num_updates=12333, lr=9.99093e-05, gnorm=1.932, loss_scale=4, train_wall=10, gb_free=2.8, wall=142247
2021-06-20 10:09:44 | INFO | train_inner | epoch 005: 399 / 3002 loss=2.62, ppl=6.15, wps=5729.1, ups=0.09, wpb=64765, bsz=128, num_updates=12334, lr=9.99093e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=142259
2021-06-20 10:09:55 | INFO | train_inner | epoch 005: 400 / 3002 loss=2.432, ppl=5.4, wps=5904.1, ups=0.09, wpb=64812, bsz=128, num_updates=12335, lr=9.99093e-05, gnorm=2.154, loss_scale=4, train_wall=11, gb_free=2.8, wall=142270
2021-06-20 10:10:06 | INFO | train_inner | epoch 005: 401 / 3002 loss=2.364, ppl=5.15, wps=5863.9, ups=0.09, wpb=64902, bsz=128, num_updates=12336, lr=9.99093e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=142281
2021-06-20 10:10:17 | INFO | train_inner | epoch 005: 402 / 3002 loss=2.437, ppl=5.41, wps=5872.1, ups=0.09, wpb=64848, bsz=128, num_updates=12337, lr=9.99093e-05, gnorm=2.016, loss_scale=4, train_wall=11, gb_free=2.8, wall=142292
2021-06-20 10:10:28 | INFO | train_inner | epoch 005: 403 / 3002 loss=2.454, ppl=5.48, wps=5854.1, ups=0.09, wpb=64801, bsz=128, num_updates=12338, lr=9.99093e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=142303
2021-06-20 10:10:39 | INFO | train_inner | epoch 005: 404 / 3002 loss=2.52, ppl=5.74, wps=5970.6, ups=0.09, wpb=64799, bsz=128, num_updates=12339, lr=9.99093e-05, gnorm=2.159, loss_scale=4, train_wall=10, gb_free=2.8, wall=142314
2021-06-20 10:10:50 | INFO | train_inner | epoch 005: 405 / 3002 loss=2.559, ppl=5.89, wps=6016.7, ups=0.09, wpb=64788, bsz=128, num_updates=12340, lr=9.99093e-05, gnorm=2.098, loss_scale=4, train_wall=10, gb_free=2.8, wall=142324
2021-06-20 10:11:01 | INFO | train_inner | epoch 005: 406 / 3002 loss=2.539, ppl=5.81, wps=5908.5, ups=0.09, wpb=64851, bsz=128, num_updates=12341, lr=9.99093e-05, gnorm=1.988, loss_scale=4, train_wall=11, gb_free=2.8, wall=142335
2021-06-20 10:11:12 | INFO | train_inner | epoch 005: 407 / 3002 loss=2.429, ppl=5.39, wps=5729.6, ups=0.09, wpb=64751, bsz=128, num_updates=12342, lr=9.99093e-05, gnorm=2.285, loss_scale=4, train_wall=11, gb_free=2.8, wall=142347
2021-06-20 10:11:23 | INFO | train_inner | epoch 005: 408 / 3002 loss=2.397, ppl=5.27, wps=5902.1, ups=0.09, wpb=64815, bsz=128, num_updates=12343, lr=9.99092e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=142358
2021-06-20 10:11:35 | INFO | train_inner | epoch 005: 409 / 3002 loss=2.464, ppl=5.52, wps=5766.4, ups=0.09, wpb=64801, bsz=128, num_updates=12344, lr=9.99092e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=142369
2021-06-20 10:11:46 | INFO | train_inner | epoch 005: 410 / 3002 loss=2.366, ppl=5.15, wps=5839.2, ups=0.09, wpb=64812, bsz=128, num_updates=12345, lr=9.99092e-05, gnorm=1.939, loss_scale=4, train_wall=11, gb_free=2.8, wall=142380
2021-06-20 10:11:56 | INFO | train_inner | epoch 005: 411 / 3002 loss=2.526, ppl=5.76, wps=6031, ups=0.09, wpb=64876, bsz=128, num_updates=12346, lr=9.99092e-05, gnorm=2.009, loss_scale=4, train_wall=10, gb_free=2.8, wall=142391
2021-06-20 10:12:07 | INFO | train_inner | epoch 005: 412 / 3002 loss=2.378, ppl=5.2, wps=5863, ups=0.09, wpb=64843, bsz=128, num_updates=12347, lr=9.99092e-05, gnorm=2.416, loss_scale=4, train_wall=11, gb_free=2.8, wall=142402
2021-06-20 10:12:19 | INFO | train_inner | epoch 005: 413 / 3002 loss=2.582, ppl=5.99, wps=5810.8, ups=0.09, wpb=64768, bsz=128, num_updates=12348, lr=9.99092e-05, gnorm=3.877, loss_scale=4, train_wall=11, gb_free=2.8, wall=142413
2021-06-20 10:12:30 | INFO | train_inner | epoch 005: 414 / 3002 loss=2.644, ppl=6.25, wps=5817.7, ups=0.09, wpb=64786, bsz=128, num_updates=12349, lr=9.99092e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=142424
2021-06-20 10:12:41 | INFO | train_inner | epoch 005: 415 / 3002 loss=2.582, ppl=5.99, wps=5921, ups=0.09, wpb=64877, bsz=128, num_updates=12350, lr=9.99092e-05, gnorm=2.118, loss_scale=4, train_wall=11, gb_free=2.8, wall=142435
2021-06-20 10:12:52 | INFO | train_inner | epoch 005: 416 / 3002 loss=2.453, ppl=5.48, wps=5824.4, ups=0.09, wpb=64907, bsz=128, num_updates=12351, lr=9.99092e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=142446
2021-06-20 10:13:03 | INFO | train_inner | epoch 005: 417 / 3002 loss=2.634, ppl=6.21, wps=5843.9, ups=0.09, wpb=64854, bsz=128, num_updates=12352, lr=9.99092e-05, gnorm=1.931, loss_scale=4, train_wall=11, gb_free=2.8, wall=142457
2021-06-20 10:13:14 | INFO | train_inner | epoch 005: 418 / 3002 loss=2.426, ppl=5.37, wps=5813.6, ups=0.09, wpb=64837, bsz=128, num_updates=12353, lr=9.99092e-05, gnorm=2.268, loss_scale=4, train_wall=11, gb_free=2.8, wall=142468
2021-06-20 10:13:25 | INFO | train_inner | epoch 005: 419 / 3002 loss=2.452, ppl=5.47, wps=5905.6, ups=0.09, wpb=64871, bsz=128, num_updates=12354, lr=9.99092e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=142479
2021-06-20 10:13:36 | INFO | train_inner | epoch 005: 420 / 3002 loss=2.541, ppl=5.82, wps=5908.6, ups=0.09, wpb=64865, bsz=128, num_updates=12355, lr=9.99092e-05, gnorm=1.986, loss_scale=4, train_wall=10, gb_free=2.8, wall=142490
2021-06-20 10:13:47 | INFO | train_inner | epoch 005: 421 / 3002 loss=2.591, ppl=6.03, wps=5882.2, ups=0.09, wpb=64814, bsz=128, num_updates=12356, lr=9.99091e-05, gnorm=1.929, loss_scale=4, train_wall=11, gb_free=2.8, wall=142501
2021-06-20 10:13:58 | INFO | train_inner | epoch 005: 422 / 3002 loss=2.42, ppl=5.35, wps=5799.8, ups=0.09, wpb=64796, bsz=128, num_updates=12357, lr=9.99091e-05, gnorm=1.991, loss_scale=4, train_wall=11, gb_free=2.8, wall=142513
2021-06-20 10:14:09 | INFO | train_inner | epoch 005: 423 / 3002 loss=2.425, ppl=5.37, wps=5890.9, ups=0.09, wpb=64874, bsz=128, num_updates=12358, lr=9.99091e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=142524
2021-06-20 10:14:20 | INFO | train_inner | epoch 005: 424 / 3002 loss=2.501, ppl=5.66, wps=5866.1, ups=0.09, wpb=64799, bsz=128, num_updates=12359, lr=9.99091e-05, gnorm=2.162, loss_scale=4, train_wall=11, gb_free=2.8, wall=142535
2021-06-20 10:14:31 | INFO | train_inner | epoch 005: 425 / 3002 loss=2.53, ppl=5.77, wps=5859, ups=0.09, wpb=64768, bsz=128, num_updates=12360, lr=9.99091e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=142546
2021-06-20 10:14:43 | INFO | train_inner | epoch 005: 426 / 3002 loss=2.501, ppl=5.66, wps=5807.2, ups=0.09, wpb=64797, bsz=128, num_updates=12361, lr=9.99091e-05, gnorm=2.078, loss_scale=4, train_wall=11, gb_free=2.8, wall=142557
2021-06-20 10:14:54 | INFO | train_inner | epoch 005: 427 / 3002 loss=2.594, ppl=6.04, wps=5919.9, ups=0.09, wpb=64887, bsz=128, num_updates=12362, lr=9.99091e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=142568
2021-06-20 10:15:04 | INFO | train_inner | epoch 005: 428 / 3002 loss=2.691, ppl=6.46, wps=5971.3, ups=0.09, wpb=64855, bsz=128, num_updates=12363, lr=9.99091e-05, gnorm=2.004, loss_scale=4, train_wall=10, gb_free=2.8, wall=142579
2021-06-20 10:15:15 | INFO | train_inner | epoch 005: 429 / 3002 loss=2.42, ppl=5.35, wps=5839.2, ups=0.09, wpb=64866, bsz=128, num_updates=12364, lr=9.99091e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=142590
2021-06-20 10:15:26 | INFO | train_inner | epoch 005: 430 / 3002 loss=2.493, ppl=5.63, wps=5891.4, ups=0.09, wpb=64863, bsz=128, num_updates=12365, lr=9.99091e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=142601
2021-06-20 10:15:37 | INFO | train_inner | epoch 005: 431 / 3002 loss=2.531, ppl=5.78, wps=5917.9, ups=0.09, wpb=64761, bsz=128, num_updates=12366, lr=9.99091e-05, gnorm=2.078, loss_scale=4, train_wall=10, gb_free=2.8, wall=142612
2021-06-20 10:15:48 | INFO | train_inner | epoch 005: 432 / 3002 loss=2.475, ppl=5.56, wps=5886.6, ups=0.09, wpb=64839, bsz=128, num_updates=12367, lr=9.99091e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=142623
2021-06-20 10:16:00 | INFO | train_inner | epoch 005: 433 / 3002 loss=2.55, ppl=5.86, wps=5804.1, ups=0.09, wpb=64798, bsz=128, num_updates=12368, lr=9.9909e-05, gnorm=2.101, loss_scale=4, train_wall=11, gb_free=2.8, wall=142634
2021-06-20 10:16:11 | INFO | train_inner | epoch 005: 434 / 3002 loss=2.609, ppl=6.1, wps=5842.5, ups=0.09, wpb=64815, bsz=128, num_updates=12369, lr=9.9909e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=142645
2021-06-20 10:16:22 | INFO | train_inner | epoch 005: 435 / 3002 loss=2.608, ppl=6.1, wps=5798.1, ups=0.09, wpb=64842, bsz=128, num_updates=12370, lr=9.9909e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=142656
2021-06-20 10:16:33 | INFO | train_inner | epoch 005: 436 / 3002 loss=2.527, ppl=5.76, wps=5764.8, ups=0.09, wpb=64820, bsz=128, num_updates=12371, lr=9.9909e-05, gnorm=2.085, loss_scale=4, train_wall=11, gb_free=2.8, wall=142667
2021-06-20 10:16:44 | INFO | train_inner | epoch 005: 437 / 3002 loss=2.668, ppl=6.35, wps=5748.2, ups=0.09, wpb=64799, bsz=128, num_updates=12372, lr=9.9909e-05, gnorm=2.068, loss_scale=4, train_wall=11, gb_free=2.8, wall=142679
2021-06-20 10:16:56 | INFO | train_inner | epoch 005: 438 / 3002 loss=2.554, ppl=5.87, wps=5836.2, ups=0.09, wpb=64880, bsz=128, num_updates=12373, lr=9.9909e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=142690
2021-06-20 10:17:07 | INFO | train_inner | epoch 005: 439 / 3002 loss=2.516, ppl=5.72, wps=5761.4, ups=0.09, wpb=64827, bsz=128, num_updates=12374, lr=9.9909e-05, gnorm=1.991, loss_scale=4, train_wall=11, gb_free=2.8, wall=142701
2021-06-20 10:17:18 | INFO | train_inner | epoch 005: 440 / 3002 loss=2.548, ppl=5.85, wps=5792.9, ups=0.09, wpb=64865, bsz=128, num_updates=12375, lr=9.9909e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=142712
2021-06-20 10:17:29 | INFO | train_inner | epoch 005: 441 / 3002 loss=2.659, ppl=6.32, wps=5806.6, ups=0.09, wpb=64712, bsz=128, num_updates=12376, lr=9.9909e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=142723
2021-06-20 10:17:40 | INFO | train_inner | epoch 005: 442 / 3002 loss=2.632, ppl=6.2, wps=5781.4, ups=0.09, wpb=64886, bsz=128, num_updates=12377, lr=9.9909e-05, gnorm=2.099, loss_scale=4, train_wall=11, gb_free=2.8, wall=142735
2021-06-20 10:17:52 | INFO | train_inner | epoch 005: 443 / 3002 loss=2.482, ppl=5.59, wps=5811.6, ups=0.09, wpb=64837, bsz=128, num_updates=12378, lr=9.9909e-05, gnorm=2.008, loss_scale=4, train_wall=11, gb_free=2.8, wall=142746
2021-06-20 10:18:03 | INFO | train_inner | epoch 005: 444 / 3002 loss=2.593, ppl=6.03, wps=5860.8, ups=0.09, wpb=64804, bsz=128, num_updates=12379, lr=9.9909e-05, gnorm=1.919, loss_scale=4, train_wall=11, gb_free=2.8, wall=142757
2021-06-20 10:18:14 | INFO | train_inner | epoch 005: 445 / 3002 loss=2.644, ppl=6.25, wps=5777.3, ups=0.09, wpb=64746, bsz=128, num_updates=12380, lr=9.9909e-05, gnorm=2.038, loss_scale=4, train_wall=11, gb_free=2.8, wall=142768
2021-06-20 10:18:25 | INFO | train_inner | epoch 005: 446 / 3002 loss=2.615, ppl=6.13, wps=5832.5, ups=0.09, wpb=64871, bsz=128, num_updates=12381, lr=9.99089e-05, gnorm=1.935, loss_scale=4, train_wall=11, gb_free=2.8, wall=142779
2021-06-20 10:18:36 | INFO | train_inner | epoch 005: 447 / 3002 loss=2.414, ppl=5.33, wps=5882.1, ups=0.09, wpb=64814, bsz=128, num_updates=12382, lr=9.99089e-05, gnorm=1.991, loss_scale=4, train_wall=11, gb_free=2.8, wall=142790
2021-06-20 10:18:47 | INFO | train_inner | epoch 005: 448 / 3002 loss=2.556, ppl=5.88, wps=5803.6, ups=0.09, wpb=64827, bsz=128, num_updates=12383, lr=9.99089e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=142801
2021-06-20 10:18:58 | INFO | train_inner | epoch 005: 449 / 3002 loss=2.426, ppl=5.37, wps=5949.2, ups=0.09, wpb=64859, bsz=128, num_updates=12384, lr=9.99089e-05, gnorm=2.18, loss_scale=4, train_wall=10, gb_free=2.8, wall=142812
2021-06-20 10:19:09 | INFO | train_inner | epoch 005: 450 / 3002 loss=2.426, ppl=5.38, wps=5943.3, ups=0.09, wpb=64871, bsz=128, num_updates=12385, lr=9.99089e-05, gnorm=2.051, loss_scale=4, train_wall=10, gb_free=2.8, wall=142823
2021-06-20 10:19:20 | INFO | train_inner | epoch 005: 451 / 3002 loss=2.547, ppl=5.84, wps=5780.2, ups=0.09, wpb=64819, bsz=128, num_updates=12386, lr=9.99089e-05, gnorm=2.015, loss_scale=4, train_wall=11, gb_free=2.8, wall=142834
2021-06-20 10:19:31 | INFO | train_inner | epoch 005: 452 / 3002 loss=2.557, ppl=5.88, wps=5782, ups=0.09, wpb=64824, bsz=128, num_updates=12387, lr=9.99089e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=142846
2021-06-20 10:19:43 | INFO | train_inner | epoch 005: 453 / 3002 loss=2.527, ppl=5.77, wps=5794, ups=0.09, wpb=64813, bsz=128, num_updates=12388, lr=9.99089e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=142857
2021-06-20 10:19:54 | INFO | train_inner | epoch 005: 454 / 3002 loss=2.475, ppl=5.56, wps=5894.1, ups=0.09, wpb=64838, bsz=128, num_updates=12389, lr=9.99089e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=142868
2021-06-20 10:20:05 | INFO | train_inner | epoch 005: 455 / 3002 loss=2.494, ppl=5.63, wps=5829.6, ups=0.09, wpb=64835, bsz=128, num_updates=12390, lr=9.99089e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=142879
2021-06-20 10:20:16 | INFO | train_inner | epoch 005: 456 / 3002 loss=2.455, ppl=5.48, wps=5922.5, ups=0.09, wpb=64830, bsz=128, num_updates=12391, lr=9.99089e-05, gnorm=2.041, loss_scale=4, train_wall=10, gb_free=2.8, wall=142890
2021-06-20 10:20:27 | INFO | train_inner | epoch 005: 457 / 3002 loss=2.538, ppl=5.81, wps=5830.3, ups=0.09, wpb=64852, bsz=128, num_updates=12392, lr=9.99089e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=142901
2021-06-20 10:20:38 | INFO | train_inner | epoch 005: 458 / 3002 loss=2.425, ppl=5.37, wps=5868.9, ups=0.09, wpb=64822, bsz=128, num_updates=12393, lr=9.99088e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=142912
2021-06-20 10:20:49 | INFO | train_inner | epoch 005: 459 / 3002 loss=2.52, ppl=5.73, wps=5806.2, ups=0.09, wpb=64828, bsz=128, num_updates=12394, lr=9.99088e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=142923
2021-06-20 10:21:00 | INFO | train_inner | epoch 005: 460 / 3002 loss=2.707, ppl=6.53, wps=5836.3, ups=0.09, wpb=64779, bsz=128, num_updates=12395, lr=9.99088e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=142934
2021-06-20 10:21:11 | INFO | train_inner | epoch 005: 461 / 3002 loss=2.617, ppl=6.13, wps=5769.6, ups=0.09, wpb=64799, bsz=128, num_updates=12396, lr=9.99088e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=142946
2021-06-20 10:21:22 | INFO | train_inner | epoch 005: 462 / 3002 loss=2.652, ppl=6.28, wps=5829.3, ups=0.09, wpb=64799, bsz=128, num_updates=12397, lr=9.99088e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=142957
2021-06-20 10:21:33 | INFO | train_inner | epoch 005: 463 / 3002 loss=2.513, ppl=5.71, wps=5960.3, ups=0.09, wpb=64867, bsz=128, num_updates=12398, lr=9.99088e-05, gnorm=2.044, loss_scale=4, train_wall=10, gb_free=2.8, wall=142968
2021-06-20 10:21:44 | INFO | train_inner | epoch 005: 464 / 3002 loss=2.39, ppl=5.24, wps=5838.2, ups=0.09, wpb=64815, bsz=128, num_updates=12399, lr=9.99088e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=142979
2021-06-20 10:21:56 | INFO | train_inner | epoch 005: 465 / 3002 loss=2.4, ppl=5.28, wps=5758.3, ups=0.09, wpb=64844, bsz=128, num_updates=12400, lr=9.99088e-05, gnorm=2.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=142990
2021-06-20 10:22:07 | INFO | train_inner | epoch 005: 466 / 3002 loss=2.479, ppl=5.58, wps=5848.8, ups=0.09, wpb=64952, bsz=128, num_updates=12401, lr=9.99088e-05, gnorm=1.974, loss_scale=4, train_wall=11, gb_free=2.8, wall=143001
2021-06-20 10:22:18 | INFO | train_inner | epoch 005: 467 / 3002 loss=2.633, ppl=6.2, wps=5815.1, ups=0.09, wpb=64802, bsz=128, num_updates=12402, lr=9.99088e-05, gnorm=2.101, loss_scale=4, train_wall=11, gb_free=2.8, wall=143012
2021-06-20 10:22:29 | INFO | train_inner | epoch 005: 468 / 3002 loss=2.572, ppl=5.95, wps=5954.1, ups=0.09, wpb=64835, bsz=128, num_updates=12403, lr=9.99088e-05, gnorm=2.045, loss_scale=4, train_wall=10, gb_free=2.8, wall=143023
2021-06-20 10:22:40 | INFO | train_inner | epoch 005: 469 / 3002 loss=2.57, ppl=5.94, wps=5767.9, ups=0.09, wpb=64802, bsz=128, num_updates=12404, lr=9.99088e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=143034
2021-06-20 10:22:51 | INFO | train_inner | epoch 005: 470 / 3002 loss=2.596, ppl=6.04, wps=5857.8, ups=0.09, wpb=64920, bsz=128, num_updates=12405, lr=9.99088e-05, gnorm=2.165, loss_scale=4, train_wall=11, gb_free=2.8, wall=143045
2021-06-20 10:23:02 | INFO | train_inner | epoch 005: 471 / 3002 loss=2.543, ppl=5.83, wps=5935.4, ups=0.09, wpb=64759, bsz=128, num_updates=12406, lr=9.99087e-05, gnorm=2.095, loss_scale=4, train_wall=10, gb_free=2.8, wall=143056
2021-06-20 10:23:13 | INFO | train_inner | epoch 005: 472 / 3002 loss=2.633, ppl=6.2, wps=5774.8, ups=0.09, wpb=64755, bsz=128, num_updates=12407, lr=9.99087e-05, gnorm=2.032, loss_scale=4, train_wall=11, gb_free=2.8, wall=143068
2021-06-20 10:23:24 | INFO | train_inner | epoch 005: 473 / 3002 loss=2.483, ppl=5.59, wps=5886.8, ups=0.09, wpb=64824, bsz=128, num_updates=12408, lr=9.99087e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=143079
2021-06-20 10:23:35 | INFO | train_inner | epoch 005: 474 / 3002 loss=2.559, ppl=5.89, wps=5980.9, ups=0.09, wpb=64861, bsz=128, num_updates=12409, lr=9.99087e-05, gnorm=1.961, loss_scale=4, train_wall=10, gb_free=2.8, wall=143089
2021-06-20 10:23:46 | INFO | train_inner | epoch 005: 475 / 3002 loss=2.455, ppl=5.48, wps=5874.4, ups=0.09, wpb=64691, bsz=128, num_updates=12410, lr=9.99087e-05, gnorm=2.073, loss_scale=4, train_wall=11, gb_free=2.8, wall=143100
2021-06-20 10:23:57 | INFO | train_inner | epoch 005: 476 / 3002 loss=2.394, ppl=5.26, wps=5700.7, ups=0.09, wpb=64846, bsz=128, num_updates=12411, lr=9.99087e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=143112
2021-06-20 10:24:09 | INFO | train_inner | epoch 005: 477 / 3002 loss=2.532, ppl=5.78, wps=5809.9, ups=0.09, wpb=64859, bsz=128, num_updates=12412, lr=9.99087e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=143123
2021-06-20 10:24:20 | INFO | train_inner | epoch 005: 478 / 3002 loss=2.554, ppl=5.87, wps=5746.3, ups=0.09, wpb=64770, bsz=128, num_updates=12413, lr=9.99087e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=143134
2021-06-20 10:24:31 | INFO | train_inner | epoch 005: 479 / 3002 loss=2.42, ppl=5.35, wps=5802.8, ups=0.09, wpb=64794, bsz=128, num_updates=12414, lr=9.99087e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=143145
2021-06-20 10:24:42 | INFO | train_inner | epoch 005: 480 / 3002 loss=2.632, ppl=6.2, wps=5855.3, ups=0.09, wpb=64802, bsz=128, num_updates=12415, lr=9.99087e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=143156
2021-06-20 10:24:53 | INFO | train_inner | epoch 005: 481 / 3002 loss=2.683, ppl=6.42, wps=5880.2, ups=0.09, wpb=64829, bsz=128, num_updates=12416, lr=9.99087e-05, gnorm=2.065, loss_scale=4, train_wall=11, gb_free=2.8, wall=143167
2021-06-20 10:25:04 | INFO | train_inner | epoch 005: 482 / 3002 loss=2.61, ppl=6.1, wps=5924.1, ups=0.09, wpb=64874, bsz=128, num_updates=12417, lr=9.99087e-05, gnorm=1.992, loss_scale=8, train_wall=10, gb_free=2.8, wall=143178
2021-06-20 10:25:15 | INFO | train_inner | epoch 005: 483 / 3002 loss=2.582, ppl=5.99, wps=5790.9, ups=0.09, wpb=64816, bsz=128, num_updates=12418, lr=9.99086e-05, gnorm=2.046, loss_scale=8, train_wall=11, gb_free=2.8, wall=143190
2021-06-20 10:25:27 | INFO | train_inner | epoch 005: 484 / 3002 loss=2.587, ppl=6.01, wps=5749.6, ups=0.09, wpb=64818, bsz=128, num_updates=12419, lr=9.99086e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=143201
2021-06-20 10:25:37 | INFO | train_inner | epoch 005: 485 / 3002 loss=2.647, ppl=6.26, wps=5950.7, ups=0.09, wpb=64867, bsz=128, num_updates=12420, lr=9.99086e-05, gnorm=2.014, loss_scale=8, train_wall=10, gb_free=2.8, wall=143212
2021-06-20 10:25:49 | INFO | train_inner | epoch 005: 486 / 3002 loss=2.397, ppl=5.27, wps=5810.7, ups=0.09, wpb=64772, bsz=128, num_updates=12421, lr=9.99086e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=143223
2021-06-20 10:26:00 | INFO | train_inner | epoch 005: 487 / 3002 loss=2.567, ppl=5.93, wps=5806.9, ups=0.09, wpb=64801, bsz=128, num_updates=12422, lr=9.99086e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=143234
2021-06-20 10:26:11 | INFO | train_inner | epoch 005: 488 / 3002 loss=2.316, ppl=4.98, wps=5718.2, ups=0.09, wpb=64797, bsz=128, num_updates=12423, lr=9.99086e-05, gnorm=1.895, loss_scale=8, train_wall=11, gb_free=2.8, wall=143245
2021-06-20 10:26:22 | INFO | train_inner | epoch 005: 489 / 3002 loss=2.625, ppl=6.17, wps=5791.2, ups=0.09, wpb=64874, bsz=128, num_updates=12424, lr=9.99086e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=143257
2021-06-20 10:26:33 | INFO | train_inner | epoch 005: 490 / 3002 loss=2.624, ppl=6.17, wps=5902.6, ups=0.09, wpb=64884, bsz=128, num_updates=12425, lr=9.99086e-05, gnorm=2.056, loss_scale=8, train_wall=11, gb_free=2.8, wall=143268
2021-06-20 10:26:44 | INFO | train_inner | epoch 005: 491 / 3002 loss=2.528, ppl=5.77, wps=5869.8, ups=0.09, wpb=64840, bsz=128, num_updates=12426, lr=9.99086e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=143279
2021-06-20 10:26:55 | INFO | train_inner | epoch 005: 492 / 3002 loss=2.365, ppl=5.15, wps=5864.1, ups=0.09, wpb=64860, bsz=128, num_updates=12427, lr=9.99086e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=143290
2021-06-20 10:27:07 | INFO | train_inner | epoch 005: 493 / 3002 loss=2.533, ppl=5.79, wps=5764.9, ups=0.09, wpb=64875, bsz=128, num_updates=12428, lr=9.99086e-05, gnorm=2.045, loss_scale=8, train_wall=11, gb_free=2.8, wall=143301
2021-06-20 10:27:18 | INFO | train_inner | epoch 005: 494 / 3002 loss=2.474, ppl=5.56, wps=5839.4, ups=0.09, wpb=64871, bsz=128, num_updates=12429, lr=9.99086e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=143312
2021-06-20 10:27:29 | INFO | train_inner | epoch 005: 495 / 3002 loss=2.551, ppl=5.86, wps=5845.7, ups=0.09, wpb=64845, bsz=128, num_updates=12430, lr=9.99086e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=143323
2021-06-20 10:27:40 | INFO | train_inner | epoch 005: 496 / 3002 loss=2.619, ppl=6.14, wps=5899.6, ups=0.09, wpb=64834, bsz=128, num_updates=12431, lr=9.99085e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=143334
2021-06-20 10:27:51 | INFO | train_inner | epoch 005: 497 / 3002 loss=2.704, ppl=6.52, wps=5807.2, ups=0.09, wpb=64738, bsz=128, num_updates=12432, lr=9.99085e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=143345
2021-06-20 10:28:02 | INFO | train_inner | epoch 005: 498 / 3002 loss=2.549, ppl=5.85, wps=5786.3, ups=0.09, wpb=64790, bsz=128, num_updates=12433, lr=9.99085e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=143357
2021-06-20 10:28:13 | INFO | train_inner | epoch 005: 499 / 3002 loss=2.512, ppl=5.7, wps=5865.8, ups=0.09, wpb=64838, bsz=128, num_updates=12434, lr=9.99085e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=143368
2021-06-20 10:28:24 | INFO | train_inner | epoch 005: 500 / 3002 loss=2.565, ppl=5.92, wps=5818.7, ups=0.09, wpb=64821, bsz=128, num_updates=12435, lr=9.99085e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=143379
2021-06-20 10:28:36 | INFO | train_inner | epoch 005: 501 / 3002 loss=2.586, ppl=6.01, wps=5815, ups=0.09, wpb=64783, bsz=128, num_updates=12436, lr=9.99085e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=143390
2021-06-20 10:28:47 | INFO | train_inner | epoch 005: 502 / 3002 loss=2.476, ppl=5.56, wps=5902.4, ups=0.09, wpb=64795, bsz=128, num_updates=12437, lr=9.99085e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=143401
2021-06-20 10:28:58 | INFO | train_inner | epoch 005: 503 / 3002 loss=2.49, ppl=5.62, wps=5852.9, ups=0.09, wpb=64874, bsz=128, num_updates=12438, lr=9.99085e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=143412
2021-06-20 10:29:09 | INFO | train_inner | epoch 005: 504 / 3002 loss=2.434, ppl=5.4, wps=5874.3, ups=0.09, wpb=64785, bsz=128, num_updates=12439, lr=9.99085e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=143423
2021-06-20 10:29:20 | INFO | train_inner | epoch 005: 505 / 3002 loss=2.403, ppl=5.29, wps=5903.6, ups=0.09, wpb=64907, bsz=128, num_updates=12440, lr=9.99085e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=143434
2021-06-20 10:29:31 | INFO | train_inner | epoch 005: 506 / 3002 loss=2.478, ppl=5.57, wps=5738.2, ups=0.09, wpb=64816, bsz=128, num_updates=12441, lr=9.99085e-05, gnorm=2.011, loss_scale=8, train_wall=11, gb_free=2.8, wall=143445
2021-06-20 10:29:42 | INFO | train_inner | epoch 005: 507 / 3002 loss=2.518, ppl=5.73, wps=5779.9, ups=0.09, wpb=64791, bsz=128, num_updates=12442, lr=9.99085e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=143456
2021-06-20 10:29:53 | INFO | train_inner | epoch 005: 508 / 3002 loss=2.5, ppl=5.66, wps=5864.1, ups=0.09, wpb=64801, bsz=128, num_updates=12443, lr=9.99084e-05, gnorm=2.043, loss_scale=8, train_wall=11, gb_free=2.8, wall=143468
2021-06-20 10:30:04 | INFO | train_inner | epoch 005: 509 / 3002 loss=2.466, ppl=5.52, wps=5856.4, ups=0.09, wpb=64854, bsz=128, num_updates=12444, lr=9.99084e-05, gnorm=2.411, loss_scale=8, train_wall=11, gb_free=2.8, wall=143479
2021-06-20 10:30:15 | INFO | train_inner | epoch 005: 510 / 3002 loss=2.479, ppl=5.57, wps=5815.9, ups=0.09, wpb=64864, bsz=128, num_updates=12445, lr=9.99084e-05, gnorm=5.315, loss_scale=8, train_wall=11, gb_free=2.8, wall=143490
2021-06-20 10:30:27 | INFO | train_inner | epoch 005: 511 / 3002 loss=2.716, ppl=6.57, wps=5803.1, ups=0.09, wpb=64847, bsz=128, num_updates=12446, lr=9.99084e-05, gnorm=1.864, loss_scale=8, train_wall=11, gb_free=2.8, wall=143501
2021-06-20 10:30:38 | INFO | train_inner | epoch 005: 512 / 3002 loss=2.401, ppl=5.28, wps=5858.9, ups=0.09, wpb=64896, bsz=128, num_updates=12447, lr=9.99084e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=143512
2021-06-20 10:30:49 | INFO | train_inner | epoch 005: 513 / 3002 loss=2.594, ppl=6.04, wps=5800, ups=0.09, wpb=64800, bsz=128, num_updates=12448, lr=9.99084e-05, gnorm=2.051, loss_scale=8, train_wall=11, gb_free=2.8, wall=143523
2021-06-20 10:31:00 | INFO | train_inner | epoch 005: 514 / 3002 loss=2.45, ppl=5.46, wps=5925.3, ups=0.09, wpb=64866, bsz=128, num_updates=12449, lr=9.99084e-05, gnorm=1.968, loss_scale=8, train_wall=10, gb_free=2.8, wall=143534
2021-06-20 10:31:11 | INFO | train_inner | epoch 005: 515 / 3002 loss=2.56, ppl=5.9, wps=5813.5, ups=0.09, wpb=64749, bsz=128, num_updates=12450, lr=9.99084e-05, gnorm=2.754, loss_scale=8, train_wall=11, gb_free=2.8, wall=143545
2021-06-20 10:31:22 | INFO | train_inner | epoch 005: 516 / 3002 loss=2.503, ppl=5.67, wps=5831.8, ups=0.09, wpb=64893, bsz=128, num_updates=12451, lr=9.99084e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=143556
2021-06-20 10:31:33 | INFO | train_inner | epoch 005: 517 / 3002 loss=2.555, ppl=5.88, wps=5784.4, ups=0.09, wpb=64849, bsz=128, num_updates=12452, lr=9.99084e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=143568
2021-06-20 10:31:44 | INFO | train_inner | epoch 005: 518 / 3002 loss=2.514, ppl=5.71, wps=5766.6, ups=0.09, wpb=64735, bsz=128, num_updates=12453, lr=9.99084e-05, gnorm=2.018, loss_scale=8, train_wall=11, gb_free=2.8, wall=143579
2021-06-20 10:31:56 | INFO | train_inner | epoch 005: 519 / 3002 loss=2.499, ppl=5.65, wps=5787.9, ups=0.09, wpb=64836, bsz=128, num_updates=12454, lr=9.99084e-05, gnorm=1.89, loss_scale=8, train_wall=11, gb_free=2.8, wall=143590
2021-06-20 10:32:07 | INFO | train_inner | epoch 005: 520 / 3002 loss=2.46, ppl=5.5, wps=5792.3, ups=0.09, wpb=64803, bsz=128, num_updates=12455, lr=9.99084e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=143601
2021-06-20 10:32:18 | INFO | train_inner | epoch 005: 521 / 3002 loss=2.633, ppl=6.2, wps=5832.3, ups=0.09, wpb=64876, bsz=128, num_updates=12456, lr=9.99083e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=143612
2021-06-20 10:32:29 | INFO | train_inner | epoch 005: 522 / 3002 loss=2.559, ppl=5.89, wps=5794.8, ups=0.09, wpb=64812, bsz=128, num_updates=12457, lr=9.99083e-05, gnorm=2.041, loss_scale=8, train_wall=11, gb_free=2.8, wall=143624
2021-06-20 10:32:40 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-20 10:32:51 | INFO | train_inner | epoch 005: 524 / 3002 loss=2.651, ppl=6.28, wps=2951.9, ups=0.05, wpb=64807, bsz=128, num_updates=12458, lr=9.99083e-05, gnorm=2.33, loss_scale=4, train_wall=21, gb_free=2.8, wall=143645
2021-06-20 10:33:02 | INFO | train_inner | epoch 005: 525 / 3002 loss=2.552, ppl=5.87, wps=5799.3, ups=0.09, wpb=64784, bsz=128, num_updates=12459, lr=9.99083e-05, gnorm=2.072, loss_scale=4, train_wall=11, gb_free=2.8, wall=143657
2021-06-20 10:33:14 | INFO | train_inner | epoch 005: 526 / 3002 loss=2.471, ppl=5.55, wps=5713.2, ups=0.09, wpb=64847, bsz=128, num_updates=12460, lr=9.99083e-05, gnorm=11.592, loss_scale=4, train_wall=11, gb_free=2.8, wall=143668
2021-06-20 10:33:25 | INFO | train_inner | epoch 005: 527 / 3002 loss=2.624, ppl=6.16, wps=5841.3, ups=0.09, wpb=64751, bsz=128, num_updates=12461, lr=9.99083e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=143679
2021-06-20 10:33:36 | INFO | train_inner | epoch 005: 528 / 3002 loss=2.623, ppl=6.16, wps=5909.4, ups=0.09, wpb=64842, bsz=128, num_updates=12462, lr=9.99083e-05, gnorm=1.948, loss_scale=4, train_wall=10, gb_free=2.8, wall=143690
2021-06-20 10:33:47 | INFO | train_inner | epoch 005: 529 / 3002 loss=2.561, ppl=5.9, wps=5701.7, ups=0.09, wpb=64814, bsz=128, num_updates=12463, lr=9.99083e-05, gnorm=2.048, loss_scale=4, train_wall=11, gb_free=2.8, wall=143701
2021-06-20 10:33:58 | INFO | train_inner | epoch 005: 530 / 3002 loss=2.548, ppl=5.85, wps=5973.1, ups=0.09, wpb=64872, bsz=128, num_updates=12464, lr=9.99083e-05, gnorm=2.122, loss_scale=4, train_wall=10, gb_free=2.8, wall=143712
2021-06-20 10:34:09 | INFO | train_inner | epoch 005: 531 / 3002 loss=2.643, ppl=6.24, wps=5861.9, ups=0.09, wpb=64797, bsz=128, num_updates=12465, lr=9.99083e-05, gnorm=2.06, loss_scale=4, train_wall=11, gb_free=2.8, wall=143723
2021-06-20 10:34:20 | INFO | train_inner | epoch 005: 532 / 3002 loss=2.571, ppl=5.94, wps=5725.2, ups=0.09, wpb=64878, bsz=128, num_updates=12466, lr=9.99083e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=143735
2021-06-20 10:34:32 | INFO | train_inner | epoch 005: 533 / 3002 loss=2.408, ppl=5.31, wps=5786.8, ups=0.09, wpb=64802, bsz=128, num_updates=12467, lr=9.99083e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=143746
2021-06-20 10:34:43 | INFO | train_inner | epoch 005: 534 / 3002 loss=2.465, ppl=5.52, wps=5866.8, ups=0.09, wpb=64807, bsz=128, num_updates=12468, lr=9.99082e-05, gnorm=2.105, loss_scale=4, train_wall=11, gb_free=2.8, wall=143757
2021-06-20 10:34:54 | INFO | train_inner | epoch 005: 535 / 3002 loss=2.579, ppl=5.98, wps=5767.5, ups=0.09, wpb=64788, bsz=128, num_updates=12469, lr=9.99082e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=143768
2021-06-20 10:35:05 | INFO | train_inner | epoch 005: 536 / 3002 loss=2.562, ppl=5.9, wps=5783.9, ups=0.09, wpb=64820, bsz=128, num_updates=12470, lr=9.99082e-05, gnorm=1.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=143779
2021-06-20 10:35:16 | INFO | train_inner | epoch 005: 537 / 3002 loss=2.464, ppl=5.52, wps=5780.1, ups=0.09, wpb=64884, bsz=128, num_updates=12471, lr=9.99082e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=143791
2021-06-20 10:35:27 | INFO | train_inner | epoch 005: 538 / 3002 loss=2.469, ppl=5.54, wps=5851, ups=0.09, wpb=64843, bsz=128, num_updates=12472, lr=9.99082e-05, gnorm=1.87, loss_scale=4, train_wall=11, gb_free=2.8, wall=143802
2021-06-20 10:35:39 | INFO | train_inner | epoch 005: 539 / 3002 loss=2.661, ppl=6.32, wps=5774.7, ups=0.09, wpb=64750, bsz=128, num_updates=12473, lr=9.99082e-05, gnorm=2.062, loss_scale=4, train_wall=11, gb_free=2.8, wall=143813
2021-06-20 10:35:50 | INFO | train_inner | epoch 005: 540 / 3002 loss=2.544, ppl=5.83, wps=5861.2, ups=0.09, wpb=64826, bsz=128, num_updates=12474, lr=9.99082e-05, gnorm=2.039, loss_scale=4, train_wall=11, gb_free=2.8, wall=143824
2021-06-20 10:36:01 | INFO | train_inner | epoch 005: 541 / 3002 loss=2.608, ppl=6.1, wps=5793, ups=0.09, wpb=64828, bsz=128, num_updates=12475, lr=9.99082e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=143835
2021-06-20 10:36:12 | INFO | train_inner | epoch 005: 542 / 3002 loss=2.59, ppl=6.02, wps=5886.7, ups=0.09, wpb=64822, bsz=128, num_updates=12476, lr=9.99082e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=143846
2021-06-20 10:36:23 | INFO | train_inner | epoch 005: 543 / 3002 loss=2.552, ppl=5.86, wps=5728.7, ups=0.09, wpb=64806, bsz=128, num_updates=12477, lr=9.99082e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=143857
2021-06-20 10:36:34 | INFO | train_inner | epoch 005: 544 / 3002 loss=2.595, ppl=6.04, wps=5874.7, ups=0.09, wpb=64809, bsz=128, num_updates=12478, lr=9.99082e-05, gnorm=2.105, loss_scale=4, train_wall=11, gb_free=2.8, wall=143868
2021-06-20 10:36:45 | INFO | train_inner | epoch 005: 545 / 3002 loss=2.629, ppl=6.19, wps=5967.1, ups=0.09, wpb=64861, bsz=128, num_updates=12479, lr=9.99082e-05, gnorm=2.031, loss_scale=4, train_wall=10, gb_free=2.8, wall=143879
2021-06-20 10:36:56 | INFO | train_inner | epoch 005: 546 / 3002 loss=2.531, ppl=5.78, wps=5796, ups=0.09, wpb=64784, bsz=128, num_updates=12480, lr=9.99082e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=143891
2021-06-20 10:37:07 | INFO | train_inner | epoch 005: 547 / 3002 loss=2.434, ppl=5.4, wps=5863.2, ups=0.09, wpb=64849, bsz=128, num_updates=12481, lr=9.99081e-05, gnorm=1.909, loss_scale=4, train_wall=11, gb_free=2.8, wall=143902
2021-06-20 10:37:18 | INFO | train_inner | epoch 005: 548 / 3002 loss=2.424, ppl=5.37, wps=5776.8, ups=0.09, wpb=64924, bsz=128, num_updates=12482, lr=9.99081e-05, gnorm=3.802, loss_scale=4, train_wall=11, gb_free=2.8, wall=143913
2021-06-20 10:37:30 | INFO | train_inner | epoch 005: 549 / 3002 loss=2.667, ppl=6.35, wps=5734.4, ups=0.09, wpb=64806, bsz=128, num_updates=12483, lr=9.99081e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=143924
2021-06-20 10:37:41 | INFO | train_inner | epoch 005: 550 / 3002 loss=2.545, ppl=5.84, wps=5756.7, ups=0.09, wpb=64838, bsz=128, num_updates=12484, lr=9.99081e-05, gnorm=1.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=143935
2021-06-20 10:37:52 | INFO | train_inner | epoch 005: 551 / 3002 loss=2.509, ppl=5.69, wps=5809, ups=0.09, wpb=64877, bsz=128, num_updates=12485, lr=9.99081e-05, gnorm=1.932, loss_scale=4, train_wall=11, gb_free=2.8, wall=143947
2021-06-20 10:38:03 | INFO | train_inner | epoch 005: 552 / 3002 loss=2.583, ppl=5.99, wps=5797.8, ups=0.09, wpb=64921, bsz=128, num_updates=12486, lr=9.99081e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=143958
2021-06-20 10:38:15 | INFO | train_inner | epoch 005: 553 / 3002 loss=2.767, ppl=6.81, wps=5838.1, ups=0.09, wpb=64875, bsz=128, num_updates=12487, lr=9.99081e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=143969
2021-06-20 10:38:26 | INFO | train_inner | epoch 005: 554 / 3002 loss=2.491, ppl=5.62, wps=5811, ups=0.09, wpb=64823, bsz=128, num_updates=12488, lr=9.99081e-05, gnorm=2.035, loss_scale=4, train_wall=11, gb_free=2.8, wall=143980
2021-06-20 10:38:37 | INFO | train_inner | epoch 005: 555 / 3002 loss=2.564, ppl=5.91, wps=5936.4, ups=0.09, wpb=64851, bsz=128, num_updates=12489, lr=9.99081e-05, gnorm=2.106, loss_scale=4, train_wall=10, gb_free=2.8, wall=143991
2021-06-20 10:38:48 | INFO | train_inner | epoch 005: 556 / 3002 loss=2.565, ppl=5.92, wps=5862.3, ups=0.09, wpb=64839, bsz=128, num_updates=12490, lr=9.99081e-05, gnorm=2.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=144002
2021-06-20 10:38:59 | INFO | train_inner | epoch 005: 557 / 3002 loss=2.437, ppl=5.41, wps=5892.7, ups=0.09, wpb=64802, bsz=128, num_updates=12491, lr=9.99081e-05, gnorm=1.919, loss_scale=4, train_wall=11, gb_free=2.8, wall=144013
2021-06-20 10:39:10 | INFO | train_inner | epoch 005: 558 / 3002 loss=2.501, ppl=5.66, wps=5752.5, ups=0.09, wpb=64785, bsz=128, num_updates=12492, lr=9.99081e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=144024
2021-06-20 10:39:21 | INFO | train_inner | epoch 005: 559 / 3002 loss=2.584, ppl=6, wps=5877, ups=0.09, wpb=64788, bsz=128, num_updates=12493, lr=9.9908e-05, gnorm=2.222, loss_scale=4, train_wall=11, gb_free=2.8, wall=144035
2021-06-20 10:39:32 | INFO | train_inner | epoch 005: 560 / 3002 loss=2.655, ppl=6.3, wps=5782.1, ups=0.09, wpb=64789, bsz=128, num_updates=12494, lr=9.9908e-05, gnorm=2.046, loss_scale=4, train_wall=11, gb_free=2.8, wall=144046
2021-06-20 10:39:43 | INFO | train_inner | epoch 005: 561 / 3002 loss=2.641, ppl=6.24, wps=5786.9, ups=0.09, wpb=64798, bsz=128, num_updates=12495, lr=9.9908e-05, gnorm=2.025, loss_scale=4, train_wall=11, gb_free=2.8, wall=144058
2021-06-20 10:39:55 | INFO | train_inner | epoch 005: 562 / 3002 loss=2.601, ppl=6.07, wps=5785.4, ups=0.09, wpb=64819, bsz=128, num_updates=12496, lr=9.9908e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=144069
2021-06-20 10:40:06 | INFO | train_inner | epoch 005: 563 / 3002 loss=2.458, ppl=5.49, wps=5782.5, ups=0.09, wpb=64861, bsz=128, num_updates=12497, lr=9.9908e-05, gnorm=2.062, loss_scale=4, train_wall=11, gb_free=2.8, wall=144080
2021-06-20 10:40:17 | INFO | train_inner | epoch 005: 564 / 3002 loss=2.62, ppl=6.15, wps=5891.7, ups=0.09, wpb=64845, bsz=128, num_updates=12498, lr=9.9908e-05, gnorm=1.949, loss_scale=4, train_wall=11, gb_free=2.8, wall=144091
2021-06-20 10:40:28 | INFO | train_inner | epoch 005: 565 / 3002 loss=2.595, ppl=6.04, wps=5874.2, ups=0.09, wpb=64877, bsz=128, num_updates=12499, lr=9.9908e-05, gnorm=2.209, loss_scale=4, train_wall=11, gb_free=2.8, wall=144102
2021-06-20 10:40:39 | INFO | train_inner | epoch 005: 566 / 3002 loss=2.491, ppl=5.62, wps=5871.6, ups=0.09, wpb=64862, bsz=128, num_updates=12500, lr=9.9908e-05, gnorm=2.099, loss_scale=4, train_wall=11, gb_free=2.8, wall=144113
2021-06-20 10:40:50 | INFO | train_inner | epoch 005: 567 / 3002 loss=2.476, ppl=5.56, wps=5886.7, ups=0.09, wpb=64849, bsz=128, num_updates=12501, lr=9.9908e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=144124
2021-06-20 10:41:01 | INFO | train_inner | epoch 005: 568 / 3002 loss=2.557, ppl=5.88, wps=5811.3, ups=0.09, wpb=64827, bsz=128, num_updates=12502, lr=9.9908e-05, gnorm=2.116, loss_scale=4, train_wall=11, gb_free=2.8, wall=144135
2021-06-20 10:41:12 | INFO | train_inner | epoch 005: 569 / 3002 loss=2.429, ppl=5.38, wps=5862.5, ups=0.09, wpb=64815, bsz=128, num_updates=12503, lr=9.9908e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=144146
2021-06-20 10:41:23 | INFO | train_inner | epoch 005: 570 / 3002 loss=2.469, ppl=5.54, wps=5913.7, ups=0.09, wpb=64828, bsz=128, num_updates=12504, lr=9.9908e-05, gnorm=2.036, loss_scale=4, train_wall=10, gb_free=2.8, wall=144157
2021-06-20 10:41:34 | INFO | train_inner | epoch 005: 571 / 3002 loss=2.477, ppl=5.57, wps=5790.7, ups=0.09, wpb=64829, bsz=128, num_updates=12505, lr=9.9908e-05, gnorm=2.159, loss_scale=4, train_wall=11, gb_free=2.8, wall=144169
2021-06-20 10:41:45 | INFO | train_inner | epoch 005: 572 / 3002 loss=2.476, ppl=5.56, wps=5811.5, ups=0.09, wpb=64873, bsz=128, num_updates=12506, lr=9.99079e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=144180
2021-06-20 10:41:56 | INFO | train_inner | epoch 005: 573 / 3002 loss=2.707, ppl=6.53, wps=5956.9, ups=0.09, wpb=64825, bsz=128, num_updates=12507, lr=9.99079e-05, gnorm=2.007, loss_scale=4, train_wall=10, gb_free=2.8, wall=144191
2021-06-20 10:42:07 | INFO | train_inner | epoch 005: 574 / 3002 loss=2.543, ppl=5.83, wps=5948.8, ups=0.09, wpb=64831, bsz=128, num_updates=12508, lr=9.99079e-05, gnorm=1.979, loss_scale=4, train_wall=10, gb_free=2.8, wall=144202
2021-06-20 10:42:18 | INFO | train_inner | epoch 005: 575 / 3002 loss=2.613, ppl=6.12, wps=5879.1, ups=0.09, wpb=64802, bsz=128, num_updates=12509, lr=9.99079e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=144213
2021-06-20 10:42:29 | INFO | train_inner | epoch 005: 576 / 3002 loss=2.416, ppl=5.34, wps=5802.8, ups=0.09, wpb=64948, bsz=128, num_updates=12510, lr=9.99079e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=144224
2021-06-20 10:42:41 | INFO | train_inner | epoch 005: 577 / 3002 loss=2.505, ppl=5.68, wps=5759.5, ups=0.09, wpb=64833, bsz=128, num_updates=12511, lr=9.99079e-05, gnorm=1.988, loss_scale=4, train_wall=11, gb_free=2.8, wall=144235
2021-06-20 10:42:52 | INFO | train_inner | epoch 005: 578 / 3002 loss=2.778, ppl=6.86, wps=5804.5, ups=0.09, wpb=64749, bsz=128, num_updates=12512, lr=9.99079e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=144246
2021-06-20 10:43:03 | INFO | train_inner | epoch 005: 579 / 3002 loss=2.531, ppl=5.78, wps=5790.9, ups=0.09, wpb=64811, bsz=128, num_updates=12513, lr=9.99079e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=144257
2021-06-20 10:43:14 | INFO | train_inner | epoch 005: 580 / 3002 loss=2.574, ppl=5.95, wps=5792.1, ups=0.09, wpb=64834, bsz=128, num_updates=12514, lr=9.99079e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=144269
2021-06-20 10:43:25 | INFO | train_inner | epoch 005: 581 / 3002 loss=2.542, ppl=5.82, wps=5822.2, ups=0.09, wpb=64719, bsz=128, num_updates=12515, lr=9.99079e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=144280
2021-06-20 10:43:36 | INFO | train_inner | epoch 005: 582 / 3002 loss=2.573, ppl=5.95, wps=5914.8, ups=0.09, wpb=64956, bsz=128, num_updates=12516, lr=9.99079e-05, gnorm=2.046, loss_scale=4, train_wall=11, gb_free=2.8, wall=144291
2021-06-20 10:43:47 | INFO | train_inner | epoch 005: 583 / 3002 loss=2.537, ppl=5.8, wps=5922, ups=0.09, wpb=64824, bsz=128, num_updates=12517, lr=9.99079e-05, gnorm=1.964, loss_scale=4, train_wall=11, gb_free=2.8, wall=144302
2021-06-20 10:43:58 | INFO | train_inner | epoch 005: 584 / 3002 loss=2.509, ppl=5.69, wps=5880.7, ups=0.09, wpb=64873, bsz=128, num_updates=12518, lr=9.99078e-05, gnorm=1.927, loss_scale=4, train_wall=11, gb_free=2.8, wall=144313
2021-06-20 10:44:09 | INFO | train_inner | epoch 005: 585 / 3002 loss=2.333, ppl=5.04, wps=5947, ups=0.09, wpb=64880, bsz=128, num_updates=12519, lr=9.99078e-05, gnorm=2.006, loss_scale=4, train_wall=10, gb_free=2.8, wall=144324
2021-06-20 10:44:20 | INFO | train_inner | epoch 005: 586 / 3002 loss=2.576, ppl=5.96, wps=5857.6, ups=0.09, wpb=64830, bsz=128, num_updates=12520, lr=9.99078e-05, gnorm=2.031, loss_scale=4, train_wall=11, gb_free=2.8, wall=144335
2021-06-20 10:44:31 | INFO | train_inner | epoch 005: 587 / 3002 loss=2.586, ppl=6, wps=5891.5, ups=0.09, wpb=64814, bsz=128, num_updates=12521, lr=9.99078e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=144346
2021-06-20 10:44:42 | INFO | train_inner | epoch 005: 588 / 3002 loss=2.51, ppl=5.7, wps=5812, ups=0.09, wpb=64939, bsz=128, num_updates=12522, lr=9.99078e-05, gnorm=2.178, loss_scale=4, train_wall=11, gb_free=2.8, wall=144357
2021-06-20 10:44:54 | INFO | train_inner | epoch 005: 589 / 3002 loss=2.414, ppl=5.33, wps=5767, ups=0.09, wpb=64836, bsz=128, num_updates=12523, lr=9.99078e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=144368
2021-06-20 10:45:05 | INFO | train_inner | epoch 005: 590 / 3002 loss=2.558, ppl=5.89, wps=5802.8, ups=0.09, wpb=64860, bsz=128, num_updates=12524, lr=9.99078e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=144379
2021-06-20 10:45:16 | INFO | train_inner | epoch 005: 591 / 3002 loss=2.437, ppl=5.42, wps=5753.7, ups=0.09, wpb=64774, bsz=128, num_updates=12525, lr=9.99078e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=144390
2021-06-20 10:45:27 | INFO | train_inner | epoch 005: 592 / 3002 loss=2.436, ppl=5.41, wps=5822, ups=0.09, wpb=64807, bsz=128, num_updates=12526, lr=9.99078e-05, gnorm=1.987, loss_scale=4, train_wall=11, gb_free=2.8, wall=144402
2021-06-20 10:45:38 | INFO | train_inner | epoch 005: 593 / 3002 loss=2.407, ppl=5.3, wps=5884.3, ups=0.09, wpb=64785, bsz=128, num_updates=12527, lr=9.99078e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=144413
2021-06-20 10:45:49 | INFO | train_inner | epoch 005: 594 / 3002 loss=2.447, ppl=5.45, wps=5930.5, ups=0.09, wpb=64866, bsz=128, num_updates=12528, lr=9.99078e-05, gnorm=2.035, loss_scale=4, train_wall=10, gb_free=2.8, wall=144424
2021-06-20 10:46:00 | INFO | train_inner | epoch 005: 595 / 3002 loss=2.666, ppl=6.34, wps=5820.8, ups=0.09, wpb=64800, bsz=128, num_updates=12529, lr=9.99078e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=144435
2021-06-20 10:46:11 | INFO | train_inner | epoch 005: 596 / 3002 loss=2.42, ppl=5.35, wps=5818.6, ups=0.09, wpb=64898, bsz=128, num_updates=12530, lr=9.99078e-05, gnorm=1.888, loss_scale=4, train_wall=11, gb_free=2.8, wall=144446
2021-06-20 10:46:23 | INFO | train_inner | epoch 005: 597 / 3002 loss=2.554, ppl=5.87, wps=5791.4, ups=0.09, wpb=64742, bsz=128, num_updates=12531, lr=9.99077e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=144457
2021-06-20 10:46:34 | INFO | train_inner | epoch 005: 598 / 3002 loss=2.451, ppl=5.47, wps=5788.9, ups=0.09, wpb=64818, bsz=128, num_updates=12532, lr=9.99077e-05, gnorm=1.926, loss_scale=4, train_wall=11, gb_free=2.8, wall=144468
2021-06-20 10:46:45 | INFO | train_inner | epoch 005: 599 / 3002 loss=2.394, ppl=5.26, wps=5857, ups=0.09, wpb=64791, bsz=128, num_updates=12533, lr=9.99077e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=144479
2021-06-20 10:46:56 | INFO | train_inner | epoch 005: 600 / 3002 loss=2.531, ppl=5.78, wps=5863.8, ups=0.09, wpb=64819, bsz=128, num_updates=12534, lr=9.99077e-05, gnorm=2.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=144490
2021-06-20 10:47:07 | INFO | train_inner | epoch 005: 601 / 3002 loss=2.516, ppl=5.72, wps=5802.6, ups=0.09, wpb=64724, bsz=128, num_updates=12535, lr=9.99077e-05, gnorm=1.911, loss_scale=4, train_wall=11, gb_free=2.8, wall=144501
2021-06-20 10:47:18 | INFO | train_inner | epoch 005: 602 / 3002 loss=2.651, ppl=6.28, wps=5824.3, ups=0.09, wpb=64891, bsz=128, num_updates=12536, lr=9.99077e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=144513
2021-06-20 10:47:29 | INFO | train_inner | epoch 005: 603 / 3002 loss=2.405, ppl=5.3, wps=5899.3, ups=0.09, wpb=64902, bsz=128, num_updates=12537, lr=9.99077e-05, gnorm=2.021, loss_scale=4, train_wall=11, gb_free=2.8, wall=144524
2021-06-20 10:47:41 | INFO | train_inner | epoch 005: 604 / 3002 loss=2.507, ppl=5.69, wps=5755.3, ups=0.09, wpb=64902, bsz=128, num_updates=12538, lr=9.99077e-05, gnorm=1.949, loss_scale=4, train_wall=11, gb_free=2.8, wall=144535
2021-06-20 10:47:52 | INFO | train_inner | epoch 005: 605 / 3002 loss=2.377, ppl=5.2, wps=5896.9, ups=0.09, wpb=64814, bsz=128, num_updates=12539, lr=9.99077e-05, gnorm=1.924, loss_scale=4, train_wall=11, gb_free=2.8, wall=144546
2021-06-20 10:48:03 | INFO | train_inner | epoch 005: 606 / 3002 loss=2.564, ppl=5.91, wps=5864.9, ups=0.09, wpb=64897, bsz=128, num_updates=12540, lr=9.99077e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=144557
2021-06-20 10:48:14 | INFO | train_inner | epoch 005: 607 / 3002 loss=2.512, ppl=5.7, wps=5757.3, ups=0.09, wpb=64881, bsz=128, num_updates=12541, lr=9.99077e-05, gnorm=1.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=144568
2021-06-20 10:48:25 | INFO | train_inner | epoch 005: 608 / 3002 loss=2.6, ppl=6.06, wps=5855.2, ups=0.09, wpb=64832, bsz=128, num_updates=12542, lr=9.99077e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=144579
2021-06-20 10:48:36 | INFO | train_inner | epoch 005: 609 / 3002 loss=2.447, ppl=5.45, wps=5788.5, ups=0.09, wpb=64810, bsz=128, num_updates=12543, lr=9.99076e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=144590
2021-06-20 10:48:47 | INFO | train_inner | epoch 005: 610 / 3002 loss=2.468, ppl=5.53, wps=5921.3, ups=0.09, wpb=64894, bsz=128, num_updates=12544, lr=9.99076e-05, gnorm=2.008, loss_scale=4, train_wall=10, gb_free=2.8, wall=144601
2021-06-20 10:48:58 | INFO | train_inner | epoch 005: 611 / 3002 loss=2.497, ppl=5.64, wps=5786.9, ups=0.09, wpb=64826, bsz=128, num_updates=12545, lr=9.99076e-05, gnorm=2.038, loss_scale=4, train_wall=11, gb_free=2.8, wall=144613
2021-06-20 10:49:09 | INFO | train_inner | epoch 005: 612 / 3002 loss=2.489, ppl=5.61, wps=5869.8, ups=0.09, wpb=64864, bsz=128, num_updates=12546, lr=9.99076e-05, gnorm=2.084, loss_scale=4, train_wall=11, gb_free=2.8, wall=144624
2021-06-20 10:49:21 | INFO | train_inner | epoch 005: 613 / 3002 loss=2.459, ppl=5.5, wps=5790.1, ups=0.09, wpb=64753, bsz=128, num_updates=12547, lr=9.99076e-05, gnorm=1.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=144635
2021-06-20 10:49:32 | INFO | train_inner | epoch 005: 614 / 3002 loss=2.491, ppl=5.62, wps=5885.3, ups=0.09, wpb=64867, bsz=128, num_updates=12548, lr=9.99076e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=144646
2021-06-20 10:49:43 | INFO | train_inner | epoch 005: 615 / 3002 loss=2.542, ppl=5.82, wps=5802.7, ups=0.09, wpb=64832, bsz=128, num_updates=12549, lr=9.99076e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=144657
2021-06-20 10:49:54 | INFO | train_inner | epoch 005: 616 / 3002 loss=2.634, ppl=6.21, wps=5873.2, ups=0.09, wpb=64800, bsz=128, num_updates=12550, lr=9.99076e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=144668
2021-06-20 10:50:05 | INFO | train_inner | epoch 005: 617 / 3002 loss=2.486, ppl=5.6, wps=5874.8, ups=0.09, wpb=64860, bsz=128, num_updates=12551, lr=9.99076e-05, gnorm=2.02, loss_scale=4, train_wall=11, gb_free=2.8, wall=144679
2021-06-20 10:50:16 | INFO | train_inner | epoch 005: 618 / 3002 loss=2.433, ppl=5.4, wps=5959.9, ups=0.09, wpb=64851, bsz=128, num_updates=12552, lr=9.99076e-05, gnorm=1.939, loss_scale=4, train_wall=10, gb_free=2.8, wall=144690
2021-06-20 10:50:27 | INFO | train_inner | epoch 005: 619 / 3002 loss=2.627, ppl=6.18, wps=5859.2, ups=0.09, wpb=64792, bsz=128, num_updates=12553, lr=9.99076e-05, gnorm=1.927, loss_scale=4, train_wall=11, gb_free=2.8, wall=144701
2021-06-20 10:50:38 | INFO | train_inner | epoch 005: 620 / 3002 loss=2.659, ppl=6.32, wps=5930.2, ups=0.09, wpb=64825, bsz=128, num_updates=12554, lr=9.99076e-05, gnorm=3.043, loss_scale=4, train_wall=10, gb_free=2.8, wall=144712
2021-06-20 10:50:49 | INFO | train_inner | epoch 005: 621 / 3002 loss=2.508, ppl=5.69, wps=5797.7, ups=0.09, wpb=64837, bsz=128, num_updates=12555, lr=9.99076e-05, gnorm=1.909, loss_scale=4, train_wall=11, gb_free=2.8, wall=144723
2021-06-20 10:51:00 | INFO | train_inner | epoch 005: 622 / 3002 loss=2.427, ppl=5.38, wps=5742.8, ups=0.09, wpb=64752, bsz=128, num_updates=12556, lr=9.99075e-05, gnorm=1.911, loss_scale=4, train_wall=11, gb_free=2.8, wall=144734
2021-06-20 10:51:11 | INFO | train_inner | epoch 005: 623 / 3002 loss=2.659, ppl=6.32, wps=5713.7, ups=0.09, wpb=64773, bsz=128, num_updates=12557, lr=9.99075e-05, gnorm=1.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=144746
2021-06-20 10:51:23 | INFO | train_inner | epoch 005: 624 / 3002 loss=2.382, ppl=5.21, wps=5784.3, ups=0.09, wpb=64857, bsz=128, num_updates=12558, lr=9.99075e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=144757
2021-06-20 10:51:34 | INFO | train_inner | epoch 005: 625 / 3002 loss=2.514, ppl=5.71, wps=5884.7, ups=0.09, wpb=64781, bsz=128, num_updates=12559, lr=9.99075e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=144768
2021-06-20 10:51:45 | INFO | train_inner | epoch 005: 626 / 3002 loss=2.432, ppl=5.4, wps=5895.5, ups=0.09, wpb=64893, bsz=128, num_updates=12560, lr=9.99075e-05, gnorm=3.563, loss_scale=4, train_wall=11, gb_free=2.8, wall=144779
2021-06-20 10:51:56 | INFO | train_inner | epoch 005: 627 / 3002 loss=2.556, ppl=5.88, wps=5807.8, ups=0.09, wpb=64827, bsz=128, num_updates=12561, lr=9.99075e-05, gnorm=2.015, loss_scale=4, train_wall=11, gb_free=2.8, wall=144790
2021-06-20 10:52:07 | INFO | train_inner | epoch 005: 628 / 3002 loss=2.545, ppl=5.83, wps=5964, ups=0.09, wpb=64793, bsz=128, num_updates=12562, lr=9.99075e-05, gnorm=2.044, loss_scale=4, train_wall=10, gb_free=2.8, wall=144801
2021-06-20 10:52:18 | INFO | train_inner | epoch 005: 629 / 3002 loss=2.399, ppl=5.27, wps=5784.6, ups=0.09, wpb=64830, bsz=128, num_updates=12563, lr=9.99075e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=144812
2021-06-20 10:52:29 | INFO | train_inner | epoch 005: 630 / 3002 loss=2.549, ppl=5.85, wps=5923.1, ups=0.09, wpb=64797, bsz=128, num_updates=12564, lr=9.99075e-05, gnorm=1.913, loss_scale=4, train_wall=10, gb_free=2.8, wall=144823
2021-06-20 10:52:40 | INFO | train_inner | epoch 005: 631 / 3002 loss=2.507, ppl=5.69, wps=5929.7, ups=0.09, wpb=64862, bsz=128, num_updates=12565, lr=9.99075e-05, gnorm=1.936, loss_scale=4, train_wall=10, gb_free=2.8, wall=144834
2021-06-20 10:52:51 | INFO | train_inner | epoch 005: 632 / 3002 loss=2.586, ppl=6, wps=5738, ups=0.09, wpb=64895, bsz=128, num_updates=12566, lr=9.99075e-05, gnorm=1.974, loss_scale=4, train_wall=11, gb_free=2.8, wall=144845
2021-06-20 10:53:02 | INFO | train_inner | epoch 005: 633 / 3002 loss=2.519, ppl=5.73, wps=5748.4, ups=0.09, wpb=64818, bsz=128, num_updates=12567, lr=9.99075e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=144857
2021-06-20 10:53:13 | INFO | train_inner | epoch 005: 634 / 3002 loss=2.545, ppl=5.83, wps=5921.6, ups=0.09, wpb=64825, bsz=128, num_updates=12568, lr=9.99074e-05, gnorm=1.992, loss_scale=4, train_wall=10, gb_free=2.8, wall=144868
2021-06-20 10:53:25 | INFO | train_inner | epoch 005: 635 / 3002 loss=2.612, ppl=6.11, wps=5781.4, ups=0.09, wpb=64898, bsz=128, num_updates=12569, lr=9.99074e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=144879
2021-06-20 10:53:36 | INFO | train_inner | epoch 005: 636 / 3002 loss=2.425, ppl=5.37, wps=5711.3, ups=0.09, wpb=64818, bsz=128, num_updates=12570, lr=9.99074e-05, gnorm=2.016, loss_scale=4, train_wall=11, gb_free=2.8, wall=144890
2021-06-20 10:53:47 | INFO | train_inner | epoch 005: 637 / 3002 loss=2.498, ppl=5.65, wps=5783.6, ups=0.09, wpb=64888, bsz=128, num_updates=12571, lr=9.99074e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=144901
2021-06-20 10:53:58 | INFO | train_inner | epoch 005: 638 / 3002 loss=2.462, ppl=5.51, wps=5907.3, ups=0.09, wpb=64907, bsz=128, num_updates=12572, lr=9.99074e-05, gnorm=2.1, loss_scale=4, train_wall=11, gb_free=2.8, wall=144912
2021-06-20 10:54:09 | INFO | train_inner | epoch 005: 639 / 3002 loss=2.444, ppl=5.44, wps=5906.6, ups=0.09, wpb=64789, bsz=128, num_updates=12573, lr=9.99074e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=144923
2021-06-20 10:54:20 | INFO | train_inner | epoch 005: 640 / 3002 loss=2.489, ppl=5.62, wps=5785.9, ups=0.09, wpb=64899, bsz=128, num_updates=12574, lr=9.99074e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=144935
2021-06-20 10:54:32 | INFO | train_inner | epoch 005: 641 / 3002 loss=2.525, ppl=5.76, wps=5773.8, ups=0.09, wpb=64857, bsz=128, num_updates=12575, lr=9.99074e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=144946
2021-06-20 10:54:43 | INFO | train_inner | epoch 005: 642 / 3002 loss=2.582, ppl=5.99, wps=5801.6, ups=0.09, wpb=64788, bsz=128, num_updates=12576, lr=9.99074e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=144957
2021-06-20 10:54:54 | INFO | train_inner | epoch 005: 643 / 3002 loss=2.53, ppl=5.78, wps=5890.1, ups=0.09, wpb=64828, bsz=128, num_updates=12577, lr=9.99074e-05, gnorm=2.083, loss_scale=4, train_wall=11, gb_free=2.8, wall=144968
2021-06-20 10:55:05 | INFO | train_inner | epoch 005: 644 / 3002 loss=2.474, ppl=5.56, wps=5787.3, ups=0.09, wpb=64878, bsz=128, num_updates=12578, lr=9.99074e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=144979
2021-06-20 10:55:16 | INFO | train_inner | epoch 005: 645 / 3002 loss=2.525, ppl=5.75, wps=5886.5, ups=0.09, wpb=64808, bsz=128, num_updates=12579, lr=9.99074e-05, gnorm=1.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=144990
2021-06-20 10:55:27 | INFO | train_inner | epoch 005: 646 / 3002 loss=2.419, ppl=5.35, wps=5753.1, ups=0.09, wpb=64807, bsz=128, num_updates=12580, lr=9.99074e-05, gnorm=2.021, loss_scale=4, train_wall=11, gb_free=2.8, wall=145002
2021-06-20 10:55:38 | INFO | train_inner | epoch 005: 647 / 3002 loss=2.608, ppl=6.1, wps=5838.5, ups=0.09, wpb=64862, bsz=128, num_updates=12581, lr=9.99073e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=145013
2021-06-20 10:55:49 | INFO | train_inner | epoch 005: 648 / 3002 loss=2.491, ppl=5.62, wps=5842.6, ups=0.09, wpb=64874, bsz=128, num_updates=12582, lr=9.99073e-05, gnorm=2.074, loss_scale=4, train_wall=11, gb_free=2.8, wall=145024
2021-06-20 10:56:00 | INFO | train_inner | epoch 005: 649 / 3002 loss=2.516, ppl=5.72, wps=5974, ups=0.09, wpb=64828, bsz=128, num_updates=12583, lr=9.99073e-05, gnorm=2.123, loss_scale=4, train_wall=10, gb_free=2.8, wall=145035
2021-06-20 10:56:11 | INFO | train_inner | epoch 005: 650 / 3002 loss=2.448, ppl=5.46, wps=5830.3, ups=0.09, wpb=64887, bsz=128, num_updates=12584, lr=9.99073e-05, gnorm=1.937, loss_scale=4, train_wall=11, gb_free=2.8, wall=145046
2021-06-20 10:56:23 | INFO | train_inner | epoch 005: 651 / 3002 loss=2.631, ppl=6.19, wps=5801.7, ups=0.09, wpb=64728, bsz=128, num_updates=12585, lr=9.99073e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=145057
2021-06-20 10:56:34 | INFO | train_inner | epoch 005: 652 / 3002 loss=2.429, ppl=5.39, wps=5810, ups=0.09, wpb=64878, bsz=128, num_updates=12586, lr=9.99073e-05, gnorm=2.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=145068
2021-06-20 10:56:45 | INFO | train_inner | epoch 005: 653 / 3002 loss=2.276, ppl=4.84, wps=5901.9, ups=0.09, wpb=64891, bsz=128, num_updates=12587, lr=9.99073e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=145079
2021-06-20 10:56:56 | INFO | train_inner | epoch 005: 654 / 3002 loss=2.538, ppl=5.81, wps=5829.2, ups=0.09, wpb=64806, bsz=128, num_updates=12588, lr=9.99073e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=145090
2021-06-20 10:57:07 | INFO | train_inner | epoch 005: 655 / 3002 loss=2.627, ppl=6.18, wps=5891.5, ups=0.09, wpb=64816, bsz=128, num_updates=12589, lr=9.99073e-05, gnorm=2.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=145101
2021-06-20 10:57:18 | INFO | train_inner | epoch 005: 656 / 3002 loss=2.42, ppl=5.35, wps=5892.1, ups=0.09, wpb=64894, bsz=128, num_updates=12590, lr=9.99073e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=145112
2021-06-20 10:57:29 | INFO | train_inner | epoch 005: 657 / 3002 loss=2.581, ppl=5.98, wps=5853.9, ups=0.09, wpb=64831, bsz=128, num_updates=12591, lr=9.99073e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=145123
2021-06-20 10:57:40 | INFO | train_inner | epoch 005: 658 / 3002 loss=2.509, ppl=5.69, wps=5868.8, ups=0.09, wpb=64898, bsz=128, num_updates=12592, lr=9.99073e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=145134
2021-06-20 10:57:51 | INFO | train_inner | epoch 005: 659 / 3002 loss=2.516, ppl=5.72, wps=5789.4, ups=0.09, wpb=64915, bsz=128, num_updates=12593, lr=9.99072e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=145146
2021-06-20 10:58:02 | INFO | train_inner | epoch 005: 660 / 3002 loss=2.422, ppl=5.36, wps=5754.2, ups=0.09, wpb=64720, bsz=128, num_updates=12594, lr=9.99072e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=145157
2021-06-20 10:58:14 | INFO | train_inner | epoch 005: 661 / 3002 loss=2.42, ppl=5.35, wps=5872.3, ups=0.09, wpb=64934, bsz=128, num_updates=12595, lr=9.99072e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=145168
2021-06-20 10:58:25 | INFO | train_inner | epoch 005: 662 / 3002 loss=2.462, ppl=5.51, wps=5886.6, ups=0.09, wpb=64790, bsz=128, num_updates=12596, lr=9.99072e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=145179
2021-06-20 10:58:36 | INFO | train_inner | epoch 005: 663 / 3002 loss=2.608, ppl=6.1, wps=5744.2, ups=0.09, wpb=64781, bsz=128, num_updates=12597, lr=9.99072e-05, gnorm=2.072, loss_scale=8, train_wall=11, gb_free=2.8, wall=145190
2021-06-20 10:58:47 | INFO | train_inner | epoch 005: 664 / 3002 loss=2.374, ppl=5.18, wps=5812.5, ups=0.09, wpb=64814, bsz=128, num_updates=12598, lr=9.99072e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=145201
2021-06-20 10:58:58 | INFO | train_inner | epoch 005: 665 / 3002 loss=2.617, ppl=6.13, wps=5879.6, ups=0.09, wpb=64882, bsz=128, num_updates=12599, lr=9.99072e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=145212
2021-06-20 10:59:09 | INFO | train_inner | epoch 005: 666 / 3002 loss=2.521, ppl=5.74, wps=5944.4, ups=0.09, wpb=64776, bsz=128, num_updates=12600, lr=9.99072e-05, gnorm=2.034, loss_scale=8, train_wall=10, gb_free=2.8, wall=145223
2021-06-20 10:59:20 | INFO | train_inner | epoch 005: 667 / 3002 loss=2.494, ppl=5.63, wps=5864.3, ups=0.09, wpb=64898, bsz=128, num_updates=12601, lr=9.99072e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=145234
2021-06-20 10:59:31 | INFO | train_inner | epoch 005: 668 / 3002 loss=2.467, ppl=5.53, wps=5835.4, ups=0.09, wpb=64815, bsz=128, num_updates=12602, lr=9.99072e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=145245
2021-06-20 10:59:42 | INFO | train_inner | epoch 005: 669 / 3002 loss=2.567, ppl=5.93, wps=5779, ups=0.09, wpb=64851, bsz=128, num_updates=12603, lr=9.99072e-05, gnorm=2.072, loss_scale=8, train_wall=11, gb_free=2.8, wall=145257
2021-06-20 10:59:53 | INFO | train_inner | epoch 005: 670 / 3002 loss=2.675, ppl=6.39, wps=5897.8, ups=0.09, wpb=64811, bsz=128, num_updates=12604, lr=9.99072e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=145268
2021-06-20 11:00:04 | INFO | train_inner | epoch 005: 671 / 3002 loss=2.606, ppl=6.09, wps=5845, ups=0.09, wpb=64781, bsz=128, num_updates=12605, lr=9.99072e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=145279
2021-06-20 11:00:16 | INFO | train_inner | epoch 005: 672 / 3002 loss=2.427, ppl=5.38, wps=5721.6, ups=0.09, wpb=64416, bsz=128, num_updates=12606, lr=9.99071e-05, gnorm=2.062, loss_scale=8, train_wall=11, gb_free=2.8, wall=145290
2021-06-20 11:00:26 | INFO | train_inner | epoch 005: 673 / 3002 loss=2.577, ppl=5.97, wps=5985.1, ups=0.09, wpb=64908, bsz=128, num_updates=12607, lr=9.99071e-05, gnorm=2.029, loss_scale=8, train_wall=10, gb_free=2.8, wall=145301
2021-06-20 11:00:38 | INFO | train_inner | epoch 005: 674 / 3002 loss=2.571, ppl=5.94, wps=5878.1, ups=0.09, wpb=64873, bsz=128, num_updates=12608, lr=9.99071e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=145312
2021-06-20 11:00:49 | INFO | train_inner | epoch 005: 675 / 3002 loss=2.567, ppl=5.93, wps=5809.4, ups=0.09, wpb=64881, bsz=128, num_updates=12609, lr=9.99071e-05, gnorm=2.084, loss_scale=8, train_wall=11, gb_free=2.8, wall=145323
2021-06-20 11:01:00 | INFO | train_inner | epoch 005: 676 / 3002 loss=2.416, ppl=5.34, wps=5832.1, ups=0.09, wpb=64848, bsz=128, num_updates=12610, lr=9.99071e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=145334
2021-06-20 11:01:11 | INFO | train_inner | epoch 005: 677 / 3002 loss=2.404, ppl=5.29, wps=5846.9, ups=0.09, wpb=64842, bsz=128, num_updates=12611, lr=9.99071e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=145345
2021-06-20 11:01:22 | INFO | train_inner | epoch 005: 678 / 3002 loss=2.494, ppl=5.63, wps=5754.6, ups=0.09, wpb=64795, bsz=128, num_updates=12612, lr=9.99071e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=145356
2021-06-20 11:01:33 | INFO | train_inner | epoch 005: 679 / 3002 loss=2.439, ppl=5.42, wps=5989.5, ups=0.09, wpb=64851, bsz=128, num_updates=12613, lr=9.99071e-05, gnorm=1.934, loss_scale=8, train_wall=10, gb_free=2.8, wall=145367
2021-06-20 11:01:44 | INFO | train_inner | epoch 005: 680 / 3002 loss=2.537, ppl=5.8, wps=5827, ups=0.09, wpb=64827, bsz=128, num_updates=12614, lr=9.99071e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=145378
2021-06-20 11:01:55 | INFO | train_inner | epoch 005: 681 / 3002 loss=2.519, ppl=5.73, wps=5760.9, ups=0.09, wpb=64825, bsz=128, num_updates=12615, lr=9.99071e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=145390
2021-06-20 11:02:06 | INFO | train_inner | epoch 005: 682 / 3002 loss=2.677, ppl=6.4, wps=5825.4, ups=0.09, wpb=64760, bsz=128, num_updates=12616, lr=9.99071e-05, gnorm=1.977, loss_scale=8, train_wall=11, gb_free=2.8, wall=145401
2021-06-20 11:02:18 | INFO | train_inner | epoch 005: 683 / 3002 loss=2.502, ppl=5.66, wps=5864, ups=0.09, wpb=64819, bsz=128, num_updates=12617, lr=9.99071e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=145412
2021-06-20 11:02:29 | INFO | train_inner | epoch 005: 684 / 3002 loss=2.629, ppl=6.19, wps=5818.4, ups=0.09, wpb=64837, bsz=128, num_updates=12618, lr=9.9907e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=145423
2021-06-20 11:02:40 | INFO | train_inner | epoch 005: 685 / 3002 loss=2.483, ppl=5.59, wps=5900.7, ups=0.09, wpb=64794, bsz=128, num_updates=12619, lr=9.9907e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=145434
2021-06-20 11:02:51 | INFO | train_inner | epoch 005: 686 / 3002 loss=2.482, ppl=5.59, wps=5782.9, ups=0.09, wpb=64841, bsz=128, num_updates=12620, lr=9.9907e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=145445
2021-06-20 11:03:02 | INFO | train_inner | epoch 005: 687 / 3002 loss=2.471, ppl=5.54, wps=5816.6, ups=0.09, wpb=64831, bsz=128, num_updates=12621, lr=9.9907e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=145456
2021-06-20 11:03:13 | INFO | train_inner | epoch 005: 688 / 3002 loss=2.54, ppl=5.82, wps=5770.3, ups=0.09, wpb=64812, bsz=128, num_updates=12622, lr=9.9907e-05, gnorm=2.049, loss_scale=8, train_wall=11, gb_free=2.8, wall=145468
2021-06-20 11:03:25 | INFO | train_inner | epoch 005: 689 / 3002 loss=2.375, ppl=5.19, wps=5733.4, ups=0.09, wpb=64805, bsz=128, num_updates=12623, lr=9.9907e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=145479
2021-06-20 11:03:36 | INFO | train_inner | epoch 005: 690 / 3002 loss=2.531, ppl=5.78, wps=5914.4, ups=0.09, wpb=64823, bsz=128, num_updates=12624, lr=9.9907e-05, gnorm=2.037, loss_scale=8, train_wall=10, gb_free=2.8, wall=145490
2021-06-20 11:03:47 | INFO | train_inner | epoch 005: 691 / 3002 loss=2.328, ppl=5.02, wps=5815.7, ups=0.09, wpb=64772, bsz=128, num_updates=12625, lr=9.9907e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=145501
2021-06-20 11:03:58 | INFO | train_inner | epoch 005: 692 / 3002 loss=2.539, ppl=5.81, wps=5853.9, ups=0.09, wpb=64715, bsz=128, num_updates=12626, lr=9.9907e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=145512
2021-06-20 11:04:09 | INFO | train_inner | epoch 005: 693 / 3002 loss=2.507, ppl=5.68, wps=5927.9, ups=0.09, wpb=64908, bsz=128, num_updates=12627, lr=9.9907e-05, gnorm=2.032, loss_scale=8, train_wall=10, gb_free=2.8, wall=145523
2021-06-20 11:04:20 | INFO | train_inner | epoch 005: 694 / 3002 loss=2.458, ppl=5.5, wps=5802.8, ups=0.09, wpb=64772, bsz=128, num_updates=12628, lr=9.9907e-05, gnorm=2, loss_scale=8, train_wall=11, gb_free=2.8, wall=145534
2021-06-20 11:04:31 | INFO | train_inner | epoch 005: 695 / 3002 loss=2.676, ppl=6.39, wps=5835, ups=0.09, wpb=64824, bsz=128, num_updates=12629, lr=9.9907e-05, gnorm=2, loss_scale=8, train_wall=11, gb_free=2.8, wall=145545
2021-06-20 11:04:42 | INFO | train_inner | epoch 005: 696 / 3002 loss=2.576, ppl=5.96, wps=5857.8, ups=0.09, wpb=64851, bsz=128, num_updates=12630, lr=9.9907e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=145556
2021-06-20 11:04:53 | INFO | train_inner | epoch 005: 697 / 3002 loss=2.358, ppl=5.12, wps=5725.7, ups=0.09, wpb=64807, bsz=128, num_updates=12631, lr=9.99069e-05, gnorm=1.873, loss_scale=8, train_wall=11, gb_free=2.8, wall=145568
2021-06-20 11:05:04 | INFO | train_inner | epoch 005: 698 / 3002 loss=2.61, ppl=6.1, wps=5987.9, ups=0.09, wpb=64792, bsz=128, num_updates=12632, lr=9.99069e-05, gnorm=1.974, loss_scale=8, train_wall=10, gb_free=2.8, wall=145578
2021-06-20 11:05:15 | INFO | train_inner | epoch 005: 699 / 3002 loss=2.659, ppl=6.32, wps=5765.1, ups=0.09, wpb=64856, bsz=128, num_updates=12633, lr=9.99069e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=145590
2021-06-20 11:05:26 | INFO | train_inner | epoch 005: 700 / 3002 loss=2.394, ppl=5.26, wps=5841.2, ups=0.09, wpb=64844, bsz=128, num_updates=12634, lr=9.99069e-05, gnorm=1.874, loss_scale=8, train_wall=11, gb_free=2.8, wall=145601
2021-06-20 11:05:37 | INFO | train_inner | epoch 005: 701 / 3002 loss=2.505, ppl=5.68, wps=5903.4, ups=0.09, wpb=64855, bsz=128, num_updates=12635, lr=9.99069e-05, gnorm=1.934, loss_scale=8, train_wall=11, gb_free=2.8, wall=145612
2021-06-20 11:05:49 | INFO | train_inner | epoch 005: 702 / 3002 loss=2.639, ppl=6.23, wps=5802.3, ups=0.09, wpb=64827, bsz=128, num_updates=12636, lr=9.99069e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=145623
2021-06-20 11:06:00 | INFO | train_inner | epoch 005: 703 / 3002 loss=2.554, ppl=5.87, wps=5904.8, ups=0.09, wpb=64713, bsz=128, num_updates=12637, lr=9.99069e-05, gnorm=2.785, loss_scale=8, train_wall=11, gb_free=2.8, wall=145634
2021-06-20 11:06:11 | INFO | train_inner | epoch 005: 704 / 3002 loss=2.595, ppl=6.04, wps=5931.1, ups=0.09, wpb=64872, bsz=128, num_updates=12638, lr=9.99069e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=145645
2021-06-20 11:06:21 | INFO | train_inner | epoch 005: 705 / 3002 loss=2.427, ppl=5.38, wps=5945.2, ups=0.09, wpb=64834, bsz=128, num_updates=12639, lr=9.99069e-05, gnorm=1.922, loss_scale=8, train_wall=10, gb_free=2.8, wall=145656
2021-06-20 11:06:32 | INFO | train_inner | epoch 005: 706 / 3002 loss=2.608, ppl=6.1, wps=5930.2, ups=0.09, wpb=64851, bsz=128, num_updates=12640, lr=9.99069e-05, gnorm=2.049, loss_scale=8, train_wall=10, gb_free=2.8, wall=145667
2021-06-20 11:06:44 | INFO | train_inner | epoch 005: 707 / 3002 loss=2.478, ppl=5.57, wps=5718.6, ups=0.09, wpb=64892, bsz=128, num_updates=12641, lr=9.99069e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=145678
2021-06-20 11:06:55 | INFO | train_inner | epoch 005: 708 / 3002 loss=2.674, ppl=6.38, wps=5803.4, ups=0.09, wpb=64761, bsz=128, num_updates=12642, lr=9.99069e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=145689
2021-06-20 11:07:06 | INFO | train_inner | epoch 005: 709 / 3002 loss=2.591, ppl=6.02, wps=5818, ups=0.09, wpb=64816, bsz=128, num_updates=12643, lr=9.99068e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=145700
2021-06-20 11:07:17 | INFO | train_inner | epoch 005: 710 / 3002 loss=2.581, ppl=5.98, wps=5802.5, ups=0.09, wpb=64828, bsz=128, num_updates=12644, lr=9.99068e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=145712
2021-06-20 11:07:29 | INFO | train_inner | epoch 005: 711 / 3002 loss=2.395, ppl=5.26, wps=5728.9, ups=0.09, wpb=64808, bsz=128, num_updates=12645, lr=9.99068e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=145723
2021-06-20 11:07:40 | INFO | train_inner | epoch 005: 712 / 3002 loss=2.57, ppl=5.94, wps=5755.2, ups=0.09, wpb=64820, bsz=128, num_updates=12646, lr=9.99068e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=145734
2021-06-20 11:07:51 | INFO | train_inner | epoch 005: 713 / 3002 loss=2.486, ppl=5.6, wps=5849.9, ups=0.09, wpb=64825, bsz=128, num_updates=12647, lr=9.99068e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=145745
2021-06-20 11:08:02 | INFO | train_inner | epoch 005: 714 / 3002 loss=2.526, ppl=5.76, wps=5835.7, ups=0.09, wpb=64975, bsz=128, num_updates=12648, lr=9.99068e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=145756
2021-06-20 11:08:13 | INFO | train_inner | epoch 005: 715 / 3002 loss=2.495, ppl=5.64, wps=5757.2, ups=0.09, wpb=64760, bsz=128, num_updates=12649, lr=9.99068e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=145768
2021-06-20 11:08:24 | INFO | train_inner | epoch 005: 716 / 3002 loss=2.562, ppl=5.91, wps=5763.2, ups=0.09, wpb=64870, bsz=128, num_updates=12650, lr=9.99068e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=145779
2021-06-20 11:08:36 | INFO | train_inner | epoch 005: 717 / 3002 loss=2.445, ppl=5.44, wps=5778.6, ups=0.09, wpb=64824, bsz=128, num_updates=12651, lr=9.99068e-05, gnorm=2.083, loss_scale=8, train_wall=11, gb_free=2.8, wall=145790
2021-06-20 11:08:47 | INFO | train_inner | epoch 005: 718 / 3002 loss=2.512, ppl=5.7, wps=5840.7, ups=0.09, wpb=64731, bsz=128, num_updates=12652, lr=9.99068e-05, gnorm=2.124, loss_scale=8, train_wall=11, gb_free=2.8, wall=145801
2021-06-20 11:08:58 | INFO | train_inner | epoch 005: 719 / 3002 loss=2.503, ppl=5.67, wps=5912.7, ups=0.09, wpb=64783, bsz=128, num_updates=12653, lr=9.99068e-05, gnorm=1.951, loss_scale=8, train_wall=10, gb_free=2.8, wall=145812
2021-06-20 11:09:09 | INFO | train_inner | epoch 005: 720 / 3002 loss=2.618, ppl=6.14, wps=5875.6, ups=0.09, wpb=64853, bsz=128, num_updates=12654, lr=9.99068e-05, gnorm=2.066, loss_scale=8, train_wall=11, gb_free=2.8, wall=145823
2021-06-20 11:09:20 | INFO | train_inner | epoch 005: 721 / 3002 loss=2.518, ppl=5.73, wps=5850.7, ups=0.09, wpb=64858, bsz=128, num_updates=12655, lr=9.99068e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=145834
2021-06-20 11:09:31 | INFO | train_inner | epoch 005: 722 / 3002 loss=2.5, ppl=5.66, wps=5788.2, ups=0.09, wpb=64767, bsz=128, num_updates=12656, lr=9.99067e-05, gnorm=2.019, loss_scale=8, train_wall=11, gb_free=2.8, wall=145845
2021-06-20 11:09:42 | INFO | train_inner | epoch 005: 723 / 3002 loss=2.52, ppl=5.74, wps=5867.5, ups=0.09, wpb=64868, bsz=128, num_updates=12657, lr=9.99067e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=145856
2021-06-20 11:09:53 | INFO | train_inner | epoch 005: 724 / 3002 loss=2.556, ppl=5.88, wps=5799.6, ups=0.09, wpb=64830, bsz=128, num_updates=12658, lr=9.99067e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=145868
2021-06-20 11:10:04 | INFO | train_inner | epoch 005: 725 / 3002 loss=2.564, ppl=5.91, wps=5821.5, ups=0.09, wpb=64727, bsz=128, num_updates=12659, lr=9.99067e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=145879
2021-06-20 11:10:15 | INFO | train_inner | epoch 005: 726 / 3002 loss=2.622, ppl=6.16, wps=5906.2, ups=0.09, wpb=64808, bsz=128, num_updates=12660, lr=9.99067e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=145890
2021-06-20 11:10:26 | INFO | train_inner | epoch 005: 727 / 3002 loss=2.576, ppl=5.96, wps=5863.9, ups=0.09, wpb=64915, bsz=128, num_updates=12661, lr=9.99067e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=145901
2021-06-20 11:10:38 | INFO | train_inner | epoch 005: 728 / 3002 loss=2.615, ppl=6.13, wps=5796.8, ups=0.09, wpb=64789, bsz=128, num_updates=12662, lr=9.99067e-05, gnorm=2.098, loss_scale=8, train_wall=11, gb_free=2.8, wall=145912
2021-06-20 11:10:48 | INFO | train_inner | epoch 005: 729 / 3002 loss=2.614, ppl=6.12, wps=5995.9, ups=0.09, wpb=64828, bsz=128, num_updates=12663, lr=9.99067e-05, gnorm=2, loss_scale=8, train_wall=10, gb_free=2.8, wall=145923
2021-06-20 11:10:59 | INFO | train_inner | epoch 005: 730 / 3002 loss=2.44, ppl=5.43, wps=5936.4, ups=0.09, wpb=64906, bsz=128, num_updates=12664, lr=9.99067e-05, gnorm=1.906, loss_scale=8, train_wall=10, gb_free=2.8, wall=145934
2021-06-20 11:11:11 | INFO | train_inner | epoch 005: 731 / 3002 loss=2.694, ppl=6.47, wps=5723.9, ups=0.09, wpb=64802, bsz=128, num_updates=12665, lr=9.99067e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=145945
2021-06-20 11:11:22 | INFO | train_inner | epoch 005: 732 / 3002 loss=2.509, ppl=5.69, wps=5693.1, ups=0.09, wpb=64864, bsz=128, num_updates=12666, lr=9.99067e-05, gnorm=2.058, loss_scale=8, train_wall=11, gb_free=2.8, wall=145956
2021-06-20 11:11:33 | INFO | train_inner | epoch 005: 733 / 3002 loss=2.595, ppl=6.04, wps=5777.1, ups=0.09, wpb=64822, bsz=128, num_updates=12667, lr=9.99067e-05, gnorm=2.086, loss_scale=8, train_wall=11, gb_free=2.8, wall=145968
2021-06-20 11:11:45 | INFO | train_inner | epoch 005: 734 / 3002 loss=2.442, ppl=5.43, wps=5794, ups=0.09, wpb=64803, bsz=128, num_updates=12668, lr=9.99066e-05, gnorm=2.155, loss_scale=8, train_wall=11, gb_free=2.8, wall=145979
2021-06-20 11:11:56 | INFO | train_inner | epoch 005: 735 / 3002 loss=2.673, ppl=6.38, wps=5830.3, ups=0.09, wpb=64814, bsz=128, num_updates=12669, lr=9.99066e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=145990
2021-06-20 11:12:06 | INFO | train_inner | epoch 005: 736 / 3002 loss=2.513, ppl=5.71, wps=6041.5, ups=0.09, wpb=64863, bsz=128, num_updates=12670, lr=9.99066e-05, gnorm=2.014, loss_scale=8, train_wall=10, gb_free=2.8, wall=146001
2021-06-20 11:12:17 | INFO | train_inner | epoch 005: 737 / 3002 loss=2.485, ppl=5.6, wps=5825.6, ups=0.09, wpb=64855, bsz=128, num_updates=12671, lr=9.99066e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=146012
2021-06-20 11:12:29 | INFO | train_inner | epoch 005: 738 / 3002 loss=2.517, ppl=5.72, wps=5755.6, ups=0.09, wpb=64872, bsz=128, num_updates=12672, lr=9.99066e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=146023
2021-06-20 11:12:40 | INFO | train_inner | epoch 005: 739 / 3002 loss=2.469, ppl=5.54, wps=5808.4, ups=0.09, wpb=64877, bsz=128, num_updates=12673, lr=9.99066e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=146034
2021-06-20 11:12:51 | INFO | train_inner | epoch 005: 740 / 3002 loss=2.481, ppl=5.58, wps=5854.1, ups=0.09, wpb=64934, bsz=128, num_updates=12674, lr=9.99066e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=146045
2021-06-20 11:13:02 | INFO | train_inner | epoch 005: 741 / 3002 loss=2.581, ppl=5.99, wps=5857.5, ups=0.09, wpb=64912, bsz=128, num_updates=12675, lr=9.99066e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=146056
2021-06-20 11:13:13 | INFO | train_inner | epoch 005: 742 / 3002 loss=2.659, ppl=6.32, wps=5863.1, ups=0.09, wpb=64734, bsz=128, num_updates=12676, lr=9.99066e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=146067
2021-06-20 11:13:24 | INFO | train_inner | epoch 005: 743 / 3002 loss=2.391, ppl=5.25, wps=5766.3, ups=0.09, wpb=64851, bsz=128, num_updates=12677, lr=9.99066e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=146079
2021-06-20 11:13:36 | INFO | train_inner | epoch 005: 744 / 3002 loss=2.579, ppl=5.98, wps=5791.2, ups=0.09, wpb=64853, bsz=128, num_updates=12678, lr=9.99066e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=146090
2021-06-20 11:13:47 | INFO | train_inner | epoch 005: 745 / 3002 loss=2.538, ppl=5.81, wps=5897, ups=0.09, wpb=64814, bsz=128, num_updates=12679, lr=9.99066e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=146101
2021-06-20 11:13:58 | INFO | train_inner | epoch 005: 746 / 3002 loss=2.514, ppl=5.71, wps=5750.8, ups=0.09, wpb=64744, bsz=128, num_updates=12680, lr=9.99066e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=146112
2021-06-20 11:14:09 | INFO | train_inner | epoch 005: 747 / 3002 loss=2.59, ppl=6.02, wps=5813.3, ups=0.09, wpb=64746, bsz=128, num_updates=12681, lr=9.99065e-05, gnorm=2.117, loss_scale=8, train_wall=11, gb_free=2.8, wall=146123
2021-06-20 11:14:20 | INFO | train_inner | epoch 005: 748 / 3002 loss=2.519, ppl=5.73, wps=5807, ups=0.09, wpb=64817, bsz=128, num_updates=12682, lr=9.99065e-05, gnorm=2.789, loss_scale=8, train_wall=11, gb_free=2.8, wall=146134
2021-06-20 11:14:31 | INFO | train_inner | epoch 005: 749 / 3002 loss=2.536, ppl=5.8, wps=5773.3, ups=0.09, wpb=64722, bsz=128, num_updates=12683, lr=9.99065e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=146146
2021-06-20 11:14:42 | INFO | train_inner | epoch 005: 750 / 3002 loss=2.418, ppl=5.35, wps=5900.4, ups=0.09, wpb=64887, bsz=128, num_updates=12684, lr=9.99065e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=146157
2021-06-20 11:14:54 | INFO | train_inner | epoch 005: 751 / 3002 loss=2.418, ppl=5.35, wps=5750.4, ups=0.09, wpb=64771, bsz=128, num_updates=12685, lr=9.99065e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=146168
2021-06-20 11:15:05 | INFO | train_inner | epoch 005: 752 / 3002 loss=2.44, ppl=5.43, wps=5768.1, ups=0.09, wpb=64886, bsz=128, num_updates=12686, lr=9.99065e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=146179
2021-06-20 11:15:16 | INFO | train_inner | epoch 005: 753 / 3002 loss=2.509, ppl=5.69, wps=5773.8, ups=0.09, wpb=64825, bsz=128, num_updates=12687, lr=9.99065e-05, gnorm=1.977, loss_scale=8, train_wall=11, gb_free=2.8, wall=146190
2021-06-20 11:15:27 | INFO | train_inner | epoch 005: 754 / 3002 loss=2.539, ppl=5.81, wps=5776.9, ups=0.09, wpb=64808, bsz=128, num_updates=12688, lr=9.99065e-05, gnorm=1.923, loss_scale=8, train_wall=11, gb_free=2.8, wall=146202
2021-06-20 11:15:38 | INFO | train_inner | epoch 005: 755 / 3002 loss=2.314, ppl=4.97, wps=5824.2, ups=0.09, wpb=64902, bsz=128, num_updates=12689, lr=9.99065e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=146213
2021-06-20 11:15:50 | INFO | train_inner | epoch 005: 756 / 3002 loss=2.47, ppl=5.54, wps=5800.1, ups=0.09, wpb=64808, bsz=128, num_updates=12690, lr=9.99065e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=146224
2021-06-20 11:16:01 | INFO | train_inner | epoch 005: 757 / 3002 loss=2.654, ppl=6.29, wps=5800.2, ups=0.09, wpb=64782, bsz=128, num_updates=12691, lr=9.99065e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=146235
2021-06-20 11:16:12 | INFO | train_inner | epoch 005: 758 / 3002 loss=2.554, ppl=5.87, wps=5854.7, ups=0.09, wpb=64742, bsz=128, num_updates=12692, lr=9.99065e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=146246
2021-06-20 11:16:23 | INFO | train_inner | epoch 005: 759 / 3002 loss=2.41, ppl=5.32, wps=5857.2, ups=0.09, wpb=64787, bsz=128, num_updates=12693, lr=9.99064e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=146257
2021-06-20 11:16:34 | INFO | train_inner | epoch 005: 760 / 3002 loss=2.525, ppl=5.75, wps=5859.1, ups=0.09, wpb=64906, bsz=128, num_updates=12694, lr=9.99064e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=146268
2021-06-20 11:16:45 | INFO | train_inner | epoch 005: 761 / 3002 loss=2.522, ppl=5.74, wps=5809.7, ups=0.09, wpb=64793, bsz=128, num_updates=12695, lr=9.99064e-05, gnorm=2.115, loss_scale=8, train_wall=11, gb_free=2.8, wall=146279
2021-06-20 11:16:56 | INFO | train_inner | epoch 005: 762 / 3002 loss=2.561, ppl=5.9, wps=5843.9, ups=0.09, wpb=64856, bsz=128, num_updates=12696, lr=9.99064e-05, gnorm=2.173, loss_scale=8, train_wall=11, gb_free=2.8, wall=146291
2021-06-20 11:17:07 | INFO | train_inner | epoch 005: 763 / 3002 loss=2.613, ppl=6.12, wps=5961.3, ups=0.09, wpb=64852, bsz=128, num_updates=12697, lr=9.99064e-05, gnorm=2.021, loss_scale=8, train_wall=10, gb_free=2.8, wall=146301
2021-06-20 11:17:18 | INFO | train_inner | epoch 005: 764 / 3002 loss=2.567, ppl=5.93, wps=5955.3, ups=0.09, wpb=64912, bsz=128, num_updates=12698, lr=9.99064e-05, gnorm=1.957, loss_scale=8, train_wall=10, gb_free=2.8, wall=146312
2021-06-20 11:17:29 | INFO | train_inner | epoch 005: 765 / 3002 loss=2.568, ppl=5.93, wps=5855.1, ups=0.09, wpb=64728, bsz=128, num_updates=12699, lr=9.99064e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=146323
2021-06-20 11:17:40 | INFO | train_inner | epoch 005: 766 / 3002 loss=2.516, ppl=5.72, wps=5877.1, ups=0.09, wpb=64886, bsz=128, num_updates=12700, lr=9.99064e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=146334
2021-06-20 11:17:51 | INFO | train_inner | epoch 005: 767 / 3002 loss=2.637, ppl=6.22, wps=5913.6, ups=0.09, wpb=64794, bsz=128, num_updates=12701, lr=9.99064e-05, gnorm=2.045, loss_scale=8, train_wall=11, gb_free=2.8, wall=146345
2021-06-20 11:18:02 | INFO | train_inner | epoch 005: 768 / 3002 loss=2.504, ppl=5.67, wps=5928.6, ups=0.09, wpb=64919, bsz=128, num_updates=12702, lr=9.99064e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=146356
2021-06-20 11:18:13 | INFO | train_inner | epoch 005: 769 / 3002 loss=2.47, ppl=5.54, wps=5909.5, ups=0.09, wpb=64898, bsz=128, num_updates=12703, lr=9.99064e-05, gnorm=2.052, loss_scale=8, train_wall=11, gb_free=2.8, wall=146367
2021-06-20 11:18:24 | INFO | train_inner | epoch 005: 770 / 3002 loss=2.482, ppl=5.59, wps=5866, ups=0.09, wpb=64730, bsz=128, num_updates=12704, lr=9.99064e-05, gnorm=1.977, loss_scale=8, train_wall=11, gb_free=2.8, wall=146378
2021-06-20 11:18:35 | INFO | train_inner | epoch 005: 771 / 3002 loss=2.421, ppl=5.36, wps=5870.9, ups=0.09, wpb=64748, bsz=128, num_updates=12705, lr=9.99064e-05, gnorm=2.073, loss_scale=8, train_wall=11, gb_free=2.8, wall=146389
2021-06-20 11:18:46 | INFO | train_inner | epoch 005: 772 / 3002 loss=2.459, ppl=5.5, wps=5792.9, ups=0.09, wpb=64833, bsz=128, num_updates=12706, lr=9.99063e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=146401
2021-06-20 11:18:57 | INFO | train_inner | epoch 005: 773 / 3002 loss=2.673, ppl=6.38, wps=5893.3, ups=0.09, wpb=64738, bsz=128, num_updates=12707, lr=9.99063e-05, gnorm=2.089, loss_scale=8, train_wall=11, gb_free=2.8, wall=146412
2021-06-20 11:19:08 | INFO | train_inner | epoch 005: 774 / 3002 loss=2.481, ppl=5.58, wps=5800.2, ups=0.09, wpb=64853, bsz=128, num_updates=12708, lr=9.99063e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=146423
2021-06-20 11:19:20 | INFO | train_inner | epoch 005: 775 / 3002 loss=2.539, ppl=5.81, wps=5838.9, ups=0.09, wpb=64784, bsz=128, num_updates=12709, lr=9.99063e-05, gnorm=2.042, loss_scale=8, train_wall=11, gb_free=2.8, wall=146434
2021-06-20 11:19:31 | INFO | train_inner | epoch 005: 776 / 3002 loss=2.499, ppl=5.65, wps=5857, ups=0.09, wpb=64816, bsz=128, num_updates=12710, lr=9.99063e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=146445
2021-06-20 11:19:42 | INFO | train_inner | epoch 005: 777 / 3002 loss=2.517, ppl=5.72, wps=5755.5, ups=0.09, wpb=64847, bsz=128, num_updates=12711, lr=9.99063e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=146456
2021-06-20 11:19:53 | INFO | train_inner | epoch 005: 778 / 3002 loss=2.453, ppl=5.48, wps=5963.9, ups=0.09, wpb=64966, bsz=128, num_updates=12712, lr=9.99063e-05, gnorm=1.997, loss_scale=8, train_wall=10, gb_free=2.8, wall=146467
2021-06-20 11:20:04 | INFO | train_inner | epoch 005: 779 / 3002 loss=2.419, ppl=5.35, wps=5957.6, ups=0.09, wpb=64910, bsz=128, num_updates=12713, lr=9.99063e-05, gnorm=2.059, loss_scale=16, train_wall=10, gb_free=2.8, wall=146478
2021-06-20 11:20:15 | INFO | train_inner | epoch 005: 780 / 3002 loss=2.495, ppl=5.64, wps=5849, ups=0.09, wpb=64824, bsz=128, num_updates=12714, lr=9.99063e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=146489
2021-06-20 11:20:26 | INFO | train_inner | epoch 005: 781 / 3002 loss=2.515, ppl=5.71, wps=5853, ups=0.09, wpb=64834, bsz=128, num_updates=12715, lr=9.99063e-05, gnorm=2.019, loss_scale=16, train_wall=11, gb_free=2.8, wall=146500
2021-06-20 11:20:37 | INFO | train_inner | epoch 005: 782 / 3002 loss=2.496, ppl=5.64, wps=5827.9, ups=0.09, wpb=64819, bsz=128, num_updates=12716, lr=9.99063e-05, gnorm=2.072, loss_scale=16, train_wall=11, gb_free=2.8, wall=146511
2021-06-20 11:20:48 | INFO | train_inner | epoch 005: 783 / 3002 loss=2.607, ppl=6.09, wps=5809.7, ups=0.09, wpb=64807, bsz=128, num_updates=12717, lr=9.99063e-05, gnorm=2.039, loss_scale=16, train_wall=11, gb_free=2.8, wall=146522
2021-06-20 11:20:59 | INFO | train_inner | epoch 005: 784 / 3002 loss=2.539, ppl=5.81, wps=5708.9, ups=0.09, wpb=64803, bsz=128, num_updates=12718, lr=9.99062e-05, gnorm=1.935, loss_scale=16, train_wall=11, gb_free=2.8, wall=146534
2021-06-20 11:21:11 | INFO | train_inner | epoch 005: 785 / 3002 loss=2.552, ppl=5.87, wps=5747.2, ups=0.09, wpb=64855, bsz=128, num_updates=12719, lr=9.99062e-05, gnorm=1.97, loss_scale=16, train_wall=11, gb_free=2.8, wall=146545
2021-06-20 11:21:22 | INFO | train_inner | epoch 005: 786 / 3002 loss=2.521, ppl=5.74, wps=5736.2, ups=0.09, wpb=64839, bsz=128, num_updates=12720, lr=9.99062e-05, gnorm=2.067, loss_scale=16, train_wall=11, gb_free=2.8, wall=146556
2021-06-20 11:21:33 | INFO | train_inner | epoch 005: 787 / 3002 loss=2.759, ppl=6.77, wps=5874.6, ups=0.09, wpb=64763, bsz=128, num_updates=12721, lr=9.99062e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=146567
2021-06-20 11:21:44 | INFO | train_inner | epoch 005: 788 / 3002 loss=2.513, ppl=5.71, wps=5711.6, ups=0.09, wpb=64761, bsz=128, num_updates=12722, lr=9.99062e-05, gnorm=1.992, loss_scale=16, train_wall=11, gb_free=2.8, wall=146579
2021-06-20 11:21:56 | INFO | train_inner | epoch 005: 789 / 3002 loss=2.535, ppl=5.8, wps=5796.7, ups=0.09, wpb=64803, bsz=128, num_updates=12723, lr=9.99062e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=146590
2021-06-20 11:22:07 | INFO | train_inner | epoch 005: 790 / 3002 loss=2.622, ppl=6.16, wps=5893.1, ups=0.09, wpb=64837, bsz=128, num_updates=12724, lr=9.99062e-05, gnorm=2.148, loss_scale=16, train_wall=11, gb_free=2.8, wall=146601
2021-06-20 11:22:18 | INFO | train_inner | epoch 005: 791 / 3002 loss=2.677, ppl=6.39, wps=5726.9, ups=0.09, wpb=64863, bsz=128, num_updates=12725, lr=9.99062e-05, gnorm=2.034, loss_scale=16, train_wall=11, gb_free=2.8, wall=146612
2021-06-20 11:22:29 | INFO | train_inner | epoch 005: 792 / 3002 loss=2.567, ppl=5.93, wps=5771.7, ups=0.09, wpb=64836, bsz=128, num_updates=12726, lr=9.99062e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=146623
2021-06-20 11:22:40 | INFO | train_inner | epoch 005: 793 / 3002 loss=2.529, ppl=5.77, wps=5754.7, ups=0.09, wpb=64852, bsz=128, num_updates=12727, lr=9.99062e-05, gnorm=1.937, loss_scale=16, train_wall=11, gb_free=2.8, wall=146635
2021-06-20 11:22:52 | INFO | train_inner | epoch 005: 794 / 3002 loss=2.468, ppl=5.53, wps=5801.1, ups=0.09, wpb=64829, bsz=128, num_updates=12728, lr=9.99062e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=146646
2021-06-20 11:23:03 | INFO | train_inner | epoch 005: 795 / 3002 loss=2.597, ppl=6.05, wps=5846.1, ups=0.09, wpb=64894, bsz=128, num_updates=12729, lr=9.99062e-05, gnorm=1.993, loss_scale=16, train_wall=11, gb_free=2.8, wall=146657
2021-06-20 11:23:14 | INFO | train_inner | epoch 005: 796 / 3002 loss=2.645, ppl=6.26, wps=5919.2, ups=0.09, wpb=64897, bsz=128, num_updates=12730, lr=9.99062e-05, gnorm=1.984, loss_scale=16, train_wall=10, gb_free=2.8, wall=146668
2021-06-20 11:23:25 | INFO | train_inner | epoch 005: 797 / 3002 loss=2.358, ppl=5.13, wps=5764.6, ups=0.09, wpb=64780, bsz=128, num_updates=12731, lr=9.99061e-05, gnorm=1.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=146679
2021-06-20 11:23:36 | INFO | train_inner | epoch 005: 798 / 3002 loss=2.661, ppl=6.32, wps=5733.5, ups=0.09, wpb=64910, bsz=128, num_updates=12732, lr=9.99061e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=146691
2021-06-20 11:23:47 | INFO | train_inner | epoch 005: 799 / 3002 loss=2.515, ppl=5.72, wps=5797.7, ups=0.09, wpb=64863, bsz=128, num_updates=12733, lr=9.99061e-05, gnorm=1.882, loss_scale=16, train_wall=11, gb_free=2.8, wall=146702
2021-06-20 11:23:58 | INFO | train_inner | epoch 005: 800 / 3002 loss=2.593, ppl=6.03, wps=5895.1, ups=0.09, wpb=64823, bsz=128, num_updates=12734, lr=9.99061e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=146713
2021-06-20 11:24:10 | INFO | train_inner | epoch 005: 801 / 3002 loss=2.405, ppl=5.3, wps=5834.8, ups=0.09, wpb=64860, bsz=128, num_updates=12735, lr=9.99061e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=146724
2021-06-20 11:24:21 | INFO | train_inner | epoch 005: 802 / 3002 loss=2.457, ppl=5.49, wps=5771.6, ups=0.09, wpb=64865, bsz=128, num_updates=12736, lr=9.99061e-05, gnorm=1.973, loss_scale=16, train_wall=11, gb_free=2.8, wall=146735
2021-06-20 11:24:32 | INFO | train_inner | epoch 005: 803 / 3002 loss=2.611, ppl=6.11, wps=5897.1, ups=0.09, wpb=64763, bsz=128, num_updates=12737, lr=9.99061e-05, gnorm=2.231, loss_scale=16, train_wall=11, gb_free=2.8, wall=146746
2021-06-20 11:24:43 | INFO | train_inner | epoch 005: 804 / 3002 loss=2.582, ppl=5.99, wps=5906.3, ups=0.09, wpb=64861, bsz=128, num_updates=12738, lr=9.99061e-05, gnorm=2.025, loss_scale=16, train_wall=11, gb_free=2.8, wall=146757
2021-06-20 11:24:54 | INFO | train_inner | epoch 005: 805 / 3002 loss=2.511, ppl=5.7, wps=5829.1, ups=0.09, wpb=64823, bsz=128, num_updates=12739, lr=9.99061e-05, gnorm=2.128, loss_scale=16, train_wall=11, gb_free=2.8, wall=146768
2021-06-20 11:25:05 | INFO | train_inner | epoch 005: 806 / 3002 loss=2.549, ppl=5.85, wps=5813.3, ups=0.09, wpb=64853, bsz=128, num_updates=12740, lr=9.99061e-05, gnorm=2.019, loss_scale=16, train_wall=11, gb_free=2.8, wall=146779
2021-06-20 11:25:16 | INFO | train_inner | epoch 005: 807 / 3002 loss=2.565, ppl=5.92, wps=5797.3, ups=0.09, wpb=64834, bsz=128, num_updates=12741, lr=9.99061e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=146791
2021-06-20 11:25:27 | INFO | train_inner | epoch 005: 808 / 3002 loss=2.701, ppl=6.5, wps=5847.1, ups=0.09, wpb=64809, bsz=128, num_updates=12742, lr=9.99061e-05, gnorm=2.07, loss_scale=16, train_wall=11, gb_free=2.8, wall=146802
2021-06-20 11:25:38 | INFO | train_inner | epoch 005: 809 / 3002 loss=2.496, ppl=5.64, wps=5874.6, ups=0.09, wpb=64836, bsz=128, num_updates=12743, lr=9.9906e-05, gnorm=2.065, loss_scale=16, train_wall=11, gb_free=2.8, wall=146813
2021-06-20 11:25:49 | INFO | train_inner | epoch 005: 810 / 3002 loss=2.521, ppl=5.74, wps=5798.7, ups=0.09, wpb=64775, bsz=128, num_updates=12744, lr=9.9906e-05, gnorm=2.093, loss_scale=16, train_wall=11, gb_free=2.8, wall=146824
2021-06-20 11:26:00 | INFO | train_inner | epoch 005: 811 / 3002 loss=2.392, ppl=5.25, wps=5888.4, ups=0.09, wpb=64823, bsz=128, num_updates=12745, lr=9.9906e-05, gnorm=1.995, loss_scale=16, train_wall=11, gb_free=2.8, wall=146835
2021-06-20 11:26:12 | INFO | train_inner | epoch 005: 812 / 3002 loss=2.499, ppl=5.65, wps=5806, ups=0.09, wpb=64878, bsz=128, num_updates=12746, lr=9.9906e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=146846
2021-06-20 11:26:23 | INFO | train_inner | epoch 005: 813 / 3002 loss=2.383, ppl=5.22, wps=5865.2, ups=0.09, wpb=64772, bsz=128, num_updates=12747, lr=9.9906e-05, gnorm=1.97, loss_scale=16, train_wall=11, gb_free=2.8, wall=146857
2021-06-20 11:26:34 | INFO | train_inner | epoch 005: 814 / 3002 loss=2.387, ppl=5.23, wps=5787.5, ups=0.09, wpb=64892, bsz=128, num_updates=12748, lr=9.9906e-05, gnorm=1.882, loss_scale=16, train_wall=11, gb_free=2.8, wall=146868
2021-06-20 11:26:45 | INFO | train_inner | epoch 005: 815 / 3002 loss=2.544, ppl=5.83, wps=5815.6, ups=0.09, wpb=64910, bsz=128, num_updates=12749, lr=9.9906e-05, gnorm=1.952, loss_scale=16, train_wall=11, gb_free=2.8, wall=146879
2021-06-20 11:26:56 | INFO | train_inner | epoch 005: 816 / 3002 loss=2.483, ppl=5.59, wps=5870.3, ups=0.09, wpb=64767, bsz=128, num_updates=12750, lr=9.9906e-05, gnorm=2.024, loss_scale=16, train_wall=11, gb_free=2.8, wall=146890
2021-06-20 11:27:07 | INFO | train_inner | epoch 005: 817 / 3002 loss=2.585, ppl=6, wps=5873.3, ups=0.09, wpb=64835, bsz=128, num_updates=12751, lr=9.9906e-05, gnorm=2.077, loss_scale=16, train_wall=11, gb_free=2.8, wall=146901
2021-06-20 11:27:18 | INFO | train_inner | epoch 005: 818 / 3002 loss=2.502, ppl=5.66, wps=5873.2, ups=0.09, wpb=64800, bsz=128, num_updates=12752, lr=9.9906e-05, gnorm=1.951, loss_scale=16, train_wall=11, gb_free=2.8, wall=146913
2021-06-20 11:27:29 | INFO | train_inner | epoch 005: 819 / 3002 loss=2.52, ppl=5.74, wps=5766.3, ups=0.09, wpb=64863, bsz=128, num_updates=12753, lr=9.9906e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=146924
2021-06-20 11:27:40 | INFO | train_inner | epoch 005: 820 / 3002 loss=2.494, ppl=5.63, wps=5861.7, ups=0.09, wpb=64755, bsz=128, num_updates=12754, lr=9.9906e-05, gnorm=1.906, loss_scale=16, train_wall=11, gb_free=2.8, wall=146935
2021-06-20 11:27:52 | INFO | train_inner | epoch 005: 821 / 3002 loss=2.499, ppl=5.65, wps=5880.1, ups=0.09, wpb=64829, bsz=128, num_updates=12755, lr=9.9906e-05, gnorm=1.87, loss_scale=16, train_wall=11, gb_free=2.8, wall=146946
2021-06-20 11:28:03 | INFO | train_inner | epoch 005: 822 / 3002 loss=2.597, ppl=6.05, wps=5886.3, ups=0.09, wpb=64820, bsz=128, num_updates=12756, lr=9.99059e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=146957
2021-06-20 11:28:14 | INFO | train_inner | epoch 005: 823 / 3002 loss=2.478, ppl=5.57, wps=5818.6, ups=0.09, wpb=64845, bsz=128, num_updates=12757, lr=9.99059e-05, gnorm=2.148, loss_scale=16, train_wall=11, gb_free=2.8, wall=146968
2021-06-20 11:28:25 | INFO | train_inner | epoch 005: 824 / 3002 loss=2.48, ppl=5.58, wps=5886.1, ups=0.09, wpb=64887, bsz=128, num_updates=12758, lr=9.99059e-05, gnorm=2.017, loss_scale=16, train_wall=11, gb_free=2.8, wall=146979
2021-06-20 11:28:36 | INFO | train_inner | epoch 005: 825 / 3002 loss=2.459, ppl=5.5, wps=5940.4, ups=0.09, wpb=64910, bsz=128, num_updates=12759, lr=9.99059e-05, gnorm=2.083, loss_scale=16, train_wall=10, gb_free=2.8, wall=146990
2021-06-20 11:28:47 | INFO | train_inner | epoch 005: 826 / 3002 loss=2.557, ppl=5.88, wps=5914.6, ups=0.09, wpb=64913, bsz=128, num_updates=12760, lr=9.99059e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=147001
2021-06-20 11:28:58 | INFO | train_inner | epoch 005: 827 / 3002 loss=2.511, ppl=5.7, wps=5886.8, ups=0.09, wpb=64832, bsz=128, num_updates=12761, lr=9.99059e-05, gnorm=1.948, loss_scale=16, train_wall=11, gb_free=2.8, wall=147012
2021-06-20 11:29:09 | INFO | train_inner | epoch 005: 828 / 3002 loss=2.463, ppl=5.51, wps=5879, ups=0.09, wpb=64882, bsz=128, num_updates=12762, lr=9.99059e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=147023
2021-06-20 11:29:20 | INFO | train_inner | epoch 005: 829 / 3002 loss=2.552, ppl=5.86, wps=5936.1, ups=0.09, wpb=64831, bsz=128, num_updates=12763, lr=9.99059e-05, gnorm=2.028, loss_scale=16, train_wall=10, gb_free=2.8, wall=147034
2021-06-20 11:29:31 | INFO | train_inner | epoch 005: 830 / 3002 loss=2.406, ppl=5.3, wps=5741, ups=0.09, wpb=64899, bsz=128, num_updates=12764, lr=9.99059e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=147045
2021-06-20 11:29:42 | INFO | train_inner | epoch 005: 831 / 3002 loss=2.502, ppl=5.67, wps=5881.8, ups=0.09, wpb=64817, bsz=128, num_updates=12765, lr=9.99059e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=147056
2021-06-20 11:29:53 | INFO | train_inner | epoch 005: 832 / 3002 loss=2.522, ppl=5.74, wps=5874.4, ups=0.09, wpb=64773, bsz=128, num_updates=12766, lr=9.99059e-05, gnorm=2.139, loss_scale=16, train_wall=11, gb_free=2.8, wall=147067
2021-06-20 11:30:04 | INFO | train_inner | epoch 005: 833 / 3002 loss=2.481, ppl=5.58, wps=5887.4, ups=0.09, wpb=64857, bsz=128, num_updates=12767, lr=9.99059e-05, gnorm=2.006, loss_scale=16, train_wall=11, gb_free=2.8, wall=147078
2021-06-20 11:30:15 | INFO | train_inner | epoch 005: 834 / 3002 loss=2.466, ppl=5.53, wps=5910.5, ups=0.09, wpb=64805, bsz=128, num_updates=12768, lr=9.99058e-05, gnorm=2.012, loss_scale=16, train_wall=11, gb_free=2.8, wall=147089
2021-06-20 11:30:26 | INFO | train_inner | epoch 005: 835 / 3002 loss=2.568, ppl=5.93, wps=5900.3, ups=0.09, wpb=64865, bsz=128, num_updates=12769, lr=9.99058e-05, gnorm=2.033, loss_scale=16, train_wall=11, gb_free=2.8, wall=147100
2021-06-20 11:30:37 | INFO | train_inner | epoch 005: 836 / 3002 loss=2.498, ppl=5.65, wps=5810.3, ups=0.09, wpb=64775, bsz=128, num_updates=12770, lr=9.99058e-05, gnorm=2.004, loss_scale=16, train_wall=11, gb_free=2.8, wall=147111
2021-06-20 11:30:48 | INFO | train_inner | epoch 005: 837 / 3002 loss=2.422, ppl=5.36, wps=5932.9, ups=0.09, wpb=64816, bsz=128, num_updates=12771, lr=9.99058e-05, gnorm=2.04, loss_scale=16, train_wall=10, gb_free=2.8, wall=147122
2021-06-20 11:30:59 | INFO | train_inner | epoch 005: 838 / 3002 loss=2.604, ppl=6.08, wps=5850.3, ups=0.09, wpb=64918, bsz=128, num_updates=12772, lr=9.99058e-05, gnorm=1.992, loss_scale=16, train_wall=11, gb_free=2.8, wall=147133
2021-06-20 11:31:10 | INFO | train_inner | epoch 005: 839 / 3002 loss=2.505, ppl=5.68, wps=5808.2, ups=0.09, wpb=64807, bsz=128, num_updates=12773, lr=9.99058e-05, gnorm=2.218, loss_scale=16, train_wall=11, gb_free=2.8, wall=147145
2021-06-20 11:31:21 | INFO | train_inner | epoch 005: 840 / 3002 loss=2.611, ppl=6.11, wps=5921.5, ups=0.09, wpb=64764, bsz=128, num_updates=12774, lr=9.99058e-05, gnorm=2.059, loss_scale=16, train_wall=10, gb_free=2.8, wall=147155
2021-06-20 11:31:32 | INFO | train_inner | epoch 005: 841 / 3002 loss=2.535, ppl=5.79, wps=5797.2, ups=0.09, wpb=64723, bsz=128, num_updates=12775, lr=9.99058e-05, gnorm=2.053, loss_scale=16, train_wall=11, gb_free=2.8, wall=147167
2021-06-20 11:31:43 | INFO | train_inner | epoch 005: 842 / 3002 loss=2.571, ppl=5.94, wps=5871.6, ups=0.09, wpb=64813, bsz=128, num_updates=12776, lr=9.99058e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=147178
2021-06-20 11:31:54 | INFO | train_inner | epoch 005: 843 / 3002 loss=2.535, ppl=5.79, wps=5884.6, ups=0.09, wpb=64923, bsz=128, num_updates=12777, lr=9.99058e-05, gnorm=1.968, loss_scale=16, train_wall=11, gb_free=2.8, wall=147189
2021-06-20 11:32:06 | INFO | train_inner | epoch 005: 844 / 3002 loss=2.551, ppl=5.86, wps=5744, ups=0.09, wpb=64841, bsz=128, num_updates=12778, lr=9.99058e-05, gnorm=1.898, loss_scale=16, train_wall=11, gb_free=2.8, wall=147200
2021-06-20 11:32:17 | INFO | train_inner | epoch 005: 845 / 3002 loss=2.359, ppl=5.13, wps=5975, ups=0.09, wpb=64841, bsz=128, num_updates=12779, lr=9.99058e-05, gnorm=1.885, loss_scale=16, train_wall=10, gb_free=2.8, wall=147211
2021-06-20 11:32:28 | INFO | train_inner | epoch 005: 846 / 3002 loss=2.583, ppl=5.99, wps=5839.6, ups=0.09, wpb=64874, bsz=128, num_updates=12780, lr=9.99058e-05, gnorm=2.337, loss_scale=16, train_wall=11, gb_free=2.8, wall=147222
2021-06-20 11:32:39 | INFO | train_inner | epoch 005: 847 / 3002 loss=2.452, ppl=5.47, wps=5894.8, ups=0.09, wpb=64903, bsz=128, num_updates=12781, lr=9.99057e-05, gnorm=2.136, loss_scale=16, train_wall=11, gb_free=2.8, wall=147233
2021-06-20 11:32:50 | INFO | train_inner | epoch 005: 848 / 3002 loss=2.566, ppl=5.92, wps=5935.6, ups=0.09, wpb=64792, bsz=128, num_updates=12782, lr=9.99057e-05, gnorm=1.955, loss_scale=16, train_wall=10, gb_free=2.8, wall=147244
2021-06-20 11:33:01 | INFO | train_inner | epoch 005: 849 / 3002 loss=2.528, ppl=5.77, wps=5817.4, ups=0.09, wpb=64855, bsz=128, num_updates=12783, lr=9.99057e-05, gnorm=2.059, loss_scale=16, train_wall=11, gb_free=2.8, wall=147255
2021-06-20 11:33:12 | INFO | train_inner | epoch 005: 850 / 3002 loss=2.559, ppl=5.89, wps=5805.9, ups=0.09, wpb=64877, bsz=128, num_updates=12784, lr=9.99057e-05, gnorm=2.058, loss_scale=16, train_wall=11, gb_free=2.8, wall=147266
2021-06-20 11:33:23 | INFO | train_inner | epoch 005: 851 / 3002 loss=2.517, ppl=5.73, wps=5819.2, ups=0.09, wpb=64745, bsz=128, num_updates=12785, lr=9.99057e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=147277
2021-06-20 11:33:34 | INFO | train_inner | epoch 005: 852 / 3002 loss=2.52, ppl=5.73, wps=5818, ups=0.09, wpb=64762, bsz=128, num_updates=12786, lr=9.99057e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=147288
2021-06-20 11:33:45 | INFO | train_inner | epoch 005: 853 / 3002 loss=2.484, ppl=5.59, wps=5814.5, ups=0.09, wpb=64831, bsz=128, num_updates=12787, lr=9.99057e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=147300
2021-06-20 11:33:56 | INFO | train_inner | epoch 005: 854 / 3002 loss=2.435, ppl=5.41, wps=5805.1, ups=0.09, wpb=64836, bsz=128, num_updates=12788, lr=9.99057e-05, gnorm=1.952, loss_scale=16, train_wall=11, gb_free=2.8, wall=147311
2021-06-20 11:34:07 | INFO | train_inner | epoch 005: 855 / 3002 loss=2.454, ppl=5.48, wps=5902.6, ups=0.09, wpb=64873, bsz=128, num_updates=12789, lr=9.99057e-05, gnorm=1.967, loss_scale=16, train_wall=11, gb_free=2.8, wall=147322
2021-06-20 11:34:19 | INFO | train_inner | epoch 005: 856 / 3002 loss=2.472, ppl=5.55, wps=5821.6, ups=0.09, wpb=64785, bsz=128, num_updates=12790, lr=9.99057e-05, gnorm=1.939, loss_scale=16, train_wall=11, gb_free=2.8, wall=147333
2021-06-20 11:34:30 | INFO | train_inner | epoch 005: 857 / 3002 loss=2.632, ppl=6.2, wps=5799.8, ups=0.09, wpb=64788, bsz=128, num_updates=12791, lr=9.99057e-05, gnorm=3.171, loss_scale=16, train_wall=11, gb_free=2.8, wall=147344
2021-06-20 11:34:41 | INFO | train_inner | epoch 005: 858 / 3002 loss=2.335, ppl=5.05, wps=5748.7, ups=0.09, wpb=64761, bsz=128, num_updates=12792, lr=9.99057e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=147355
2021-06-20 11:34:52 | INFO | train_inner | epoch 005: 859 / 3002 loss=2.399, ppl=5.28, wps=5707.2, ups=0.09, wpb=64769, bsz=128, num_updates=12793, lr=9.99056e-05, gnorm=1.979, loss_scale=16, train_wall=11, gb_free=2.8, wall=147367
2021-06-20 11:35:04 | INFO | train_inner | epoch 005: 860 / 3002 loss=2.29, ppl=4.89, wps=5815.5, ups=0.09, wpb=64761, bsz=128, num_updates=12794, lr=9.99056e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=147378
2021-06-20 11:35:14 | INFO | train_inner | epoch 005: 861 / 3002 loss=2.659, ppl=6.32, wps=5982.8, ups=0.09, wpb=64806, bsz=128, num_updates=12795, lr=9.99056e-05, gnorm=2.054, loss_scale=16, train_wall=10, gb_free=2.8, wall=147389
2021-06-20 11:35:25 | INFO | train_inner | epoch 005: 862 / 3002 loss=2.547, ppl=5.84, wps=5875.3, ups=0.09, wpb=64828, bsz=128, num_updates=12796, lr=9.99056e-05, gnorm=2.011, loss_scale=16, train_wall=11, gb_free=2.8, wall=147400
2021-06-20 11:35:36 | INFO | train_inner | epoch 005: 863 / 3002 loss=2.497, ppl=5.65, wps=5822.6, ups=0.09, wpb=64783, bsz=128, num_updates=12797, lr=9.99056e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=147411
2021-06-20 11:35:48 | INFO | train_inner | epoch 005: 864 / 3002 loss=2.622, ppl=6.16, wps=5848.2, ups=0.09, wpb=64755, bsz=128, num_updates=12798, lr=9.99056e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=147422
2021-06-20 11:35:59 | INFO | train_inner | epoch 005: 865 / 3002 loss=2.495, ppl=5.64, wps=5890.4, ups=0.09, wpb=64865, bsz=128, num_updates=12799, lr=9.99056e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=147433
2021-06-20 11:36:09 | INFO | train_inner | epoch 005: 866 / 3002 loss=2.517, ppl=5.72, wps=5968.4, ups=0.09, wpb=64759, bsz=128, num_updates=12800, lr=9.99056e-05, gnorm=2.001, loss_scale=16, train_wall=10, gb_free=2.8, wall=147444
2021-06-20 11:36:20 | INFO | train_inner | epoch 005: 867 / 3002 loss=2.276, ppl=4.84, wps=5919, ups=0.09, wpb=64915, bsz=128, num_updates=12801, lr=9.99056e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=147455
2021-06-20 11:36:32 | INFO | train_inner | epoch 005: 868 / 3002 loss=2.494, ppl=5.63, wps=5830.2, ups=0.09, wpb=64870, bsz=128, num_updates=12802, lr=9.99056e-05, gnorm=1.988, loss_scale=16, train_wall=11, gb_free=2.8, wall=147466
2021-06-20 11:36:43 | INFO | train_inner | epoch 005: 869 / 3002 loss=2.56, ppl=5.9, wps=5751.6, ups=0.09, wpb=64788, bsz=128, num_updates=12803, lr=9.99056e-05, gnorm=1.974, loss_scale=16, train_wall=11, gb_free=2.8, wall=147477
2021-06-20 11:36:54 | INFO | train_inner | epoch 005: 870 / 3002 loss=2.443, ppl=5.44, wps=5805.6, ups=0.09, wpb=64920, bsz=128, num_updates=12804, lr=9.99056e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=147488
2021-06-20 11:37:05 | INFO | train_inner | epoch 005: 871 / 3002 loss=2.669, ppl=6.36, wps=5844.4, ups=0.09, wpb=64797, bsz=128, num_updates=12805, lr=9.99056e-05, gnorm=2.069, loss_scale=16, train_wall=11, gb_free=2.8, wall=147499
2021-06-20 11:37:16 | INFO | train_inner | epoch 005: 872 / 3002 loss=2.682, ppl=6.42, wps=5827.7, ups=0.09, wpb=64791, bsz=128, num_updates=12806, lr=9.99055e-05, gnorm=2.044, loss_scale=16, train_wall=11, gb_free=2.8, wall=147511
2021-06-20 11:37:27 | INFO | train_inner | epoch 005: 873 / 3002 loss=2.574, ppl=5.96, wps=5861.8, ups=0.09, wpb=64941, bsz=128, num_updates=12807, lr=9.99055e-05, gnorm=2.228, loss_scale=16, train_wall=11, gb_free=2.8, wall=147522
2021-06-20 11:37:38 | INFO | train_inner | epoch 005: 874 / 3002 loss=2.809, ppl=7.01, wps=5794.4, ups=0.09, wpb=64835, bsz=128, num_updates=12808, lr=9.99055e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=147533
2021-06-20 11:37:49 | INFO | train_inner | epoch 005: 875 / 3002 loss=2.515, ppl=5.71, wps=6012.9, ups=0.09, wpb=64908, bsz=128, num_updates=12809, lr=9.99055e-05, gnorm=1.895, loss_scale=16, train_wall=10, gb_free=2.8, wall=147544
2021-06-20 11:38:00 | INFO | train_inner | epoch 005: 876 / 3002 loss=2.47, ppl=5.54, wps=5877.8, ups=0.09, wpb=64828, bsz=128, num_updates=12810, lr=9.99055e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=147555
2021-06-20 11:38:12 | INFO | train_inner | epoch 005: 877 / 3002 loss=2.455, ppl=5.48, wps=5721.2, ups=0.09, wpb=64833, bsz=128, num_updates=12811, lr=9.99055e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=147566
2021-06-20 11:38:23 | INFO | train_inner | epoch 005: 878 / 3002 loss=2.522, ppl=5.74, wps=5876.1, ups=0.09, wpb=64815, bsz=128, num_updates=12812, lr=9.99055e-05, gnorm=2.004, loss_scale=16, train_wall=11, gb_free=2.8, wall=147577
2021-06-20 11:38:34 | INFO | train_inner | epoch 005: 879 / 3002 loss=2.46, ppl=5.5, wps=5895.8, ups=0.09, wpb=64875, bsz=128, num_updates=12813, lr=9.99055e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=147588
2021-06-20 11:38:45 | INFO | train_inner | epoch 005: 880 / 3002 loss=2.602, ppl=6.07, wps=5753.6, ups=0.09, wpb=64876, bsz=128, num_updates=12814, lr=9.99055e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=147599
2021-06-20 11:38:56 | INFO | train_inner | epoch 005: 881 / 3002 loss=2.418, ppl=5.34, wps=5924.6, ups=0.09, wpb=64861, bsz=128, num_updates=12815, lr=9.99055e-05, gnorm=1.964, loss_scale=16, train_wall=10, gb_free=2.8, wall=147610
2021-06-20 11:39:07 | INFO | train_inner | epoch 005: 882 / 3002 loss=2.442, ppl=5.44, wps=5805.9, ups=0.09, wpb=64786, bsz=128, num_updates=12816, lr=9.99055e-05, gnorm=1.905, loss_scale=16, train_wall=11, gb_free=2.8, wall=147621
2021-06-20 11:39:18 | INFO | train_inner | epoch 005: 883 / 3002 loss=2.627, ppl=6.18, wps=5826.9, ups=0.09, wpb=64734, bsz=128, num_updates=12817, lr=9.99055e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=147632
2021-06-20 11:39:29 | INFO | train_inner | epoch 005: 884 / 3002 loss=2.496, ppl=5.64, wps=5821.5, ups=0.09, wpb=64921, bsz=128, num_updates=12818, lr=9.99054e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=147644
2021-06-20 11:39:40 | INFO | train_inner | epoch 005: 885 / 3002 loss=2.587, ppl=6.01, wps=5811.5, ups=0.09, wpb=64779, bsz=128, num_updates=12819, lr=9.99054e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=147655
2021-06-20 11:39:52 | INFO | train_inner | epoch 005: 886 / 3002 loss=2.308, ppl=4.95, wps=5808.1, ups=0.09, wpb=64841, bsz=128, num_updates=12820, lr=9.99054e-05, gnorm=1.873, loss_scale=16, train_wall=11, gb_free=2.8, wall=147666
2021-06-20 11:40:03 | INFO | train_inner | epoch 005: 887 / 3002 loss=2.447, ppl=5.45, wps=5855.1, ups=0.09, wpb=64809, bsz=128, num_updates=12821, lr=9.99054e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=147677
2021-06-20 11:40:14 | INFO | train_inner | epoch 005: 888 / 3002 loss=2.681, ppl=6.41, wps=5736.8, ups=0.09, wpb=64780, bsz=128, num_updates=12822, lr=9.99054e-05, gnorm=2.141, loss_scale=16, train_wall=11, gb_free=2.8, wall=147688
2021-06-20 11:40:25 | INFO | train_inner | epoch 005: 889 / 3002 loss=2.496, ppl=5.64, wps=5876.9, ups=0.09, wpb=64821, bsz=128, num_updates=12823, lr=9.99054e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=147699
2021-06-20 11:40:36 | INFO | train_inner | epoch 005: 890 / 3002 loss=2.481, ppl=5.58, wps=5809.1, ups=0.09, wpb=64774, bsz=128, num_updates=12824, lr=9.99054e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=147710
2021-06-20 11:40:47 | INFO | train_inner | epoch 005: 891 / 3002 loss=2.538, ppl=5.81, wps=5927.9, ups=0.09, wpb=64804, bsz=128, num_updates=12825, lr=9.99054e-05, gnorm=2.005, loss_scale=16, train_wall=10, gb_free=2.8, wall=147721
2021-06-20 11:40:58 | INFO | train_inner | epoch 005: 892 / 3002 loss=2.603, ppl=6.08, wps=5960.9, ups=0.09, wpb=64907, bsz=128, num_updates=12826, lr=9.99054e-05, gnorm=1.996, loss_scale=16, train_wall=10, gb_free=2.8, wall=147732
2021-06-20 11:41:09 | INFO | train_inner | epoch 005: 893 / 3002 loss=2.585, ppl=6, wps=5874.6, ups=0.09, wpb=64750, bsz=128, num_updates=12827, lr=9.99054e-05, gnorm=1.893, loss_scale=16, train_wall=11, gb_free=2.8, wall=147743
2021-06-20 11:41:20 | INFO | train_inner | epoch 005: 894 / 3002 loss=2.591, ppl=6.03, wps=5941.1, ups=0.09, wpb=64853, bsz=128, num_updates=12828, lr=9.99054e-05, gnorm=2.022, loss_scale=16, train_wall=10, gb_free=2.8, wall=147754
2021-06-20 11:41:31 | INFO | train_inner | epoch 005: 895 / 3002 loss=2.457, ppl=5.49, wps=5955.7, ups=0.09, wpb=64890, bsz=128, num_updates=12829, lr=9.99054e-05, gnorm=2.05, loss_scale=16, train_wall=10, gb_free=2.8, wall=147765
2021-06-20 11:41:42 | INFO | train_inner | epoch 005: 896 / 3002 loss=2.579, ppl=5.97, wps=5928, ups=0.09, wpb=64929, bsz=128, num_updates=12830, lr=9.99054e-05, gnorm=2.08, loss_scale=16, train_wall=11, gb_free=2.8, wall=147776
2021-06-20 11:41:53 | INFO | train_inner | epoch 005: 897 / 3002 loss=2.432, ppl=5.4, wps=5786.8, ups=0.09, wpb=64786, bsz=128, num_updates=12831, lr=9.99053e-05, gnorm=2.057, loss_scale=16, train_wall=11, gb_free=2.8, wall=147787
2021-06-20 11:42:04 | INFO | train_inner | epoch 005: 898 / 3002 loss=2.427, ppl=5.38, wps=5820.7, ups=0.09, wpb=64912, bsz=128, num_updates=12832, lr=9.99053e-05, gnorm=2.034, loss_scale=16, train_wall=11, gb_free=2.8, wall=147798
2021-06-20 11:42:15 | INFO | train_inner | epoch 005: 899 / 3002 loss=2.528, ppl=5.77, wps=5817.2, ups=0.09, wpb=64835, bsz=128, num_updates=12833, lr=9.99053e-05, gnorm=2.042, loss_scale=16, train_wall=11, gb_free=2.8, wall=147810
2021-06-20 11:42:26 | INFO | train_inner | epoch 005: 900 / 3002 loss=2.392, ppl=5.25, wps=5810.3, ups=0.09, wpb=64858, bsz=128, num_updates=12834, lr=9.99053e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=147821
2021-06-20 11:42:37 | INFO | train_inner | epoch 005: 901 / 3002 loss=2.566, ppl=5.92, wps=5926.9, ups=0.09, wpb=64828, bsz=128, num_updates=12835, lr=9.99053e-05, gnorm=1.95, loss_scale=16, train_wall=11, gb_free=2.8, wall=147832
2021-06-20 11:42:48 | INFO | train_inner | epoch 005: 902 / 3002 loss=2.469, ppl=5.54, wps=5918.7, ups=0.09, wpb=64814, bsz=128, num_updates=12836, lr=9.99053e-05, gnorm=1.873, loss_scale=16, train_wall=10, gb_free=2.8, wall=147843
2021-06-20 11:42:59 | INFO | train_inner | epoch 005: 903 / 3002 loss=2.576, ppl=5.96, wps=5965.4, ups=0.09, wpb=64874, bsz=128, num_updates=12837, lr=9.99053e-05, gnorm=1.934, loss_scale=16, train_wall=10, gb_free=2.8, wall=147854
2021-06-20 11:43:10 | INFO | train_inner | epoch 005: 904 / 3002 loss=2.503, ppl=5.67, wps=5899.3, ups=0.09, wpb=64792, bsz=128, num_updates=12838, lr=9.99053e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=147864
2021-06-20 11:43:21 | INFO | train_inner | epoch 005: 905 / 3002 loss=2.582, ppl=5.99, wps=5831.2, ups=0.09, wpb=64836, bsz=128, num_updates=12839, lr=9.99053e-05, gnorm=2.025, loss_scale=16, train_wall=11, gb_free=2.8, wall=147876
2021-06-20 11:43:32 | INFO | train_inner | epoch 005: 906 / 3002 loss=2.455, ppl=5.48, wps=5814, ups=0.09, wpb=64818, bsz=128, num_updates=12840, lr=9.99053e-05, gnorm=1.914, loss_scale=16, train_wall=11, gb_free=2.8, wall=147887
2021-06-20 11:43:43 | INFO | train_inner | epoch 005: 907 / 3002 loss=2.463, ppl=5.51, wps=5885.1, ups=0.09, wpb=64849, bsz=128, num_updates=12841, lr=9.99053e-05, gnorm=1.987, loss_scale=32, train_wall=11, gb_free=2.8, wall=147898
2021-06-20 11:43:55 | INFO | train_inner | epoch 005: 908 / 3002 loss=2.499, ppl=5.65, wps=5808.1, ups=0.09, wpb=64833, bsz=128, num_updates=12842, lr=9.99053e-05, gnorm=1.958, loss_scale=32, train_wall=11, gb_free=2.8, wall=147909
2021-06-20 11:44:06 | INFO | train_inner | epoch 005: 909 / 3002 loss=2.52, ppl=5.74, wps=5776.9, ups=0.09, wpb=64821, bsz=128, num_updates=12843, lr=9.99052e-05, gnorm=1.97, loss_scale=32, train_wall=11, gb_free=2.8, wall=147920
2021-06-20 11:44:17 | INFO | train_inner | epoch 005: 910 / 3002 loss=2.433, ppl=5.4, wps=5846.4, ups=0.09, wpb=64833, bsz=128, num_updates=12844, lr=9.99052e-05, gnorm=1.874, loss_scale=32, train_wall=11, gb_free=2.8, wall=147931
2021-06-20 11:44:28 | INFO | train_inner | epoch 005: 911 / 3002 loss=2.461, ppl=5.5, wps=5770.2, ups=0.09, wpb=64838, bsz=128, num_updates=12845, lr=9.99052e-05, gnorm=1.93, loss_scale=32, train_wall=11, gb_free=2.8, wall=147942
2021-06-20 11:44:39 | INFO | train_inner | epoch 005: 912 / 3002 loss=2.528, ppl=5.77, wps=5721.9, ups=0.09, wpb=64814, bsz=128, num_updates=12846, lr=9.99052e-05, gnorm=1.91, loss_scale=32, train_wall=11, gb_free=2.8, wall=147954
2021-06-20 11:44:51 | INFO | train_inner | epoch 005: 913 / 3002 loss=2.463, ppl=5.51, wps=5785.9, ups=0.09, wpb=64850, bsz=128, num_updates=12847, lr=9.99052e-05, gnorm=2.001, loss_scale=32, train_wall=11, gb_free=2.8, wall=147965
2021-06-20 11:45:02 | INFO | train_inner | epoch 005: 914 / 3002 loss=2.515, ppl=5.72, wps=5712.1, ups=0.09, wpb=64786, bsz=128, num_updates=12848, lr=9.99052e-05, gnorm=1.972, loss_scale=32, train_wall=11, gb_free=2.8, wall=147976
2021-06-20 11:45:13 | INFO | train_inner | epoch 005: 915 / 3002 loss=2.551, ppl=5.86, wps=5898.8, ups=0.09, wpb=64856, bsz=128, num_updates=12849, lr=9.99052e-05, gnorm=2.013, loss_scale=32, train_wall=10, gb_free=2.8, wall=147987
2021-06-20 11:45:24 | INFO | train_inner | epoch 005: 916 / 3002 loss=2.564, ppl=5.91, wps=5913.9, ups=0.09, wpb=64868, bsz=128, num_updates=12850, lr=9.99052e-05, gnorm=1.936, loss_scale=32, train_wall=10, gb_free=2.8, wall=147998
2021-06-20 11:45:35 | INFO | train_inner | epoch 005: 917 / 3002 loss=2.6, ppl=6.06, wps=5821.9, ups=0.09, wpb=64824, bsz=128, num_updates=12851, lr=9.99052e-05, gnorm=1.996, loss_scale=32, train_wall=11, gb_free=2.8, wall=148009
2021-06-20 11:45:46 | INFO | train_inner | epoch 005: 918 / 3002 loss=2.477, ppl=5.57, wps=5910.9, ups=0.09, wpb=64796, bsz=128, num_updates=12852, lr=9.99052e-05, gnorm=1.894, loss_scale=32, train_wall=11, gb_free=2.8, wall=148020
2021-06-20 11:45:57 | INFO | train_inner | epoch 005: 919 / 3002 loss=2.397, ppl=5.27, wps=5811, ups=0.09, wpb=64878, bsz=128, num_updates=12853, lr=9.99052e-05, gnorm=1.966, loss_scale=32, train_wall=11, gb_free=2.8, wall=148032
2021-06-20 11:46:08 | INFO | train_inner | epoch 005: 920 / 3002 loss=2.388, ppl=5.23, wps=5828.7, ups=0.09, wpb=64914, bsz=128, num_updates=12854, lr=9.99052e-05, gnorm=1.961, loss_scale=32, train_wall=11, gb_free=2.8, wall=148043
2021-06-20 11:46:19 | INFO | train_inner | epoch 005: 921 / 3002 loss=2.493, ppl=5.63, wps=5879.2, ups=0.09, wpb=64778, bsz=128, num_updates=12855, lr=9.99052e-05, gnorm=1.979, loss_scale=32, train_wall=11, gb_free=2.8, wall=148054
2021-06-20 11:46:30 | INFO | train_inner | epoch 005: 922 / 3002 loss=2.714, ppl=6.56, wps=5927.5, ups=0.09, wpb=64874, bsz=128, num_updates=12856, lr=9.99051e-05, gnorm=2.063, loss_scale=32, train_wall=10, gb_free=2.8, wall=148065
2021-06-20 11:46:42 | INFO | train_inner | epoch 005: 923 / 3002 loss=2.439, ppl=5.42, wps=5804.4, ups=0.09, wpb=64799, bsz=128, num_updates=12857, lr=9.99051e-05, gnorm=2.014, loss_scale=32, train_wall=11, gb_free=2.8, wall=148076
2021-06-20 11:46:53 | INFO | train_inner | epoch 005: 924 / 3002 loss=2.601, ppl=6.07, wps=5875.5, ups=0.09, wpb=64815, bsz=128, num_updates=12858, lr=9.99051e-05, gnorm=2.033, loss_scale=32, train_wall=11, gb_free=2.8, wall=148087
2021-06-20 11:47:04 | INFO | train_inner | epoch 005: 925 / 3002 loss=2.563, ppl=5.91, wps=5827, ups=0.09, wpb=64893, bsz=128, num_updates=12859, lr=9.99051e-05, gnorm=2.08, loss_scale=32, train_wall=11, gb_free=2.8, wall=148098
2021-06-20 11:47:15 | INFO | train_inner | epoch 005: 926 / 3002 loss=2.439, ppl=5.42, wps=5690.6, ups=0.09, wpb=64809, bsz=128, num_updates=12860, lr=9.99051e-05, gnorm=1.966, loss_scale=32, train_wall=11, gb_free=2.8, wall=148109
2021-06-20 11:47:26 | INFO | train_inner | epoch 005: 927 / 3002 loss=2.558, ppl=5.89, wps=5859.4, ups=0.09, wpb=64887, bsz=128, num_updates=12861, lr=9.99051e-05, gnorm=1.989, loss_scale=32, train_wall=11, gb_free=2.8, wall=148120
2021-06-20 11:47:37 | INFO | train_inner | epoch 005: 928 / 3002 loss=2.657, ppl=6.31, wps=5895.1, ups=0.09, wpb=64882, bsz=128, num_updates=12862, lr=9.99051e-05, gnorm=1.947, loss_scale=32, train_wall=11, gb_free=2.8, wall=148131
2021-06-20 11:47:48 | INFO | train_inner | epoch 005: 929 / 3002 loss=2.714, ppl=6.56, wps=5814.8, ups=0.09, wpb=64815, bsz=128, num_updates=12863, lr=9.99051e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=148143
2021-06-20 11:48:00 | INFO | train_inner | epoch 005: 930 / 3002 loss=2.304, ppl=4.94, wps=5772.3, ups=0.09, wpb=64820, bsz=128, num_updates=12864, lr=9.99051e-05, gnorm=1.959, loss_scale=32, train_wall=11, gb_free=2.8, wall=148154
2021-06-20 11:48:11 | INFO | train_inner | epoch 005: 931 / 3002 loss=2.396, ppl=5.26, wps=5834.9, ups=0.09, wpb=64814, bsz=128, num_updates=12865, lr=9.99051e-05, gnorm=1.925, loss_scale=32, train_wall=11, gb_free=2.8, wall=148165
2021-06-20 11:48:22 | INFO | train_inner | epoch 005: 932 / 3002 loss=2.531, ppl=5.78, wps=5802.8, ups=0.09, wpb=64733, bsz=128, num_updates=12866, lr=9.99051e-05, gnorm=1.968, loss_scale=32, train_wall=11, gb_free=2.8, wall=148176
2021-06-20 11:48:33 | INFO | train_inner | epoch 005: 933 / 3002 loss=2.612, ppl=6.11, wps=5854, ups=0.09, wpb=64832, bsz=128, num_updates=12867, lr=9.99051e-05, gnorm=2.17, loss_scale=32, train_wall=11, gb_free=2.8, wall=148187
2021-06-20 11:48:44 | INFO | train_inner | epoch 005: 934 / 3002 loss=2.702, ppl=6.51, wps=5837.5, ups=0.09, wpb=64882, bsz=128, num_updates=12868, lr=9.9905e-05, gnorm=2.046, loss_scale=32, train_wall=11, gb_free=2.8, wall=148198
2021-06-20 11:48:55 | INFO | train_inner | epoch 005: 935 / 3002 loss=2.545, ppl=5.84, wps=5868.3, ups=0.09, wpb=64781, bsz=128, num_updates=12869, lr=9.9905e-05, gnorm=1.986, loss_scale=32, train_wall=11, gb_free=2.8, wall=148209
2021-06-20 11:49:06 | INFO | train_inner | epoch 005: 936 / 3002 loss=2.438, ppl=5.42, wps=5759.6, ups=0.09, wpb=64814, bsz=128, num_updates=12870, lr=9.9905e-05, gnorm=1.983, loss_scale=32, train_wall=11, gb_free=2.8, wall=148221
2021-06-20 11:49:18 | INFO | train_inner | epoch 005: 937 / 3002 loss=2.399, ppl=5.27, wps=5768.1, ups=0.09, wpb=64789, bsz=128, num_updates=12871, lr=9.9905e-05, gnorm=1.962, loss_scale=32, train_wall=11, gb_free=2.8, wall=148232
2021-06-20 11:49:29 | INFO | train_inner | epoch 005: 938 / 3002 loss=2.567, ppl=5.93, wps=5816.2, ups=0.09, wpb=64815, bsz=128, num_updates=12872, lr=9.9905e-05, gnorm=2.095, loss_scale=32, train_wall=11, gb_free=2.8, wall=148243
2021-06-20 11:49:40 | INFO | train_inner | epoch 005: 939 / 3002 loss=2.582, ppl=5.99, wps=5722.8, ups=0.09, wpb=64842, bsz=128, num_updates=12873, lr=9.9905e-05, gnorm=1.991, loss_scale=32, train_wall=11, gb_free=2.8, wall=148254
2021-06-20 11:49:51 | INFO | train_inner | epoch 005: 940 / 3002 loss=2.686, ppl=6.44, wps=5873.3, ups=0.09, wpb=64698, bsz=128, num_updates=12874, lr=9.9905e-05, gnorm=1.988, loss_scale=32, train_wall=11, gb_free=2.8, wall=148265
2021-06-20 11:50:02 | INFO | train_inner | epoch 005: 941 / 3002 loss=2.676, ppl=6.39, wps=5762, ups=0.09, wpb=64783, bsz=128, num_updates=12875, lr=9.9905e-05, gnorm=1.959, loss_scale=32, train_wall=11, gb_free=2.8, wall=148277
2021-06-20 11:50:13 | INFO | train_inner | epoch 005: 942 / 3002 loss=2.56, ppl=5.9, wps=5781.1, ups=0.09, wpb=64857, bsz=128, num_updates=12876, lr=9.9905e-05, gnorm=2.057, loss_scale=32, train_wall=11, gb_free=2.8, wall=148288
2021-06-20 11:50:25 | INFO | train_inner | epoch 005: 943 / 3002 loss=2.473, ppl=5.55, wps=5738.9, ups=0.09, wpb=64840, bsz=128, num_updates=12877, lr=9.9905e-05, gnorm=1.94, loss_scale=32, train_wall=11, gb_free=2.8, wall=148299
2021-06-20 11:50:36 | INFO | train_inner | epoch 005: 944 / 3002 loss=2.604, ppl=6.08, wps=5991.1, ups=0.09, wpb=64846, bsz=128, num_updates=12878, lr=9.9905e-05, gnorm=2, loss_scale=32, train_wall=10, gb_free=2.8, wall=148310
2021-06-20 11:50:47 | INFO | train_inner | epoch 005: 945 / 3002 loss=2.546, ppl=5.84, wps=5870.2, ups=0.09, wpb=64812, bsz=128, num_updates=12879, lr=9.9905e-05, gnorm=2.004, loss_scale=32, train_wall=11, gb_free=2.8, wall=148321
2021-06-20 11:50:58 | INFO | train_inner | epoch 005: 946 / 3002 loss=2.606, ppl=6.09, wps=5829.1, ups=0.09, wpb=64864, bsz=128, num_updates=12880, lr=9.9905e-05, gnorm=2.034, loss_scale=32, train_wall=11, gb_free=2.8, wall=148332
2021-06-20 11:51:09 | INFO | train_inner | epoch 005: 947 / 3002 loss=2.467, ppl=5.53, wps=5811.6, ups=0.09, wpb=64836, bsz=128, num_updates=12881, lr=9.99049e-05, gnorm=1.949, loss_scale=32, train_wall=11, gb_free=2.8, wall=148343
2021-06-20 11:51:20 | INFO | train_inner | epoch 005: 948 / 3002 loss=2.492, ppl=5.62, wps=5859.7, ups=0.09, wpb=64845, bsz=128, num_updates=12882, lr=9.99049e-05, gnorm=1.913, loss_scale=32, train_wall=11, gb_free=2.8, wall=148354
2021-06-20 11:51:31 | INFO | train_inner | epoch 005: 949 / 3002 loss=2.554, ppl=5.87, wps=5952.9, ups=0.09, wpb=64823, bsz=128, num_updates=12883, lr=9.99049e-05, gnorm=2.042, loss_scale=32, train_wall=10, gb_free=2.8, wall=148365
2021-06-20 11:51:42 | INFO | train_inner | epoch 005: 950 / 3002 loss=2.566, ppl=5.92, wps=5776.8, ups=0.09, wpb=64822, bsz=128, num_updates=12884, lr=9.99049e-05, gnorm=1.964, loss_scale=32, train_wall=11, gb_free=2.8, wall=148376
2021-06-20 11:51:53 | INFO | train_inner | epoch 005: 951 / 3002 loss=2.51, ppl=5.7, wps=5799.3, ups=0.09, wpb=64816, bsz=128, num_updates=12885, lr=9.99049e-05, gnorm=2.006, loss_scale=32, train_wall=11, gb_free=2.8, wall=148388
2021-06-20 11:52:04 | INFO | train_inner | epoch 005: 952 / 3002 loss=2.552, ppl=5.87, wps=5891.1, ups=0.09, wpb=64857, bsz=128, num_updates=12886, lr=9.99049e-05, gnorm=1.936, loss_scale=32, train_wall=11, gb_free=2.8, wall=148399
2021-06-20 11:52:15 | INFO | train_inner | epoch 005: 953 / 3002 loss=2.518, ppl=5.73, wps=5809.3, ups=0.09, wpb=64772, bsz=128, num_updates=12887, lr=9.99049e-05, gnorm=1.961, loss_scale=32, train_wall=11, gb_free=2.8, wall=148410
2021-06-20 11:52:27 | INFO | train_inner | epoch 005: 954 / 3002 loss=2.613, ppl=6.12, wps=5777.3, ups=0.09, wpb=64822, bsz=128, num_updates=12888, lr=9.99049e-05, gnorm=2.019, loss_scale=32, train_wall=11, gb_free=2.8, wall=148421
2021-06-20 11:52:38 | INFO | train_inner | epoch 005: 955 / 3002 loss=2.556, ppl=5.88, wps=5881.5, ups=0.09, wpb=64775, bsz=128, num_updates=12889, lr=9.99049e-05, gnorm=1.892, loss_scale=32, train_wall=11, gb_free=2.8, wall=148432
2021-06-20 11:52:49 | INFO | train_inner | epoch 005: 956 / 3002 loss=2.71, ppl=6.54, wps=5911.1, ups=0.09, wpb=64862, bsz=128, num_updates=12890, lr=9.99049e-05, gnorm=2.003, loss_scale=32, train_wall=11, gb_free=2.8, wall=148443
2021-06-20 11:53:00 | INFO | train_inner | epoch 005: 957 / 3002 loss=2.539, ppl=5.81, wps=5755.1, ups=0.09, wpb=64789, bsz=128, num_updates=12891, lr=9.99049e-05, gnorm=1.981, loss_scale=32, train_wall=11, gb_free=2.8, wall=148454
2021-06-20 11:53:11 | INFO | train_inner | epoch 005: 958 / 3002 loss=2.469, ppl=5.54, wps=5826.5, ups=0.09, wpb=64827, bsz=128, num_updates=12892, lr=9.99049e-05, gnorm=2.004, loss_scale=32, train_wall=11, gb_free=2.8, wall=148465
2021-06-20 11:53:22 | INFO | train_inner | epoch 005: 959 / 3002 loss=2.671, ppl=6.37, wps=5696.3, ups=0.09, wpb=64749, bsz=128, num_updates=12893, lr=9.99048e-05, gnorm=1.972, loss_scale=32, train_wall=11, gb_free=2.8, wall=148477
2021-06-20 11:53:33 | INFO | train_inner | epoch 005: 960 / 3002 loss=2.608, ppl=6.1, wps=5904, ups=0.09, wpb=64720, bsz=128, num_updates=12894, lr=9.99048e-05, gnorm=1.919, loss_scale=32, train_wall=10, gb_free=2.8, wall=148488
2021-06-20 11:53:44 | INFO | train_inner | epoch 005: 961 / 3002 loss=2.633, ppl=6.2, wps=5851, ups=0.09, wpb=64899, bsz=128, num_updates=12895, lr=9.99048e-05, gnorm=2.049, loss_scale=32, train_wall=11, gb_free=2.8, wall=148499
2021-06-20 11:53:55 | INFO | train_inner | epoch 005: 962 / 3002 loss=2.496, ppl=5.64, wps=5936.7, ups=0.09, wpb=64866, bsz=128, num_updates=12896, lr=9.99048e-05, gnorm=2.259, loss_scale=32, train_wall=10, gb_free=2.8, wall=148510
2021-06-20 11:54:07 | INFO | train_inner | epoch 005: 963 / 3002 loss=2.495, ppl=5.64, wps=5783.5, ups=0.09, wpb=64690, bsz=128, num_updates=12897, lr=9.99048e-05, gnorm=1.99, loss_scale=32, train_wall=11, gb_free=2.8, wall=148521
2021-06-20 11:54:18 | INFO | train_inner | epoch 005: 964 / 3002 loss=2.392, ppl=5.25, wps=5795.6, ups=0.09, wpb=64901, bsz=128, num_updates=12898, lr=9.99048e-05, gnorm=1.974, loss_scale=32, train_wall=11, gb_free=2.8, wall=148532
2021-06-20 11:54:29 | INFO | train_inner | epoch 005: 965 / 3002 loss=2.584, ppl=6, wps=5855.1, ups=0.09, wpb=64857, bsz=128, num_updates=12899, lr=9.99048e-05, gnorm=2.011, loss_scale=32, train_wall=11, gb_free=2.8, wall=148543
2021-06-20 11:54:40 | INFO | train_inner | epoch 005: 966 / 3002 loss=2.593, ppl=6.03, wps=5853.3, ups=0.09, wpb=64780, bsz=128, num_updates=12900, lr=9.99048e-05, gnorm=2, loss_scale=32, train_wall=11, gb_free=2.8, wall=148554
2021-06-20 11:54:51 | INFO | train_inner | epoch 005: 967 / 3002 loss=2.402, ppl=5.28, wps=5726.3, ups=0.09, wpb=64863, bsz=128, num_updates=12901, lr=9.99048e-05, gnorm=1.892, loss_scale=32, train_wall=11, gb_free=2.8, wall=148566
2021-06-20 11:55:02 | INFO | train_inner | epoch 005: 968 / 3002 loss=2.542, ppl=5.82, wps=5845.6, ups=0.09, wpb=64796, bsz=128, num_updates=12902, lr=9.99048e-05, gnorm=2.021, loss_scale=32, train_wall=11, gb_free=2.8, wall=148577
2021-06-20 11:55:13 | INFO | train_inner | epoch 005: 969 / 3002 loss=2.401, ppl=5.28, wps=5820.6, ups=0.09, wpb=64826, bsz=128, num_updates=12903, lr=9.99048e-05, gnorm=1.925, loss_scale=32, train_wall=11, gb_free=2.8, wall=148588
2021-06-20 11:55:25 | INFO | train_inner | epoch 005: 970 / 3002 loss=2.509, ppl=5.69, wps=5798.7, ups=0.09, wpb=64895, bsz=128, num_updates=12904, lr=9.99048e-05, gnorm=2.003, loss_scale=32, train_wall=11, gb_free=2.8, wall=148599
2021-06-20 11:55:36 | INFO | train_inner | epoch 005: 971 / 3002 loss=2.525, ppl=5.76, wps=5849.8, ups=0.09, wpb=64862, bsz=128, num_updates=12905, lr=9.99048e-05, gnorm=1.992, loss_scale=32, train_wall=11, gb_free=2.8, wall=148610
2021-06-20 11:55:47 | INFO | train_inner | epoch 005: 972 / 3002 loss=2.496, ppl=5.64, wps=5927.2, ups=0.09, wpb=64845, bsz=128, num_updates=12906, lr=9.99047e-05, gnorm=1.986, loss_scale=32, train_wall=10, gb_free=2.8, wall=148621
2021-06-20 11:55:58 | INFO | train_inner | epoch 005: 973 / 3002 loss=2.615, ppl=6.13, wps=5843.9, ups=0.09, wpb=64816, bsz=128, num_updates=12907, lr=9.99047e-05, gnorm=1.993, loss_scale=32, train_wall=11, gb_free=2.8, wall=148632
2021-06-20 11:56:09 | INFO | train_inner | epoch 005: 974 / 3002 loss=2.448, ppl=5.46, wps=5834.7, ups=0.09, wpb=64865, bsz=128, num_updates=12908, lr=9.99047e-05, gnorm=1.98, loss_scale=32, train_wall=11, gb_free=2.8, wall=148643
2021-06-20 11:56:20 | INFO | train_inner | epoch 005: 975 / 3002 loss=2.651, ppl=6.28, wps=5790.4, ups=0.09, wpb=64890, bsz=128, num_updates=12909, lr=9.99047e-05, gnorm=2.028, loss_scale=32, train_wall=11, gb_free=2.8, wall=148654
2021-06-20 11:56:31 | INFO | train_inner | epoch 005: 976 / 3002 loss=2.64, ppl=6.24, wps=5800.4, ups=0.09, wpb=64816, bsz=128, num_updates=12910, lr=9.99047e-05, gnorm=2.076, loss_scale=32, train_wall=11, gb_free=2.8, wall=148666
2021-06-20 11:56:42 | INFO | train_inner | epoch 005: 977 / 3002 loss=2.564, ppl=5.91, wps=5942.7, ups=0.09, wpb=64793, bsz=128, num_updates=12911, lr=9.99047e-05, gnorm=1.887, loss_scale=32, train_wall=10, gb_free=2.8, wall=148676
2021-06-20 11:56:53 | INFO | train_inner | epoch 005: 978 / 3002 loss=2.596, ppl=6.05, wps=5750.3, ups=0.09, wpb=64728, bsz=128, num_updates=12912, lr=9.99047e-05, gnorm=1.954, loss_scale=32, train_wall=11, gb_free=2.8, wall=148688
2021-06-20 11:57:05 | INFO | train_inner | epoch 005: 979 / 3002 loss=2.526, ppl=5.76, wps=5836.1, ups=0.09, wpb=64827, bsz=128, num_updates=12913, lr=9.99047e-05, gnorm=2.001, loss_scale=32, train_wall=11, gb_free=2.8, wall=148699
2021-06-20 11:57:16 | INFO | train_inner | epoch 005: 980 / 3002 loss=2.622, ppl=6.16, wps=5702.2, ups=0.09, wpb=64763, bsz=128, num_updates=12914, lr=9.99047e-05, gnorm=1.975, loss_scale=32, train_wall=11, gb_free=2.8, wall=148710
2021-06-20 11:57:27 | INFO | train_inner | epoch 005: 981 / 3002 loss=2.514, ppl=5.71, wps=5898.6, ups=0.09, wpb=64798, bsz=128, num_updates=12915, lr=9.99047e-05, gnorm=1.962, loss_scale=32, train_wall=11, gb_free=2.8, wall=148721
2021-06-20 11:57:38 | INFO | train_inner | epoch 005: 982 / 3002 loss=2.495, ppl=5.64, wps=5824.7, ups=0.09, wpb=64846, bsz=128, num_updates=12916, lr=9.99047e-05, gnorm=1.923, loss_scale=32, train_wall=11, gb_free=2.8, wall=148732
2021-06-20 11:57:49 | INFO | train_inner | epoch 005: 983 / 3002 loss=2.566, ppl=5.92, wps=5734.4, ups=0.09, wpb=64806, bsz=128, num_updates=12917, lr=9.99047e-05, gnorm=1.972, loss_scale=32, train_wall=11, gb_free=2.8, wall=148744
2021-06-20 11:58:01 | INFO | train_inner | epoch 005: 984 / 3002 loss=2.516, ppl=5.72, wps=5785.6, ups=0.09, wpb=64849, bsz=128, num_updates=12918, lr=9.99046e-05, gnorm=1.975, loss_scale=32, train_wall=11, gb_free=2.8, wall=148755
2021-06-20 11:58:12 | INFO | train_inner | epoch 005: 985 / 3002 loss=2.516, ppl=5.72, wps=5783.6, ups=0.09, wpb=64899, bsz=128, num_updates=12919, lr=9.99046e-05, gnorm=1.977, loss_scale=32, train_wall=11, gb_free=2.8, wall=148766
2021-06-20 11:58:23 | INFO | train_inner | epoch 005: 986 / 3002 loss=2.563, ppl=5.91, wps=5881.1, ups=0.09, wpb=64917, bsz=128, num_updates=12920, lr=9.99046e-05, gnorm=1.987, loss_scale=32, train_wall=11, gb_free=2.8, wall=148777
2021-06-20 11:58:34 | INFO | train_inner | epoch 005: 987 / 3002 loss=2.608, ppl=6.1, wps=5783.4, ups=0.09, wpb=64849, bsz=128, num_updates=12921, lr=9.99046e-05, gnorm=1.945, loss_scale=32, train_wall=11, gb_free=2.8, wall=148788
2021-06-20 11:58:45 | INFO | train_inner | epoch 005: 988 / 3002 loss=2.554, ppl=5.87, wps=5837.9, ups=0.09, wpb=64836, bsz=128, num_updates=12922, lr=9.99046e-05, gnorm=1.937, loss_scale=32, train_wall=11, gb_free=2.8, wall=148799
2021-06-20 11:58:56 | INFO | train_inner | epoch 005: 989 / 3002 loss=2.55, ppl=5.86, wps=5849.8, ups=0.09, wpb=64752, bsz=128, num_updates=12923, lr=9.99046e-05, gnorm=1.967, loss_scale=32, train_wall=11, gb_free=2.8, wall=148811
2021-06-20 11:59:07 | INFO | train_inner | epoch 005: 990 / 3002 loss=2.497, ppl=5.64, wps=5801.5, ups=0.09, wpb=64779, bsz=128, num_updates=12924, lr=9.99046e-05, gnorm=1.991, loss_scale=32, train_wall=11, gb_free=2.8, wall=148822
2021-06-20 11:59:18 | INFO | train_inner | epoch 005: 991 / 3002 loss=2.559, ppl=5.89, wps=6034.6, ups=0.09, wpb=64803, bsz=128, num_updates=12925, lr=9.99046e-05, gnorm=1.983, loss_scale=32, train_wall=10, gb_free=2.8, wall=148832
2021-06-20 11:59:29 | INFO | train_inner | epoch 005: 992 / 3002 loss=2.461, ppl=5.5, wps=5845.2, ups=0.09, wpb=64819, bsz=128, num_updates=12926, lr=9.99046e-05, gnorm=1.94, loss_scale=32, train_wall=11, gb_free=2.8, wall=148843
2021-06-20 11:59:40 | INFO | train_inner | epoch 005: 993 / 3002 loss=2.391, ppl=5.25, wps=5867.5, ups=0.09, wpb=64877, bsz=128, num_updates=12927, lr=9.99046e-05, gnorm=2.047, loss_scale=32, train_wall=11, gb_free=2.8, wall=148855
2021-06-20 11:59:51 | INFO | train_inner | epoch 005: 994 / 3002 loss=2.543, ppl=5.83, wps=5808.5, ups=0.09, wpb=64805, bsz=128, num_updates=12928, lr=9.99046e-05, gnorm=2.044, loss_scale=32, train_wall=11, gb_free=2.8, wall=148866
2021-06-20 12:00:02 | INFO | train_inner | epoch 005: 995 / 3002 loss=2.585, ppl=6, wps=5925.5, ups=0.09, wpb=64777, bsz=128, num_updates=12929, lr=9.99046e-05, gnorm=2.035, loss_scale=32, train_wall=10, gb_free=2.8, wall=148877
2021-06-20 12:00:13 | INFO | train_inner | epoch 005: 996 / 3002 loss=2.53, ppl=5.77, wps=5806.8, ups=0.09, wpb=64811, bsz=128, num_updates=12930, lr=9.99046e-05, gnorm=2.084, loss_scale=32, train_wall=11, gb_free=2.8, wall=148888
2021-06-20 12:00:25 | INFO | train_inner | epoch 005: 997 / 3002 loss=2.485, ppl=5.6, wps=5712.8, ups=0.09, wpb=64736, bsz=128, num_updates=12931, lr=9.99045e-05, gnorm=1.895, loss_scale=32, train_wall=11, gb_free=2.8, wall=148899
2021-06-20 12:00:36 | INFO | train_inner | epoch 005: 998 / 3002 loss=2.486, ppl=5.6, wps=5740.6, ups=0.09, wpb=64864, bsz=128, num_updates=12932, lr=9.99045e-05, gnorm=1.929, loss_scale=32, train_wall=11, gb_free=2.8, wall=148910
2021-06-20 12:00:47 | INFO | train_inner | epoch 005: 999 / 3002 loss=2.525, ppl=5.76, wps=5771.3, ups=0.09, wpb=64846, bsz=128, num_updates=12933, lr=9.99045e-05, gnorm=1.943, loss_scale=32, train_wall=11, gb_free=2.8, wall=148922
2021-06-20 12:00:59 | INFO | train_inner | epoch 005: 1000 / 3002 loss=2.562, ppl=5.91, wps=5798.6, ups=0.09, wpb=64736, bsz=128, num_updates=12934, lr=9.99045e-05, gnorm=1.973, loss_scale=32, train_wall=11, gb_free=2.8, wall=148933
2021-06-20 12:01:10 | INFO | train_inner | epoch 005: 1001 / 3002 loss=2.441, ppl=5.43, wps=5746.9, ups=0.09, wpb=64831, bsz=128, num_updates=12935, lr=9.99045e-05, gnorm=1.921, loss_scale=32, train_wall=11, gb_free=2.8, wall=148944
2021-06-20 12:01:21 | INFO | train_inner | epoch 005: 1002 / 3002 loss=2.497, ppl=5.65, wps=5861.7, ups=0.09, wpb=64827, bsz=128, num_updates=12936, lr=9.99045e-05, gnorm=1.973, loss_scale=32, train_wall=11, gb_free=2.8, wall=148955
2021-06-20 12:01:32 | INFO | train_inner | epoch 005: 1003 / 3002 loss=2.487, ppl=5.61, wps=5821.5, ups=0.09, wpb=64818, bsz=128, num_updates=12937, lr=9.99045e-05, gnorm=2.003, loss_scale=32, train_wall=11, gb_free=2.8, wall=148966
2021-06-20 12:01:43 | INFO | train_inner | epoch 005: 1004 / 3002 loss=2.548, ppl=5.85, wps=5871.3, ups=0.09, wpb=64827, bsz=128, num_updates=12938, lr=9.99045e-05, gnorm=2.007, loss_scale=32, train_wall=11, gb_free=2.8, wall=148977
2021-06-20 12:01:54 | INFO | train_inner | epoch 005: 1005 / 3002 loss=2.528, ppl=5.77, wps=5905, ups=0.09, wpb=64811, bsz=128, num_updates=12939, lr=9.99045e-05, gnorm=2.054, loss_scale=32, train_wall=11, gb_free=2.8, wall=148988
2021-06-20 12:02:05 | INFO | train_inner | epoch 005: 1006 / 3002 loss=2.552, ppl=5.87, wps=5747.7, ups=0.09, wpb=64804, bsz=128, num_updates=12940, lr=9.99045e-05, gnorm=2.042, loss_scale=32, train_wall=11, gb_free=2.8, wall=149000
2021-06-20 12:02:16 | INFO | train_inner | epoch 005: 1007 / 3002 loss=2.575, ppl=5.96, wps=5775.6, ups=0.09, wpb=64822, bsz=128, num_updates=12941, lr=9.99045e-05, gnorm=2.014, loss_scale=32, train_wall=11, gb_free=2.8, wall=149011
2021-06-20 12:02:28 | INFO | train_inner | epoch 005: 1008 / 3002 loss=2.484, ppl=5.6, wps=5848, ups=0.09, wpb=64834, bsz=128, num_updates=12942, lr=9.99045e-05, gnorm=2, loss_scale=32, train_wall=11, gb_free=2.8, wall=149022
2021-06-20 12:02:39 | INFO | train_inner | epoch 005: 1009 / 3002 loss=2.511, ppl=5.7, wps=5785.7, ups=0.09, wpb=64815, bsz=128, num_updates=12943, lr=9.99044e-05, gnorm=1.921, loss_scale=32, train_wall=11, gb_free=2.8, wall=149033
2021-06-20 12:02:50 | INFO | train_inner | epoch 005: 1010 / 3002 loss=2.572, ppl=5.95, wps=5744.2, ups=0.09, wpb=64755, bsz=128, num_updates=12944, lr=9.99044e-05, gnorm=2.008, loss_scale=32, train_wall=11, gb_free=2.8, wall=149044
2021-06-20 12:03:01 | INFO | train_inner | epoch 005: 1011 / 3002 loss=2.545, ppl=5.84, wps=5749.9, ups=0.09, wpb=64886, bsz=128, num_updates=12945, lr=9.99044e-05, gnorm=2.018, loss_scale=32, train_wall=11, gb_free=2.8, wall=149056
2021-06-20 12:03:13 | INFO | train_inner | epoch 005: 1012 / 3002 loss=2.375, ppl=5.19, wps=5684.4, ups=0.09, wpb=64801, bsz=128, num_updates=12946, lr=9.99044e-05, gnorm=1.968, loss_scale=32, train_wall=11, gb_free=2.8, wall=149067
2021-06-20 12:03:24 | INFO | train_inner | epoch 005: 1013 / 3002 loss=2.522, ppl=5.74, wps=5845.7, ups=0.09, wpb=64866, bsz=128, num_updates=12947, lr=9.99044e-05, gnorm=1.99, loss_scale=32, train_wall=11, gb_free=2.8, wall=149078
2021-06-20 12:03:35 | INFO | train_inner | epoch 005: 1014 / 3002 loss=2.666, ppl=6.35, wps=5762.9, ups=0.09, wpb=64791, bsz=128, num_updates=12948, lr=9.99044e-05, gnorm=1.988, loss_scale=32, train_wall=11, gb_free=2.8, wall=149089
2021-06-20 12:03:46 | INFO | train_inner | epoch 005: 1015 / 3002 loss=2.465, ppl=5.52, wps=5812.6, ups=0.09, wpb=64811, bsz=128, num_updates=12949, lr=9.99044e-05, gnorm=1.978, loss_scale=32, train_wall=11, gb_free=2.8, wall=149101
2021-06-20 12:03:57 | INFO | train_inner | epoch 005: 1016 / 3002 loss=2.411, ppl=5.32, wps=5839.6, ups=0.09, wpb=64777, bsz=128, num_updates=12950, lr=9.99044e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=149112
2021-06-20 12:04:08 | INFO | train_inner | epoch 005: 1017 / 3002 loss=2.52, ppl=5.73, wps=5893.2, ups=0.09, wpb=64750, bsz=128, num_updates=12951, lr=9.99044e-05, gnorm=1.962, loss_scale=32, train_wall=11, gb_free=2.8, wall=149123
2021-06-20 12:04:19 | INFO | train_inner | epoch 005: 1018 / 3002 loss=2.534, ppl=5.79, wps=5903.8, ups=0.09, wpb=64831, bsz=128, num_updates=12952, lr=9.99044e-05, gnorm=2.045, loss_scale=32, train_wall=11, gb_free=2.8, wall=149134
2021-06-20 12:04:30 | INFO | train_inner | epoch 005: 1019 / 3002 loss=2.601, ppl=6.07, wps=5938.2, ups=0.09, wpb=64816, bsz=128, num_updates=12953, lr=9.99044e-05, gnorm=1.94, loss_scale=32, train_wall=10, gb_free=2.8, wall=149145
2021-06-20 12:04:41 | INFO | train_inner | epoch 005: 1020 / 3002 loss=2.651, ppl=6.28, wps=5837.5, ups=0.09, wpb=64850, bsz=128, num_updates=12954, lr=9.99044e-05, gnorm=2.113, loss_scale=32, train_wall=11, gb_free=2.8, wall=149156
2021-06-20 12:04:53 | INFO | train_inner | epoch 005: 1021 / 3002 loss=2.483, ppl=5.59, wps=5754.6, ups=0.09, wpb=64850, bsz=128, num_updates=12955, lr=9.99044e-05, gnorm=1.971, loss_scale=32, train_wall=11, gb_free=2.8, wall=149167
2021-06-20 12:05:04 | INFO | train_inner | epoch 005: 1022 / 3002 loss=2.456, ppl=5.49, wps=5838.9, ups=0.09, wpb=64784, bsz=128, num_updates=12956, lr=9.99043e-05, gnorm=1.948, loss_scale=32, train_wall=11, gb_free=2.8, wall=149178
2021-06-20 12:05:15 | INFO | train_inner | epoch 005: 1023 / 3002 loss=2.42, ppl=5.35, wps=5689.4, ups=0.09, wpb=64809, bsz=128, num_updates=12957, lr=9.99043e-05, gnorm=1.905, loss_scale=32, train_wall=11, gb_free=2.8, wall=149189
2021-06-20 12:05:26 | INFO | train_inner | epoch 005: 1024 / 3002 loss=2.577, ppl=5.97, wps=5709, ups=0.09, wpb=64852, bsz=128, num_updates=12958, lr=9.99043e-05, gnorm=2.062, loss_scale=32, train_wall=11, gb_free=2.8, wall=149201
2021-06-20 12:05:38 | INFO | train_inner | epoch 005: 1025 / 3002 loss=2.589, ppl=6.02, wps=5667.1, ups=0.09, wpb=64787, bsz=128, num_updates=12959, lr=9.99043e-05, gnorm=1.963, loss_scale=32, train_wall=11, gb_free=2.8, wall=149212
2021-06-20 12:05:49 | INFO | train_inner | epoch 005: 1026 / 3002 loss=2.677, ppl=6.39, wps=5830.2, ups=0.09, wpb=64761, bsz=128, num_updates=12960, lr=9.99043e-05, gnorm=1.982, loss_scale=32, train_wall=11, gb_free=2.8, wall=149223
2021-06-20 12:06:00 | INFO | train_inner | epoch 005: 1027 / 3002 loss=2.7, ppl=6.5, wps=5716.5, ups=0.09, wpb=64805, bsz=128, num_updates=12961, lr=9.99043e-05, gnorm=1.966, loss_scale=32, train_wall=11, gb_free=2.8, wall=149235
2021-06-20 12:06:11 | INFO | train_inner | epoch 005: 1028 / 3002 loss=2.592, ppl=6.03, wps=5856.7, ups=0.09, wpb=64748, bsz=128, num_updates=12962, lr=9.99043e-05, gnorm=2.019, loss_scale=32, train_wall=11, gb_free=2.8, wall=149246
2021-06-20 12:06:22 | INFO | train_inner | epoch 005: 1029 / 3002 loss=2.442, ppl=5.44, wps=5835.5, ups=0.09, wpb=64911, bsz=128, num_updates=12963, lr=9.99043e-05, gnorm=2.139, loss_scale=32, train_wall=11, gb_free=2.8, wall=149257
2021-06-20 12:06:34 | INFO | train_inner | epoch 005: 1030 / 3002 loss=2.488, ppl=5.61, wps=5749.9, ups=0.09, wpb=64845, bsz=128, num_updates=12964, lr=9.99043e-05, gnorm=1.959, loss_scale=32, train_wall=11, gb_free=2.8, wall=149268
2021-06-20 12:06:45 | INFO | train_inner | epoch 005: 1031 / 3002 loss=2.572, ppl=5.95, wps=5803.3, ups=0.09, wpb=64900, bsz=128, num_updates=12965, lr=9.99043e-05, gnorm=2.014, loss_scale=32, train_wall=11, gb_free=2.8, wall=149279
2021-06-20 12:06:56 | INFO | train_inner | epoch 005: 1032 / 3002 loss=2.615, ppl=6.13, wps=5897.3, ups=0.09, wpb=64844, bsz=128, num_updates=12966, lr=9.99043e-05, gnorm=2.07, loss_scale=32, train_wall=11, gb_free=2.8, wall=149290
2021-06-20 12:07:07 | INFO | train_inner | epoch 005: 1033 / 3002 loss=2.683, ppl=6.42, wps=5827.4, ups=0.09, wpb=64856, bsz=128, num_updates=12967, lr=9.99043e-05, gnorm=2.007, loss_scale=32, train_wall=11, gb_free=2.8, wall=149301
2021-06-20 12:07:18 | INFO | train_inner | epoch 005: 1034 / 3002 loss=2.499, ppl=5.65, wps=5925.3, ups=0.09, wpb=64803, bsz=128, num_updates=12968, lr=9.99042e-05, gnorm=1.969, loss_scale=32, train_wall=10, gb_free=2.8, wall=149312
2021-06-20 12:07:29 | INFO | train_inner | epoch 005: 1035 / 3002 loss=2.5, ppl=5.66, wps=6016.6, ups=0.09, wpb=64860, bsz=128, num_updates=12969, lr=9.99042e-05, gnorm=1.894, loss_scale=64, train_wall=10, gb_free=2.8, wall=149323
2021-06-20 12:07:40 | INFO | train_inner | epoch 005: 1036 / 3002 loss=2.572, ppl=5.95, wps=5888.7, ups=0.09, wpb=64798, bsz=128, num_updates=12970, lr=9.99042e-05, gnorm=1.966, loss_scale=64, train_wall=11, gb_free=2.8, wall=149334
2021-06-20 12:07:51 | INFO | train_inner | epoch 005: 1037 / 3002 loss=2.396, ppl=5.26, wps=5733.6, ups=0.09, wpb=64832, bsz=128, num_updates=12971, lr=9.99042e-05, gnorm=1.828, loss_scale=64, train_wall=11, gb_free=2.8, wall=149345
2021-06-20 12:08:02 | INFO | train_inner | epoch 005: 1038 / 3002 loss=2.38, ppl=5.21, wps=5754.4, ups=0.09, wpb=64872, bsz=128, num_updates=12972, lr=9.99042e-05, gnorm=2.041, loss_scale=64, train_wall=11, gb_free=2.8, wall=149357
2021-06-20 12:08:13 | INFO | train_inner | epoch 005: 1039 / 3002 loss=2.499, ppl=5.65, wps=5838.5, ups=0.09, wpb=64831, bsz=128, num_updates=12973, lr=9.99042e-05, gnorm=2.002, loss_scale=64, train_wall=11, gb_free=2.8, wall=149368
2021-06-20 12:08:25 | INFO | train_inner | epoch 005: 1040 / 3002 loss=2.594, ppl=6.04, wps=5852.5, ups=0.09, wpb=64875, bsz=128, num_updates=12974, lr=9.99042e-05, gnorm=2.006, loss_scale=64, train_wall=11, gb_free=2.8, wall=149379
2021-06-20 12:08:36 | INFO | train_inner | epoch 005: 1041 / 3002 loss=2.391, ppl=5.25, wps=5755.4, ups=0.09, wpb=64736, bsz=128, num_updates=12975, lr=9.99042e-05, gnorm=1.962, loss_scale=64, train_wall=11, gb_free=2.8, wall=149390
2021-06-20 12:08:47 | INFO | train_inner | epoch 005: 1042 / 3002 loss=2.59, ppl=6.02, wps=5971.1, ups=0.09, wpb=64859, bsz=128, num_updates=12976, lr=9.99042e-05, gnorm=1.945, loss_scale=64, train_wall=10, gb_free=2.8, wall=149401
2021-06-20 12:08:58 | INFO | train_inner | epoch 005: 1043 / 3002 loss=2.602, ppl=6.07, wps=5738.6, ups=0.09, wpb=64809, bsz=128, num_updates=12977, lr=9.99042e-05, gnorm=2.022, loss_scale=64, train_wall=11, gb_free=2.8, wall=149412
2021-06-20 12:09:09 | INFO | train_inner | epoch 005: 1044 / 3002 loss=2.694, ppl=6.47, wps=5810.9, ups=0.09, wpb=64870, bsz=128, num_updates=12978, lr=9.99042e-05, gnorm=2.021, loss_scale=64, train_wall=11, gb_free=2.8, wall=149423
2021-06-20 12:09:20 | INFO | train_inner | epoch 005: 1045 / 3002 loss=2.42, ppl=5.35, wps=5850.3, ups=0.09, wpb=64869, bsz=128, num_updates=12979, lr=9.99042e-05, gnorm=2.018, loss_scale=64, train_wall=11, gb_free=2.8, wall=149435
2021-06-20 12:09:31 | INFO | train_inner | epoch 005: 1046 / 3002 loss=2.645, ppl=6.26, wps=6000.5, ups=0.09, wpb=64818, bsz=128, num_updates=12980, lr=9.99042e-05, gnorm=1.949, loss_scale=64, train_wall=10, gb_free=2.8, wall=149445
2021-06-20 12:09:42 | INFO | train_inner | epoch 005: 1047 / 3002 loss=2.531, ppl=5.78, wps=5772, ups=0.09, wpb=64866, bsz=128, num_updates=12981, lr=9.99041e-05, gnorm=1.938, loss_scale=64, train_wall=11, gb_free=2.8, wall=149457
2021-06-20 12:09:53 | INFO | train_inner | epoch 005: 1048 / 3002 loss=2.559, ppl=5.89, wps=6022.3, ups=0.09, wpb=64836, bsz=128, num_updates=12982, lr=9.99041e-05, gnorm=1.957, loss_scale=64, train_wall=10, gb_free=2.8, wall=149467
2021-06-20 12:10:04 | INFO | train_inner | epoch 005: 1049 / 3002 loss=2.51, ppl=5.7, wps=5875.5, ups=0.09, wpb=64920, bsz=128, num_updates=12983, lr=9.99041e-05, gnorm=2.039, loss_scale=64, train_wall=11, gb_free=2.8, wall=149478
2021-06-20 12:10:15 | INFO | train_inner | epoch 005: 1050 / 3002 loss=2.509, ppl=5.69, wps=5870.9, ups=0.09, wpb=64804, bsz=128, num_updates=12984, lr=9.99041e-05, gnorm=2.043, loss_scale=64, train_wall=11, gb_free=2.8, wall=149489
2021-06-20 12:10:26 | INFO | train_inner | epoch 005: 1051 / 3002 loss=2.429, ppl=5.38, wps=5929.1, ups=0.09, wpb=64863, bsz=128, num_updates=12985, lr=9.99041e-05, gnorm=1.914, loss_scale=64, train_wall=10, gb_free=2.8, wall=149500
2021-06-20 12:10:37 | INFO | train_inner | epoch 005: 1052 / 3002 loss=2.509, ppl=5.69, wps=5741.5, ups=0.09, wpb=64830, bsz=128, num_updates=12986, lr=9.99041e-05, gnorm=1.99, loss_scale=64, train_wall=11, gb_free=2.8, wall=149512
2021-06-20 12:10:48 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0
2021-06-20 12:11:00 | INFO | train_inner | epoch 005: 1054 / 3002 loss=2.507, ppl=5.68, wps=2894.5, ups=0.04, wpb=64745, bsz=128, num_updates=12987, lr=9.99041e-05, gnorm=1.891, loss_scale=32, train_wall=21, gb_free=2.8, wall=149534
2021-06-20 12:11:11 | INFO | train_inner | epoch 005: 1055 / 3002 loss=2.529, ppl=5.77, wps=5938.4, ups=0.09, wpb=64918, bsz=128, num_updates=12988, lr=9.99041e-05, gnorm=1.988, loss_scale=32, train_wall=10, gb_free=2.8, wall=149545
2021-06-20 12:11:22 | INFO | train_inner | epoch 005: 1056 / 3002 loss=2.539, ppl=5.81, wps=5746.4, ups=0.09, wpb=64792, bsz=128, num_updates=12989, lr=9.99041e-05, gnorm=2.024, loss_scale=32, train_wall=11, gb_free=2.8, wall=149556
2021-06-20 12:11:33 | INFO | train_inner | epoch 005: 1057 / 3002 loss=2.425, ppl=5.37, wps=5808.5, ups=0.09, wpb=64835, bsz=128, num_updates=12990, lr=9.99041e-05, gnorm=1.955, loss_scale=32, train_wall=11, gb_free=2.8, wall=149567
2021-06-20 12:11:44 | INFO | train_inner | epoch 005: 1058 / 3002 loss=2.482, ppl=5.59, wps=5829, ups=0.09, wpb=64761, bsz=128, num_updates=12991, lr=9.99041e-05, gnorm=2.068, loss_scale=32, train_wall=11, gb_free=2.8, wall=149579
2021-06-20 12:11:55 | INFO | train_inner | epoch 005: 1059 / 3002 loss=2.503, ppl=5.67, wps=5823.1, ups=0.09, wpb=64812, bsz=128, num_updates=12992, lr=9.99041e-05, gnorm=2.085, loss_scale=32, train_wall=11, gb_free=2.8, wall=149590
2021-06-20 12:12:06 | INFO | train_inner | epoch 005: 1060 / 3002 loss=2.392, ppl=5.25, wps=5897.3, ups=0.09, wpb=64880, bsz=128, num_updates=12993, lr=9.9904e-05, gnorm=1.945, loss_scale=32, train_wall=11, gb_free=2.8, wall=149601
2021-06-20 12:12:17 | INFO | train_inner | epoch 005: 1061 / 3002 loss=2.557, ppl=5.89, wps=5944.5, ups=0.09, wpb=64808, bsz=128, num_updates=12994, lr=9.9904e-05, gnorm=2.005, loss_scale=32, train_wall=10, gb_free=2.8, wall=149612
2021-06-20 12:12:28 | INFO | train_inner | epoch 005: 1062 / 3002 loss=2.532, ppl=5.78, wps=5867.4, ups=0.09, wpb=64813, bsz=128, num_updates=12995, lr=9.9904e-05, gnorm=1.937, loss_scale=32, train_wall=11, gb_free=2.8, wall=149623
2021-06-20 12:12:39 | INFO | train_inner | epoch 005: 1063 / 3002 loss=2.508, ppl=5.69, wps=5890.5, ups=0.09, wpb=64817, bsz=128, num_updates=12996, lr=9.9904e-05, gnorm=1.939, loss_scale=32, train_wall=11, gb_free=2.8, wall=149634
2021-06-20 12:12:50 | INFO | train_inner | epoch 005: 1064 / 3002 loss=2.504, ppl=5.67, wps=5867.7, ups=0.09, wpb=64766, bsz=128, num_updates=12997, lr=9.9904e-05, gnorm=1.98, loss_scale=32, train_wall=11, gb_free=2.8, wall=149645
2021-06-20 12:13:02 | INFO | train_inner | epoch 005: 1065 / 3002 loss=2.536, ppl=5.8, wps=5731.7, ups=0.09, wpb=64855, bsz=128, num_updates=12998, lr=9.9904e-05, gnorm=1.893, loss_scale=32, train_wall=11, gb_free=2.8, wall=149656
2021-06-20 12:13:12 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0
2021-06-20 12:13:23 | INFO | train_inner | epoch 005: 1067 / 3002 loss=2.581, ppl=5.98, wps=2968.6, ups=0.05, wpb=64870, bsz=128, num_updates=12999, lr=9.9904e-05, gnorm=1.984, loss_scale=16, train_wall=21, gb_free=2.8, wall=149678
2021-06-20 12:13:35 | INFO | train_inner | epoch 005: 1068 / 3002 loss=2.614, ppl=6.12, wps=5869.9, ups=0.09, wpb=64822, bsz=128, num_updates=13000, lr=9.9904e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=149689
2021-06-20 12:13:45 | INFO | train_inner | epoch 005: 1069 / 3002 loss=2.559, ppl=5.89, wps=5969.7, ups=0.09, wpb=64845, bsz=128, num_updates=13001, lr=9.9904e-05, gnorm=1.999, loss_scale=16, train_wall=10, gb_free=2.8, wall=149700
2021-06-20 12:13:57 | INFO | train_inner | epoch 005: 1070 / 3002 loss=2.642, ppl=6.24, wps=5703, ups=0.09, wpb=64813, bsz=128, num_updates=13002, lr=9.9904e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=149711
2021-06-20 12:14:08 | INFO | train_inner | epoch 005: 1071 / 3002 loss=2.486, ppl=5.6, wps=5815.4, ups=0.09, wpb=64879, bsz=128, num_updates=13003, lr=9.9904e-05, gnorm=1.974, loss_scale=16, train_wall=11, gb_free=2.8, wall=149722
2021-06-20 12:14:19 | INFO | train_inner | epoch 005: 1072 / 3002 loss=2.51, ppl=5.7, wps=5928.7, ups=0.09, wpb=64813, bsz=128, num_updates=13004, lr=9.9904e-05, gnorm=1.943, loss_scale=16, train_wall=10, gb_free=2.8, wall=149733
2021-06-20 12:14:30 | INFO | train_inner | epoch 005: 1073 / 3002 loss=2.608, ppl=6.1, wps=5867, ups=0.09, wpb=64829, bsz=128, num_updates=13005, lr=9.9904e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=149744
2021-06-20 12:14:41 | INFO | train_inner | epoch 005: 1074 / 3002 loss=2.365, ppl=5.15, wps=5809.6, ups=0.09, wpb=64751, bsz=128, num_updates=13006, lr=9.99039e-05, gnorm=1.898, loss_scale=16, train_wall=11, gb_free=2.8, wall=149755
2021-06-20 12:14:52 | INFO | train_inner | epoch 005: 1075 / 3002 loss=2.701, ppl=6.5, wps=5835.4, ups=0.09, wpb=64874, bsz=128, num_updates=13007, lr=9.99039e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=149767
2021-06-20 12:15:03 | INFO | train_inner | epoch 005: 1076 / 3002 loss=2.577, ppl=5.97, wps=5875.6, ups=0.09, wpb=64888, bsz=128, num_updates=13008, lr=9.99039e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=149778
2021-06-20 12:15:14 | INFO | train_inner | epoch 005: 1077 / 3002 loss=2.561, ppl=5.9, wps=5948, ups=0.09, wpb=64910, bsz=128, num_updates=13009, lr=9.99039e-05, gnorm=2.058, loss_scale=16, train_wall=10, gb_free=2.8, wall=149788
2021-06-20 12:15:25 | INFO | train_inner | epoch 005: 1078 / 3002 loss=2.447, ppl=5.45, wps=5766.9, ups=0.09, wpb=64848, bsz=128, num_updates=13010, lr=9.99039e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=149800
2021-06-20 12:15:36 | INFO | train_inner | epoch 005: 1079 / 3002 loss=2.658, ppl=6.31, wps=5873, ups=0.09, wpb=64807, bsz=128, num_updates=13011, lr=9.99039e-05, gnorm=2.034, loss_scale=16, train_wall=11, gb_free=2.8, wall=149811
2021-06-20 12:15:47 | INFO | train_inner | epoch 005: 1080 / 3002 loss=2.67, ppl=6.36, wps=5897.4, ups=0.09, wpb=64883, bsz=128, num_updates=13012, lr=9.99039e-05, gnorm=2.092, loss_scale=16, train_wall=11, gb_free=2.8, wall=149822
2021-06-20 12:15:58 | INFO | train_inner | epoch 005: 1081 / 3002 loss=2.465, ppl=5.52, wps=5851.9, ups=0.09, wpb=64731, bsz=128, num_updates=13013, lr=9.99039e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=149833
2021-06-20 12:16:10 | INFO | train_inner | epoch 005: 1082 / 3002 loss=2.508, ppl=5.69, wps=5859.5, ups=0.09, wpb=64872, bsz=128, num_updates=13014, lr=9.99039e-05, gnorm=2.034, loss_scale=16, train_wall=11, gb_free=2.8, wall=149844
2021-06-20 12:16:21 | INFO | train_inner | epoch 005: 1083 / 3002 loss=2.441, ppl=5.43, wps=5821.3, ups=0.09, wpb=64791, bsz=128, num_updates=13015, lr=9.99039e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=149855
2021-06-20 12:16:32 | INFO | train_inner | epoch 005: 1084 / 3002 loss=2.404, ppl=5.29, wps=5849.7, ups=0.09, wpb=64860, bsz=128, num_updates=13016, lr=9.99039e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=149866
2021-06-20 12:16:43 | INFO | train_inner | epoch 005: 1085 / 3002 loss=2.346, ppl=5.08, wps=5957.5, ups=0.09, wpb=64792, bsz=128, num_updates=13017, lr=9.99039e-05, gnorm=2.006, loss_scale=16, train_wall=10, gb_free=2.8, wall=149877
2021-06-20 12:16:54 | INFO | train_inner | epoch 005: 1086 / 3002 loss=2.424, ppl=5.37, wps=5758.3, ups=0.09, wpb=64839, bsz=128, num_updates=13018, lr=9.99038e-05, gnorm=2.119, loss_scale=16, train_wall=11, gb_free=2.8, wall=149888
2021-06-20 12:17:05 | INFO | train_inner | epoch 005: 1087 / 3002 loss=2.673, ppl=6.38, wps=5807.2, ups=0.09, wpb=64804, bsz=128, num_updates=13019, lr=9.99038e-05, gnorm=2.1, loss_scale=16, train_wall=11, gb_free=2.8, wall=149899
2021-06-20 12:17:16 | INFO | train_inner | epoch 005: 1088 / 3002 loss=2.634, ppl=6.21, wps=5823.6, ups=0.09, wpb=64779, bsz=128, num_updates=13020, lr=9.99038e-05, gnorm=2.083, loss_scale=16, train_wall=11, gb_free=2.8, wall=149911
2021-06-20 12:17:27 | INFO | train_inner | epoch 005: 1089 / 3002 loss=2.428, ppl=5.38, wps=5809.9, ups=0.09, wpb=64845, bsz=128, num_updates=13021, lr=9.99038e-05, gnorm=1.996, loss_scale=16, train_wall=11, gb_free=2.8, wall=149922
2021-06-20 12:17:38 | INFO | train_inner | epoch 005: 1090 / 3002 loss=2.529, ppl=5.77, wps=5873, ups=0.09, wpb=64809, bsz=128, num_updates=13022, lr=9.99038e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=149933
2021-06-20 12:17:49 | INFO | train_inner | epoch 005: 1091 / 3002 loss=2.441, ppl=5.43, wps=5896.7, ups=0.09, wpb=64845, bsz=128, num_updates=13023, lr=9.99038e-05, gnorm=1.93, loss_scale=16, train_wall=11, gb_free=2.8, wall=149944
2021-06-20 12:18:00 | INFO | train_inner | epoch 005: 1092 / 3002 loss=2.732, ppl=6.64, wps=5868.2, ups=0.09, wpb=64799, bsz=128, num_updates=13024, lr=9.99038e-05, gnorm=2.035, loss_scale=16, train_wall=11, gb_free=2.8, wall=149955
2021-06-20 12:18:11 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 12:18:23 | INFO | train_inner | epoch 005: 1094 / 3002 loss=2.368, ppl=5.16, wps=2932.5, ups=0.05, wpb=64844, bsz=128, num_updates=13025, lr=9.99038e-05, gnorm=2.24, loss_scale=8, train_wall=21, gb_free=2.8, wall=149977
2021-06-20 12:18:34 | INFO | train_inner | epoch 005: 1095 / 3002 loss=2.675, ppl=6.39, wps=5776.7, ups=0.09, wpb=64860, bsz=128, num_updates=13026, lr=9.99038e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=149988
2021-06-20 12:18:45 | INFO | train_inner | epoch 005: 1096 / 3002 loss=2.438, ppl=5.42, wps=5970.7, ups=0.09, wpb=64861, bsz=128, num_updates=13027, lr=9.99038e-05, gnorm=1.974, loss_scale=8, train_wall=10, gb_free=2.8, wall=149999
2021-06-20 12:18:56 | INFO | train_inner | epoch 005: 1097 / 3002 loss=2.578, ppl=5.97, wps=5733.9, ups=0.09, wpb=64778, bsz=128, num_updates=13028, lr=9.99038e-05, gnorm=2.12, loss_scale=8, train_wall=11, gb_free=2.8, wall=150010
2021-06-20 12:19:07 | INFO | train_inner | epoch 005: 1098 / 3002 loss=2.521, ppl=5.74, wps=5814.4, ups=0.09, wpb=64772, bsz=128, num_updates=13029, lr=9.99038e-05, gnorm=2.551, loss_scale=8, train_wall=11, gb_free=2.8, wall=150021
2021-06-20 12:19:18 | INFO | train_inner | epoch 005: 1099 / 3002 loss=2.46, ppl=5.5, wps=5756.3, ups=0.09, wpb=64741, bsz=128, num_updates=13030, lr=9.99038e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=150033
2021-06-20 12:19:29 | INFO | train_inner | epoch 005: 1100 / 3002 loss=2.516, ppl=5.72, wps=5881.4, ups=0.09, wpb=64886, bsz=128, num_updates=13031, lr=9.99037e-05, gnorm=2.138, loss_scale=8, train_wall=11, gb_free=2.8, wall=150044
2021-06-20 12:19:41 | INFO | train_inner | epoch 005: 1101 / 3002 loss=2.426, ppl=5.37, wps=5792.4, ups=0.09, wpb=64834, bsz=128, num_updates=13032, lr=9.99037e-05, gnorm=2.08, loss_scale=8, train_wall=11, gb_free=2.8, wall=150055
2021-06-20 12:19:52 | INFO | train_inner | epoch 005: 1102 / 3002 loss=2.613, ppl=6.12, wps=5844.9, ups=0.09, wpb=64894, bsz=128, num_updates=13033, lr=9.99037e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=150066
2021-06-20 12:20:03 | INFO | train_inner | epoch 005: 1103 / 3002 loss=2.56, ppl=5.9, wps=5833.2, ups=0.09, wpb=64808, bsz=128, num_updates=13034, lr=9.99037e-05, gnorm=2.524, loss_scale=8, train_wall=11, gb_free=2.8, wall=150077
2021-06-20 12:20:14 | INFO | train_inner | epoch 005: 1104 / 3002 loss=2.684, ppl=6.43, wps=5929.5, ups=0.09, wpb=64824, bsz=128, num_updates=13035, lr=9.99037e-05, gnorm=2.058, loss_scale=8, train_wall=10, gb_free=2.8, wall=150088
2021-06-20 12:20:25 | INFO | train_inner | epoch 005: 1105 / 3002 loss=2.644, ppl=6.25, wps=5981.2, ups=0.09, wpb=64929, bsz=128, num_updates=13036, lr=9.99037e-05, gnorm=2.104, loss_scale=8, train_wall=10, gb_free=2.8, wall=150099
2021-06-20 12:20:35 | INFO | train_inner | epoch 005: 1106 / 3002 loss=2.536, ppl=5.8, wps=5964.4, ups=0.09, wpb=64867, bsz=128, num_updates=13037, lr=9.99037e-05, gnorm=2.089, loss_scale=8, train_wall=10, gb_free=2.8, wall=150110
2021-06-20 12:20:47 | INFO | train_inner | epoch 005: 1107 / 3002 loss=2.585, ppl=6, wps=5818.8, ups=0.09, wpb=64856, bsz=128, num_updates=13038, lr=9.99037e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=150121
2021-06-20 12:20:58 | INFO | train_inner | epoch 005: 1108 / 3002 loss=2.549, ppl=5.85, wps=5768.9, ups=0.09, wpb=64842, bsz=128, num_updates=13039, lr=9.99037e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=150132
2021-06-20 12:21:09 | INFO | train_inner | epoch 005: 1109 / 3002 loss=2.429, ppl=5.38, wps=5692.6, ups=0.09, wpb=64889, bsz=128, num_updates=13040, lr=9.99037e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=150144
2021-06-20 12:21:20 | INFO | train_inner | epoch 005: 1110 / 3002 loss=2.787, ppl=6.9, wps=5833.9, ups=0.09, wpb=64770, bsz=128, num_updates=13041, lr=9.99037e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=150155
2021-06-20 12:21:31 | INFO | train_inner | epoch 005: 1111 / 3002 loss=2.568, ppl=5.93, wps=5824.9, ups=0.09, wpb=64875, bsz=128, num_updates=13042, lr=9.99037e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=150166
2021-06-20 12:21:43 | INFO | train_inner | epoch 005: 1112 / 3002 loss=2.578, ppl=5.97, wps=5836, ups=0.09, wpb=64762, bsz=128, num_updates=13043, lr=9.99036e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=150177
2021-06-20 12:21:53 | INFO | train_inner | epoch 005: 1113 / 3002 loss=2.372, ppl=5.18, wps=6007.7, ups=0.09, wpb=64803, bsz=128, num_updates=13044, lr=9.99036e-05, gnorm=1.966, loss_scale=8, train_wall=10, gb_free=2.8, wall=150188
2021-06-20 12:22:04 | INFO | train_inner | epoch 005: 1114 / 3002 loss=2.578, ppl=5.97, wps=5877.6, ups=0.09, wpb=64842, bsz=128, num_updates=13045, lr=9.99036e-05, gnorm=2.093, loss_scale=8, train_wall=11, gb_free=2.8, wall=150199
2021-06-20 12:22:15 | INFO | train_inner | epoch 005: 1115 / 3002 loss=2.542, ppl=5.82, wps=6007, ups=0.09, wpb=64885, bsz=128, num_updates=13046, lr=9.99036e-05, gnorm=2.049, loss_scale=8, train_wall=10, gb_free=2.8, wall=150209
2021-06-20 12:22:26 | INFO | train_inner | epoch 005: 1116 / 3002 loss=2.508, ppl=5.69, wps=5768.3, ups=0.09, wpb=64783, bsz=128, num_updates=13047, lr=9.99036e-05, gnorm=2.011, loss_scale=8, train_wall=11, gb_free=2.8, wall=150221
2021-06-20 12:22:37 | INFO | train_inner | epoch 005: 1117 / 3002 loss=2.449, ppl=5.46, wps=5867.4, ups=0.09, wpb=64828, bsz=128, num_updates=13048, lr=9.99036e-05, gnorm=2.033, loss_scale=8, train_wall=11, gb_free=2.8, wall=150232
2021-06-20 12:22:48 | INFO | train_inner | epoch 005: 1118 / 3002 loss=2.559, ppl=5.89, wps=5908.2, ups=0.09, wpb=64934, bsz=128, num_updates=13049, lr=9.99036e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=150243
2021-06-20 12:23:00 | INFO | train_inner | epoch 005: 1119 / 3002 loss=2.414, ppl=5.33, wps=5830, ups=0.09, wpb=64890, bsz=128, num_updates=13050, lr=9.99036e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=150254
2021-06-20 12:23:11 | INFO | train_inner | epoch 005: 1120 / 3002 loss=2.497, ppl=5.64, wps=5879.9, ups=0.09, wpb=64868, bsz=128, num_updates=13051, lr=9.99036e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=150265
2021-06-20 12:23:21 | INFO | train_inner | epoch 005: 1121 / 3002 loss=2.498, ppl=5.65, wps=5994.5, ups=0.09, wpb=64913, bsz=128, num_updates=13052, lr=9.99036e-05, gnorm=1.997, loss_scale=8, train_wall=10, gb_free=2.8, wall=150276
2021-06-20 12:23:32 | INFO | train_inner | epoch 005: 1122 / 3002 loss=2.423, ppl=5.36, wps=5896.5, ups=0.09, wpb=64803, bsz=128, num_updates=13053, lr=9.99036e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=150287
2021-06-20 12:23:43 | INFO | train_inner | epoch 005: 1123 / 3002 loss=2.529, ppl=5.77, wps=5863.8, ups=0.09, wpb=64838, bsz=128, num_updates=13054, lr=9.99036e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=150298
2021-06-20 12:23:54 | INFO | train_inner | epoch 005: 1124 / 3002 loss=2.499, ppl=5.65, wps=5891.2, ups=0.09, wpb=64767, bsz=128, num_updates=13055, lr=9.99036e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=150309
2021-06-20 12:24:06 | INFO | train_inner | epoch 005: 1125 / 3002 loss=2.587, ppl=6.01, wps=5757.8, ups=0.09, wpb=64840, bsz=128, num_updates=13056, lr=9.99035e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=150320
2021-06-20 12:24:17 | INFO | train_inner | epoch 005: 1126 / 3002 loss=2.564, ppl=5.91, wps=5802.3, ups=0.09, wpb=64839, bsz=128, num_updates=13057, lr=9.99035e-05, gnorm=2.589, loss_scale=8, train_wall=11, gb_free=2.8, wall=150331
2021-06-20 12:24:28 | INFO | train_inner | epoch 005: 1127 / 3002 loss=2.411, ppl=5.32, wps=5953, ups=0.09, wpb=64810, bsz=128, num_updates=13058, lr=9.99035e-05, gnorm=2.044, loss_scale=8, train_wall=10, gb_free=2.8, wall=150342
2021-06-20 12:24:39 | INFO | train_inner | epoch 005: 1128 / 3002 loss=2.587, ppl=6.01, wps=5741.6, ups=0.09, wpb=64787, bsz=128, num_updates=13059, lr=9.99035e-05, gnorm=2.084, loss_scale=8, train_wall=11, gb_free=2.8, wall=150353
2021-06-20 12:24:50 | INFO | train_inner | epoch 005: 1129 / 3002 loss=2.55, ppl=5.86, wps=5897.7, ups=0.09, wpb=64928, bsz=128, num_updates=13060, lr=9.99035e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=150364
2021-06-20 12:25:01 | INFO | train_inner | epoch 005: 1130 / 3002 loss=2.641, ppl=6.24, wps=5846.5, ups=0.09, wpb=64787, bsz=128, num_updates=13061, lr=9.99035e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=150375
2021-06-20 12:25:12 | INFO | train_inner | epoch 005: 1131 / 3002 loss=2.437, ppl=5.41, wps=5943.4, ups=0.09, wpb=64816, bsz=128, num_updates=13062, lr=9.99035e-05, gnorm=2.042, loss_scale=8, train_wall=10, gb_free=2.8, wall=150386
2021-06-20 12:25:23 | INFO | train_inner | epoch 005: 1132 / 3002 loss=2.47, ppl=5.54, wps=5862.1, ups=0.09, wpb=64765, bsz=128, num_updates=13063, lr=9.99035e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=150397
2021-06-20 12:25:34 | INFO | train_inner | epoch 005: 1133 / 3002 loss=2.557, ppl=5.89, wps=5791.3, ups=0.09, wpb=64886, bsz=128, num_updates=13064, lr=9.99035e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=150409
2021-06-20 12:25:46 | INFO | train_inner | epoch 005: 1134 / 3002 loss=2.526, ppl=5.76, wps=5791.5, ups=0.09, wpb=64764, bsz=128, num_updates=13065, lr=9.99035e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=150420
2021-06-20 12:25:57 | INFO | train_inner | epoch 005: 1135 / 3002 loss=2.632, ppl=6.2, wps=5746.5, ups=0.09, wpb=64849, bsz=128, num_updates=13066, lr=9.99035e-05, gnorm=2.192, loss_scale=8, train_wall=11, gb_free=2.8, wall=150431
2021-06-20 12:26:08 | INFO | train_inner | epoch 005: 1136 / 3002 loss=2.605, ppl=6.08, wps=5789.6, ups=0.09, wpb=64852, bsz=128, num_updates=13067, lr=9.99035e-05, gnorm=2.051, loss_scale=8, train_wall=11, gb_free=2.8, wall=150442
2021-06-20 12:26:19 | INFO | train_inner | epoch 005: 1137 / 3002 loss=2.467, ppl=5.53, wps=5796, ups=0.09, wpb=64862, bsz=128, num_updates=13068, lr=9.99034e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=150454
2021-06-20 12:26:30 | INFO | train_inner | epoch 005: 1138 / 3002 loss=2.416, ppl=5.34, wps=5872.7, ups=0.09, wpb=64865, bsz=128, num_updates=13069, lr=9.99034e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=150465
2021-06-20 12:26:41 | INFO | train_inner | epoch 005: 1139 / 3002 loss=2.57, ppl=5.94, wps=5911, ups=0.09, wpb=64844, bsz=128, num_updates=13070, lr=9.99034e-05, gnorm=2.321, loss_scale=8, train_wall=11, gb_free=2.8, wall=150476
2021-06-20 12:26:52 | INFO | train_inner | epoch 005: 1140 / 3002 loss=2.579, ppl=5.97, wps=5844.8, ups=0.09, wpb=64884, bsz=128, num_updates=13071, lr=9.99034e-05, gnorm=2.195, loss_scale=8, train_wall=11, gb_free=2.8, wall=150487
2021-06-20 12:27:03 | INFO | train_inner | epoch 005: 1141 / 3002 loss=2.5, ppl=5.66, wps=5837, ups=0.09, wpb=64912, bsz=128, num_updates=13072, lr=9.99034e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=150498
2021-06-20 12:27:15 | INFO | train_inner | epoch 005: 1142 / 3002 loss=2.682, ppl=6.42, wps=5744.2, ups=0.09, wpb=64758, bsz=128, num_updates=13073, lr=9.99034e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=150509
2021-06-20 12:27:26 | INFO | train_inner | epoch 005: 1143 / 3002 loss=2.387, ppl=5.23, wps=5823.6, ups=0.09, wpb=64872, bsz=128, num_updates=13074, lr=9.99034e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=150520
2021-06-20 12:27:37 | INFO | train_inner | epoch 005: 1144 / 3002 loss=2.475, ppl=5.56, wps=5885.8, ups=0.09, wpb=64876, bsz=128, num_updates=13075, lr=9.99034e-05, gnorm=1.886, loss_scale=8, train_wall=11, gb_free=2.8, wall=150531
2021-06-20 12:27:48 | INFO | train_inner | epoch 005: 1145 / 3002 loss=2.603, ppl=6.08, wps=5798.2, ups=0.09, wpb=64842, bsz=128, num_updates=13076, lr=9.99034e-05, gnorm=2.028, loss_scale=8, train_wall=11, gb_free=2.8, wall=150542
2021-06-20 12:27:59 | INFO | train_inner | epoch 005: 1146 / 3002 loss=2.382, ppl=5.21, wps=5775.2, ups=0.09, wpb=64842, bsz=128, num_updates=13077, lr=9.99034e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=150554
2021-06-20 12:28:10 | INFO | train_inner | epoch 005: 1147 / 3002 loss=2.508, ppl=5.69, wps=5872.7, ups=0.09, wpb=64829, bsz=128, num_updates=13078, lr=9.99034e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=150565
2021-06-20 12:28:21 | INFO | train_inner | epoch 005: 1148 / 3002 loss=2.501, ppl=5.66, wps=5964.1, ups=0.09, wpb=64728, bsz=128, num_updates=13079, lr=9.99034e-05, gnorm=2.044, loss_scale=8, train_wall=10, gb_free=2.8, wall=150575
2021-06-20 12:28:32 | INFO | train_inner | epoch 005: 1149 / 3002 loss=2.299, ppl=4.92, wps=5848.1, ups=0.09, wpb=64934, bsz=128, num_updates=13080, lr=9.99034e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=150587
2021-06-20 12:28:43 | INFO | train_inner | epoch 005: 1150 / 3002 loss=2.579, ppl=5.98, wps=5776.2, ups=0.09, wpb=64886, bsz=128, num_updates=13081, lr=9.99033e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=150598
2021-06-20 12:28:55 | INFO | train_inner | epoch 005: 1151 / 3002 loss=2.536, ppl=5.8, wps=5777, ups=0.09, wpb=64749, bsz=128, num_updates=13082, lr=9.99033e-05, gnorm=2.144, loss_scale=8, train_wall=11, gb_free=2.8, wall=150609
2021-06-20 12:29:06 | INFO | train_inner | epoch 005: 1152 / 3002 loss=2.549, ppl=5.85, wps=5911.6, ups=0.09, wpb=64814, bsz=128, num_updates=13083, lr=9.99033e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=150620
2021-06-20 12:29:17 | INFO | train_inner | epoch 005: 1153 / 3002 loss=2.609, ppl=6.1, wps=5797.6, ups=0.09, wpb=64836, bsz=128, num_updates=13084, lr=9.99033e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=150631
2021-06-20 12:29:28 | INFO | train_inner | epoch 005: 1154 / 3002 loss=2.417, ppl=5.34, wps=5934.8, ups=0.09, wpb=64943, bsz=128, num_updates=13085, lr=9.99033e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=150642
2021-06-20 12:29:39 | INFO | train_inner | epoch 005: 1155 / 3002 loss=2.448, ppl=5.46, wps=5852.5, ups=0.09, wpb=64884, bsz=128, num_updates=13086, lr=9.99033e-05, gnorm=2.056, loss_scale=8, train_wall=11, gb_free=2.8, wall=150653
2021-06-20 12:29:50 | INFO | train_inner | epoch 005: 1156 / 3002 loss=2.452, ppl=5.47, wps=5883.4, ups=0.09, wpb=64754, bsz=128, num_updates=13087, lr=9.99033e-05, gnorm=1.934, loss_scale=8, train_wall=11, gb_free=2.8, wall=150664
2021-06-20 12:30:01 | INFO | train_inner | epoch 005: 1157 / 3002 loss=2.469, ppl=5.54, wps=5839.9, ups=0.09, wpb=64864, bsz=128, num_updates=13088, lr=9.99033e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=150675
2021-06-20 12:30:12 | INFO | train_inner | epoch 005: 1158 / 3002 loss=2.369, ppl=5.17, wps=5803.8, ups=0.09, wpb=64831, bsz=128, num_updates=13089, lr=9.99033e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=150687
2021-06-20 12:30:23 | INFO | train_inner | epoch 005: 1159 / 3002 loss=2.473, ppl=5.55, wps=5893.8, ups=0.09, wpb=64871, bsz=128, num_updates=13090, lr=9.99033e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=150698
2021-06-20 12:30:34 | INFO | train_inner | epoch 005: 1160 / 3002 loss=2.522, ppl=5.75, wps=5749.3, ups=0.09, wpb=64770, bsz=128, num_updates=13091, lr=9.99033e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=150709
2021-06-20 12:30:46 | INFO | train_inner | epoch 005: 1161 / 3002 loss=2.544, ppl=5.83, wps=5778.2, ups=0.09, wpb=64831, bsz=128, num_updates=13092, lr=9.99033e-05, gnorm=2.091, loss_scale=8, train_wall=11, gb_free=2.8, wall=150720
2021-06-20 12:30:57 | INFO | train_inner | epoch 005: 1162 / 3002 loss=2.5, ppl=5.66, wps=5828.8, ups=0.09, wpb=64791, bsz=128, num_updates=13093, lr=9.99032e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=150731
2021-06-20 12:31:08 | INFO | train_inner | epoch 005: 1163 / 3002 loss=2.544, ppl=5.83, wps=5836.5, ups=0.09, wpb=64844, bsz=128, num_updates=13094, lr=9.99032e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=150742
2021-06-20 12:31:19 | INFO | train_inner | epoch 005: 1164 / 3002 loss=2.548, ppl=5.85, wps=5802, ups=0.09, wpb=64800, bsz=128, num_updates=13095, lr=9.99032e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=150753
2021-06-20 12:31:30 | INFO | train_inner | epoch 005: 1165 / 3002 loss=2.471, ppl=5.55, wps=5914.4, ups=0.09, wpb=64965, bsz=128, num_updates=13096, lr=9.99032e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=150764
2021-06-20 12:31:41 | INFO | train_inner | epoch 005: 1166 / 3002 loss=2.432, ppl=5.4, wps=5788.2, ups=0.09, wpb=64766, bsz=128, num_updates=13097, lr=9.99032e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=150776
2021-06-20 12:31:52 | INFO | train_inner | epoch 005: 1167 / 3002 loss=2.499, ppl=5.65, wps=5818.6, ups=0.09, wpb=64794, bsz=128, num_updates=13098, lr=9.99032e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=150787
2021-06-20 12:32:04 | INFO | train_inner | epoch 005: 1168 / 3002 loss=2.697, ppl=6.49, wps=5790, ups=0.09, wpb=64848, bsz=128, num_updates=13099, lr=9.99032e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=150798
2021-06-20 12:32:15 | INFO | train_inner | epoch 005: 1169 / 3002 loss=2.518, ppl=5.73, wps=5903.1, ups=0.09, wpb=64928, bsz=128, num_updates=13100, lr=9.99032e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=150809
2021-06-20 12:32:26 | INFO | train_inner | epoch 005: 1170 / 3002 loss=2.593, ppl=6.03, wps=5863.5, ups=0.09, wpb=64938, bsz=128, num_updates=13101, lr=9.99032e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=150820
2021-06-20 12:32:36 | INFO | train_inner | epoch 005: 1171 / 3002 loss=2.531, ppl=5.78, wps=6044, ups=0.09, wpb=64852, bsz=128, num_updates=13102, lr=9.99032e-05, gnorm=2.012, loss_scale=8, train_wall=10, gb_free=2.8, wall=150831
2021-06-20 12:32:47 | INFO | train_inner | epoch 005: 1172 / 3002 loss=2.526, ppl=5.76, wps=5921.3, ups=0.09, wpb=64760, bsz=128, num_updates=13103, lr=9.99032e-05, gnorm=1.956, loss_scale=8, train_wall=10, gb_free=2.8, wall=150842
2021-06-20 12:32:59 | INFO | train_inner | epoch 005: 1173 / 3002 loss=2.622, ppl=6.16, wps=5739.9, ups=0.09, wpb=64705, bsz=128, num_updates=13104, lr=9.99032e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=150853
2021-06-20 12:33:10 | INFO | train_inner | epoch 005: 1174 / 3002 loss=2.647, ppl=6.27, wps=5909.4, ups=0.09, wpb=64776, bsz=128, num_updates=13105, lr=9.99032e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=150864
2021-06-20 12:33:20 | INFO | train_inner | epoch 005: 1175 / 3002 loss=2.51, ppl=5.7, wps=5966.6, ups=0.09, wpb=64853, bsz=128, num_updates=13106, lr=9.99031e-05, gnorm=1.892, loss_scale=8, train_wall=10, gb_free=2.8, wall=150875
2021-06-20 12:33:31 | INFO | train_inner | epoch 005: 1176 / 3002 loss=2.393, ppl=5.25, wps=5919.8, ups=0.09, wpb=64890, bsz=128, num_updates=13107, lr=9.99031e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=150886
2021-06-20 12:33:43 | INFO | train_inner | epoch 005: 1177 / 3002 loss=2.432, ppl=5.4, wps=5821.8, ups=0.09, wpb=64840, bsz=128, num_updates=13108, lr=9.99031e-05, gnorm=2.249, loss_scale=8, train_wall=11, gb_free=2.8, wall=150897
2021-06-20 12:33:54 | INFO | train_inner | epoch 005: 1178 / 3002 loss=2.582, ppl=5.99, wps=5858.1, ups=0.09, wpb=64793, bsz=128, num_updates=13109, lr=9.99031e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=150908
2021-06-20 12:34:05 | INFO | train_inner | epoch 005: 1179 / 3002 loss=2.562, ppl=5.9, wps=5926.6, ups=0.09, wpb=64814, bsz=128, num_updates=13110, lr=9.99031e-05, gnorm=2.111, loss_scale=8, train_wall=10, gb_free=2.8, wall=150919
2021-06-20 12:34:15 | INFO | train_inner | epoch 005: 1180 / 3002 loss=2.246, ppl=4.74, wps=6048, ups=0.09, wpb=64812, bsz=128, num_updates=13111, lr=9.99031e-05, gnorm=1.97, loss_scale=8, train_wall=10, gb_free=2.8, wall=150930
2021-06-20 12:34:26 | INFO | train_inner | epoch 005: 1181 / 3002 loss=2.446, ppl=5.45, wps=5859, ups=0.09, wpb=64853, bsz=128, num_updates=13112, lr=9.99031e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=150941
2021-06-20 12:34:37 | INFO | train_inner | epoch 005: 1182 / 3002 loss=2.341, ppl=5.07, wps=5870.2, ups=0.09, wpb=64835, bsz=128, num_updates=13113, lr=9.99031e-05, gnorm=1.932, loss_scale=8, train_wall=11, gb_free=2.8, wall=150952
2021-06-20 12:34:48 | INFO | train_inner | epoch 005: 1183 / 3002 loss=2.75, ppl=6.73, wps=5826, ups=0.09, wpb=64841, bsz=128, num_updates=13114, lr=9.99031e-05, gnorm=2.058, loss_scale=8, train_wall=11, gb_free=2.8, wall=150963
2021-06-20 12:34:59 | INFO | train_inner | epoch 005: 1184 / 3002 loss=2.505, ppl=5.67, wps=5914.9, ups=0.09, wpb=64888, bsz=128, num_updates=13115, lr=9.99031e-05, gnorm=1.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=150974
2021-06-20 12:35:11 | INFO | train_inner | epoch 005: 1185 / 3002 loss=2.564, ppl=5.91, wps=5842.6, ups=0.09, wpb=64873, bsz=128, num_updates=13116, lr=9.99031e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=150985
2021-06-20 12:35:21 | INFO | train_inner | epoch 005: 1186 / 3002 loss=2.504, ppl=5.67, wps=6059.7, ups=0.09, wpb=64783, bsz=128, num_updates=13117, lr=9.99031e-05, gnorm=1.985, loss_scale=8, train_wall=10, gb_free=2.8, wall=150996
2021-06-20 12:35:32 | INFO | train_inner | epoch 005: 1187 / 3002 loss=2.554, ppl=5.87, wps=5795.4, ups=0.09, wpb=64876, bsz=128, num_updates=13118, lr=9.9903e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=151007
2021-06-20 12:35:43 | INFO | train_inner | epoch 005: 1188 / 3002 loss=2.388, ppl=5.24, wps=5872.2, ups=0.09, wpb=64808, bsz=128, num_updates=13119, lr=9.9903e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=151018
2021-06-20 12:35:54 | INFO | train_inner | epoch 005: 1189 / 3002 loss=2.426, ppl=5.37, wps=5930.1, ups=0.09, wpb=64902, bsz=128, num_updates=13120, lr=9.9903e-05, gnorm=1.87, loss_scale=8, train_wall=10, gb_free=2.8, wall=151029
2021-06-20 12:36:06 | INFO | train_inner | epoch 005: 1190 / 3002 loss=2.644, ppl=6.25, wps=5678.9, ups=0.09, wpb=64728, bsz=128, num_updates=13121, lr=9.9903e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=151040
2021-06-20 12:36:17 | INFO | train_inner | epoch 005: 1191 / 3002 loss=2.483, ppl=5.59, wps=5876, ups=0.09, wpb=64743, bsz=128, num_updates=13122, lr=9.9903e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=151051
2021-06-20 12:36:28 | INFO | train_inner | epoch 005: 1192 / 3002 loss=2.471, ppl=5.55, wps=5863.3, ups=0.09, wpb=64861, bsz=128, num_updates=13123, lr=9.9903e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=151062
2021-06-20 12:36:39 | INFO | train_inner | epoch 005: 1193 / 3002 loss=2.463, ppl=5.51, wps=5737.3, ups=0.09, wpb=64854, bsz=128, num_updates=13124, lr=9.9903e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=151074
2021-06-20 12:36:50 | INFO | train_inner | epoch 005: 1194 / 3002 loss=2.363, ppl=5.14, wps=5763, ups=0.09, wpb=64804, bsz=128, num_updates=13125, lr=9.9903e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=151085
2021-06-20 12:37:02 | INFO | train_inner | epoch 005: 1195 / 3002 loss=2.528, ppl=5.77, wps=5847.8, ups=0.09, wpb=64806, bsz=128, num_updates=13126, lr=9.9903e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=151096
2021-06-20 12:37:13 | INFO | train_inner | epoch 005: 1196 / 3002 loss=2.475, ppl=5.56, wps=5889.8, ups=0.09, wpb=64790, bsz=128, num_updates=13127, lr=9.9903e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=151107
2021-06-20 12:37:24 | INFO | train_inner | epoch 005: 1197 / 3002 loss=2.475, ppl=5.56, wps=5857.2, ups=0.09, wpb=64869, bsz=128, num_updates=13128, lr=9.9903e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=151118
2021-06-20 12:37:35 | INFO | train_inner | epoch 005: 1198 / 3002 loss=2.46, ppl=5.5, wps=5891.1, ups=0.09, wpb=64904, bsz=128, num_updates=13129, lr=9.9903e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=151129
2021-06-20 12:37:46 | INFO | train_inner | epoch 005: 1199 / 3002 loss=2.603, ppl=6.08, wps=5816.4, ups=0.09, wpb=64871, bsz=128, num_updates=13130, lr=9.9903e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=151140
2021-06-20 12:37:57 | INFO | train_inner | epoch 005: 1200 / 3002 loss=2.632, ppl=6.2, wps=5769.1, ups=0.09, wpb=64812, bsz=128, num_updates=13131, lr=9.99029e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=151151
2021-06-20 12:38:08 | INFO | train_inner | epoch 005: 1201 / 3002 loss=2.47, ppl=5.54, wps=5866.5, ups=0.09, wpb=64876, bsz=128, num_updates=13132, lr=9.99029e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=151162
2021-06-20 12:38:19 | INFO | train_inner | epoch 005: 1202 / 3002 loss=2.473, ppl=5.55, wps=5730.5, ups=0.09, wpb=64858, bsz=128, num_updates=13133, lr=9.99029e-05, gnorm=1.922, loss_scale=8, train_wall=11, gb_free=2.8, wall=151174
2021-06-20 12:38:30 | INFO | train_inner | epoch 005: 1203 / 3002 loss=2.702, ppl=6.51, wps=5913.8, ups=0.09, wpb=64817, bsz=128, num_updates=13134, lr=9.99029e-05, gnorm=2.012, loss_scale=8, train_wall=10, gb_free=2.8, wall=151185
2021-06-20 12:38:41 | INFO | train_inner | epoch 005: 1204 / 3002 loss=2.604, ppl=6.08, wps=5874.3, ups=0.09, wpb=64794, bsz=128, num_updates=13135, lr=9.99029e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=151196
2021-06-20 12:38:52 | INFO | train_inner | epoch 005: 1205 / 3002 loss=2.504, ppl=5.67, wps=5839.8, ups=0.09, wpb=64832, bsz=128, num_updates=13136, lr=9.99029e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=151207
2021-06-20 12:39:04 | INFO | train_inner | epoch 005: 1206 / 3002 loss=2.432, ppl=5.4, wps=5694.9, ups=0.09, wpb=64883, bsz=128, num_updates=13137, lr=9.99029e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=151218
2021-06-20 12:39:15 | INFO | train_inner | epoch 005: 1207 / 3002 loss=2.652, ppl=6.28, wps=5802.9, ups=0.09, wpb=64786, bsz=128, num_updates=13138, lr=9.99029e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=151229
2021-06-20 12:39:26 | INFO | train_inner | epoch 005: 1208 / 3002 loss=2.454, ppl=5.48, wps=5814.1, ups=0.09, wpb=64846, bsz=128, num_updates=13139, lr=9.99029e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=151241
2021-06-20 12:39:37 | INFO | train_inner | epoch 005: 1209 / 3002 loss=2.368, ppl=5.16, wps=5776.9, ups=0.09, wpb=64804, bsz=128, num_updates=13140, lr=9.99029e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=151252
2021-06-20 12:39:49 | INFO | train_inner | epoch 005: 1210 / 3002 loss=2.516, ppl=5.72, wps=5801, ups=0.09, wpb=64792, bsz=128, num_updates=13141, lr=9.99029e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=151263
2021-06-20 12:40:00 | INFO | train_inner | epoch 005: 1211 / 3002 loss=2.553, ppl=5.87, wps=5864, ups=0.09, wpb=64793, bsz=128, num_updates=13142, lr=9.99029e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=151274
2021-06-20 12:40:10 | INFO | train_inner | epoch 005: 1212 / 3002 loss=2.574, ppl=5.95, wps=6003.2, ups=0.09, wpb=64913, bsz=128, num_updates=13143, lr=9.99028e-05, gnorm=1.954, loss_scale=8, train_wall=10, gb_free=2.8, wall=151285
2021-06-20 12:40:21 | INFO | train_inner | epoch 005: 1213 / 3002 loss=2.751, ppl=6.73, wps=5907.4, ups=0.09, wpb=64815, bsz=128, num_updates=13144, lr=9.99028e-05, gnorm=2.042, loss_scale=8, train_wall=11, gb_free=2.8, wall=151296
2021-06-20 12:40:33 | INFO | train_inner | epoch 005: 1214 / 3002 loss=2.461, ppl=5.51, wps=5808.3, ups=0.09, wpb=64818, bsz=128, num_updates=13145, lr=9.99028e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=151307
2021-06-20 12:40:44 | INFO | train_inner | epoch 005: 1215 / 3002 loss=2.523, ppl=5.75, wps=5802.4, ups=0.09, wpb=64816, bsz=128, num_updates=13146, lr=9.99028e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=151318
2021-06-20 12:40:55 | INFO | train_inner | epoch 005: 1216 / 3002 loss=2.46, ppl=5.5, wps=5813.5, ups=0.09, wpb=64863, bsz=128, num_updates=13147, lr=9.99028e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=151329
2021-06-20 12:41:06 | INFO | train_inner | epoch 005: 1217 / 3002 loss=2.468, ppl=5.53, wps=5737.3, ups=0.09, wpb=64813, bsz=128, num_updates=13148, lr=9.99028e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=151341
2021-06-20 12:41:17 | INFO | train_inner | epoch 005: 1218 / 3002 loss=2.455, ppl=5.48, wps=5804.7, ups=0.09, wpb=64797, bsz=128, num_updates=13149, lr=9.99028e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=151352
2021-06-20 12:41:29 | INFO | train_inner | epoch 005: 1219 / 3002 loss=2.505, ppl=5.68, wps=5755.4, ups=0.09, wpb=64859, bsz=128, num_updates=13150, lr=9.99028e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=151363
2021-06-20 12:41:40 | INFO | train_inner | epoch 005: 1220 / 3002 loss=2.457, ppl=5.49, wps=5891.7, ups=0.09, wpb=64816, bsz=128, num_updates=13151, lr=9.99028e-05, gnorm=2.082, loss_scale=8, train_wall=11, gb_free=2.8, wall=151374
2021-06-20 12:41:51 | INFO | train_inner | epoch 005: 1221 / 3002 loss=2.416, ppl=5.34, wps=5796.4, ups=0.09, wpb=64811, bsz=128, num_updates=13152, lr=9.99028e-05, gnorm=2.024, loss_scale=16, train_wall=11, gb_free=2.8, wall=151385
2021-06-20 12:42:02 | INFO | train_inner | epoch 005: 1222 / 3002 loss=2.404, ppl=5.29, wps=5857.9, ups=0.09, wpb=64781, bsz=128, num_updates=13153, lr=9.99028e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=151396
2021-06-20 12:42:13 | INFO | train_inner | epoch 005: 1223 / 3002 loss=2.618, ppl=6.14, wps=5774, ups=0.09, wpb=64803, bsz=128, num_updates=13154, lr=9.99028e-05, gnorm=2.139, loss_scale=16, train_wall=11, gb_free=2.8, wall=151407
2021-06-20 12:42:24 | INFO | train_inner | epoch 005: 1224 / 3002 loss=2.616, ppl=6.13, wps=5908.6, ups=0.09, wpb=64850, bsz=128, num_updates=13155, lr=9.99028e-05, gnorm=1.889, loss_scale=16, train_wall=11, gb_free=2.8, wall=151418
2021-06-20 12:42:35 | INFO | train_inner | epoch 005: 1225 / 3002 loss=2.664, ppl=6.34, wps=5860.6, ups=0.09, wpb=64854, bsz=128, num_updates=13156, lr=9.99027e-05, gnorm=1.981, loss_scale=16, train_wall=11, gb_free=2.8, wall=151429
2021-06-20 12:42:46 | INFO | train_inner | epoch 005: 1226 / 3002 loss=2.448, ppl=5.46, wps=5942.4, ups=0.09, wpb=64845, bsz=128, num_updates=13157, lr=9.99027e-05, gnorm=1.944, loss_scale=16, train_wall=10, gb_free=2.8, wall=151440
2021-06-20 12:42:57 | INFO | train_inner | epoch 005: 1227 / 3002 loss=2.65, ppl=6.28, wps=5872.1, ups=0.09, wpb=64833, bsz=128, num_updates=13158, lr=9.99027e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=151451
2021-06-20 12:43:08 | INFO | train_inner | epoch 005: 1228 / 3002 loss=2.548, ppl=5.85, wps=5842.5, ups=0.09, wpb=64773, bsz=128, num_updates=13159, lr=9.99027e-05, gnorm=1.979, loss_scale=16, train_wall=11, gb_free=2.8, wall=151463
2021-06-20 12:43:19 | INFO | train_inner | epoch 005: 1229 / 3002 loss=2.488, ppl=5.61, wps=5828.7, ups=0.09, wpb=64820, bsz=128, num_updates=13160, lr=9.99027e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=151474
2021-06-20 12:43:30 | INFO | train_inner | epoch 005: 1230 / 3002 loss=2.507, ppl=5.69, wps=5830.8, ups=0.09, wpb=64837, bsz=128, num_updates=13161, lr=9.99027e-05, gnorm=1.961, loss_scale=16, train_wall=11, gb_free=2.8, wall=151485
2021-06-20 12:43:41 | INFO | train_inner | epoch 005: 1231 / 3002 loss=2.462, ppl=5.51, wps=6009.4, ups=0.09, wpb=64855, bsz=128, num_updates=13162, lr=9.99027e-05, gnorm=2.117, loss_scale=16, train_wall=10, gb_free=2.8, wall=151496
2021-06-20 12:43:52 | INFO | train_inner | epoch 005: 1232 / 3002 loss=2.493, ppl=5.63, wps=5941.8, ups=0.09, wpb=64904, bsz=128, num_updates=13163, lr=9.99027e-05, gnorm=2.07, loss_scale=16, train_wall=10, gb_free=2.8, wall=151506
2021-06-20 12:44:03 | INFO | train_inner | epoch 005: 1233 / 3002 loss=2.429, ppl=5.39, wps=5933.7, ups=0.09, wpb=64824, bsz=128, num_updates=13164, lr=9.99027e-05, gnorm=2.068, loss_scale=16, train_wall=10, gb_free=2.8, wall=151517
2021-06-20 12:44:14 | INFO | train_inner | epoch 005: 1234 / 3002 loss=2.457, ppl=5.49, wps=5938.4, ups=0.09, wpb=64781, bsz=128, num_updates=13165, lr=9.99027e-05, gnorm=2.014, loss_scale=16, train_wall=10, gb_free=2.8, wall=151528
2021-06-20 12:44:25 | INFO | train_inner | epoch 005: 1235 / 3002 loss=2.598, ppl=6.05, wps=5785.3, ups=0.09, wpb=64779, bsz=128, num_updates=13166, lr=9.99027e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=151539
2021-06-20 12:44:36 | INFO | train_inner | epoch 005: 1236 / 3002 loss=2.809, ppl=7.01, wps=5774.8, ups=0.09, wpb=64814, bsz=128, num_updates=13167, lr=9.99027e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=151551
2021-06-20 12:44:48 | INFO | train_inner | epoch 005: 1237 / 3002 loss=2.442, ppl=5.43, wps=5812.9, ups=0.09, wpb=64814, bsz=128, num_updates=13168, lr=9.99026e-05, gnorm=2.122, loss_scale=16, train_wall=11, gb_free=2.8, wall=151562
2021-06-20 12:44:59 | INFO | train_inner | epoch 005: 1238 / 3002 loss=2.432, ppl=5.39, wps=5740.5, ups=0.09, wpb=64784, bsz=128, num_updates=13169, lr=9.99026e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=151573
2021-06-20 12:45:10 | INFO | train_inner | epoch 005: 1239 / 3002 loss=2.494, ppl=5.63, wps=5844.5, ups=0.09, wpb=64797, bsz=128, num_updates=13170, lr=9.99026e-05, gnorm=2.112, loss_scale=16, train_wall=11, gb_free=2.8, wall=151584
2021-06-20 12:45:21 | INFO | train_inner | epoch 005: 1240 / 3002 loss=2.551, ppl=5.86, wps=5873.6, ups=0.09, wpb=64829, bsz=128, num_updates=13171, lr=9.99026e-05, gnorm=2.085, loss_scale=16, train_wall=11, gb_free=2.8, wall=151595
2021-06-20 12:45:32 | INFO | train_inner | epoch 005: 1241 / 3002 loss=2.454, ppl=5.48, wps=5820.8, ups=0.09, wpb=64923, bsz=128, num_updates=13172, lr=9.99026e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=151606
2021-06-20 12:45:43 | INFO | train_inner | epoch 005: 1242 / 3002 loss=2.514, ppl=5.71, wps=5842.4, ups=0.09, wpb=64798, bsz=128, num_updates=13173, lr=9.99026e-05, gnorm=2.033, loss_scale=16, train_wall=11, gb_free=2.8, wall=151618
2021-06-20 12:45:54 | INFO | train_inner | epoch 005: 1243 / 3002 loss=2.513, ppl=5.71, wps=5805.8, ups=0.09, wpb=64870, bsz=128, num_updates=13174, lr=9.99026e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=151629
2021-06-20 12:46:05 | INFO | train_inner | epoch 005: 1244 / 3002 loss=2.503, ppl=5.67, wps=5852.4, ups=0.09, wpb=64840, bsz=128, num_updates=13175, lr=9.99026e-05, gnorm=1.975, loss_scale=16, train_wall=11, gb_free=2.8, wall=151640
2021-06-20 12:46:17 | INFO | train_inner | epoch 005: 1245 / 3002 loss=2.448, ppl=5.46, wps=5841.3, ups=0.09, wpb=64856, bsz=128, num_updates=13176, lr=9.99026e-05, gnorm=1.995, loss_scale=16, train_wall=11, gb_free=2.8, wall=151651
2021-06-20 12:46:28 | INFO | train_inner | epoch 005: 1246 / 3002 loss=2.473, ppl=5.55, wps=5742.3, ups=0.09, wpb=64919, bsz=128, num_updates=13177, lr=9.99026e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=151662
2021-06-20 12:46:39 | INFO | train_inner | epoch 005: 1247 / 3002 loss=2.489, ppl=5.61, wps=5791.8, ups=0.09, wpb=64801, bsz=128, num_updates=13178, lr=9.99026e-05, gnorm=1.968, loss_scale=16, train_wall=11, gb_free=2.8, wall=151673
2021-06-20 12:46:50 | INFO | train_inner | epoch 005: 1248 / 3002 loss=2.594, ppl=6.04, wps=5874.3, ups=0.09, wpb=64784, bsz=128, num_updates=13179, lr=9.99026e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=151684
2021-06-20 12:47:01 | INFO | train_inner | epoch 005: 1249 / 3002 loss=2.382, ppl=5.21, wps=5798.7, ups=0.09, wpb=64878, bsz=128, num_updates=13180, lr=9.99026e-05, gnorm=2.039, loss_scale=16, train_wall=11, gb_free=2.8, wall=151696
2021-06-20 12:47:12 | INFO | train_inner | epoch 005: 1250 / 3002 loss=2.642, ppl=6.24, wps=5800.8, ups=0.09, wpb=64780, bsz=128, num_updates=13181, lr=9.99025e-05, gnorm=1.913, loss_scale=16, train_wall=11, gb_free=2.8, wall=151707
2021-06-20 12:47:23 | INFO | train_inner | epoch 005: 1251 / 3002 loss=2.553, ppl=5.87, wps=5951.7, ups=0.09, wpb=64864, bsz=128, num_updates=13182, lr=9.99025e-05, gnorm=2.062, loss_scale=16, train_wall=10, gb_free=2.8, wall=151718
2021-06-20 12:47:34 | INFO | train_inner | epoch 005: 1252 / 3002 loss=2.53, ppl=5.77, wps=5851.9, ups=0.09, wpb=64771, bsz=128, num_updates=13183, lr=9.99025e-05, gnorm=2.01, loss_scale=16, train_wall=11, gb_free=2.8, wall=151729
2021-06-20 12:47:46 | INFO | train_inner | epoch 005: 1253 / 3002 loss=2.385, ppl=5.22, wps=5822, ups=0.09, wpb=64871, bsz=128, num_updates=13184, lr=9.99025e-05, gnorm=1.909, loss_scale=16, train_wall=11, gb_free=2.8, wall=151740
2021-06-20 12:47:57 | INFO | train_inner | epoch 005: 1254 / 3002 loss=2.669, ppl=6.36, wps=5820.7, ups=0.09, wpb=64786, bsz=128, num_updates=13185, lr=9.99025e-05, gnorm=2.12, loss_scale=16, train_wall=11, gb_free=2.8, wall=151751
2021-06-20 12:48:08 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 12:48:19 | INFO | train_inner | epoch 005: 1256 / 3002 loss=2.54, ppl=5.82, wps=2894.8, ups=0.04, wpb=64853, bsz=128, num_updates=13186, lr=9.99025e-05, gnorm=1.961, loss_scale=8, train_wall=21, gb_free=2.8, wall=151773
2021-06-20 12:48:30 | INFO | train_inner | epoch 005: 1257 / 3002 loss=2.459, ppl=5.5, wps=5906.3, ups=0.09, wpb=64814, bsz=128, num_updates=13187, lr=9.99025e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=151784
2021-06-20 12:48:41 | INFO | train_inner | epoch 005: 1258 / 3002 loss=2.458, ppl=5.49, wps=5938.7, ups=0.09, wpb=64889, bsz=128, num_updates=13188, lr=9.99025e-05, gnorm=1.98, loss_scale=8, train_wall=10, gb_free=2.8, wall=151795
2021-06-20 12:48:52 | INFO | train_inner | epoch 005: 1259 / 3002 loss=2.497, ppl=5.65, wps=5765.8, ups=0.09, wpb=64863, bsz=128, num_updates=13189, lr=9.99025e-05, gnorm=1.914, loss_scale=8, train_wall=11, gb_free=2.8, wall=151807
2021-06-20 12:49:03 | INFO | train_inner | epoch 005: 1260 / 3002 loss=2.622, ppl=6.16, wps=5790.9, ups=0.09, wpb=64785, bsz=128, num_updates=13190, lr=9.99025e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=151818
2021-06-20 12:49:15 | INFO | train_inner | epoch 005: 1261 / 3002 loss=2.508, ppl=5.69, wps=5727.2, ups=0.09, wpb=64788, bsz=128, num_updates=13191, lr=9.99025e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=151829
2021-06-20 12:49:26 | INFO | train_inner | epoch 005: 1262 / 3002 loss=2.577, ppl=5.97, wps=5769.3, ups=0.09, wpb=64886, bsz=128, num_updates=13192, lr=9.99025e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=151840
2021-06-20 12:49:37 | INFO | train_inner | epoch 005: 1263 / 3002 loss=2.44, ppl=5.43, wps=5926, ups=0.09, wpb=64927, bsz=128, num_updates=13193, lr=9.99024e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=151851
2021-06-20 12:49:48 | INFO | train_inner | epoch 005: 1264 / 3002 loss=2.636, ppl=6.22, wps=5792.3, ups=0.09, wpb=64838, bsz=128, num_updates=13194, lr=9.99024e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=151862
2021-06-20 12:49:59 | INFO | train_inner | epoch 005: 1265 / 3002 loss=2.4, ppl=5.28, wps=5811.9, ups=0.09, wpb=64745, bsz=128, num_updates=13195, lr=9.99024e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=151874
2021-06-20 12:50:10 | INFO | train_inner | epoch 005: 1266 / 3002 loss=2.575, ppl=5.96, wps=5888.5, ups=0.09, wpb=64837, bsz=128, num_updates=13196, lr=9.99024e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=151885
2021-06-20 12:50:21 | INFO | train_inner | epoch 005: 1267 / 3002 loss=2.705, ppl=6.52, wps=5866.6, ups=0.09, wpb=64856, bsz=128, num_updates=13197, lr=9.99024e-05, gnorm=2.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=151896
2021-06-20 12:50:32 | INFO | train_inner | epoch 005: 1268 / 3002 loss=2.538, ppl=5.81, wps=5896.6, ups=0.09, wpb=64809, bsz=128, num_updates=13198, lr=9.99024e-05, gnorm=2.082, loss_scale=8, train_wall=11, gb_free=2.8, wall=151907
2021-06-20 12:50:43 | INFO | train_inner | epoch 005: 1269 / 3002 loss=2.369, ppl=5.16, wps=5832.7, ups=0.09, wpb=64846, bsz=128, num_updates=13199, lr=9.99024e-05, gnorm=1.871, loss_scale=8, train_wall=11, gb_free=2.8, wall=151918
2021-06-20 12:50:55 | INFO | train_inner | epoch 005: 1270 / 3002 loss=2.456, ppl=5.49, wps=5853.3, ups=0.09, wpb=64856, bsz=128, num_updates=13200, lr=9.99024e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=151929
2021-06-20 12:51:05 | INFO | train_inner | epoch 005: 1271 / 3002 loss=2.47, ppl=5.54, wps=5939.6, ups=0.09, wpb=64762, bsz=128, num_updates=13201, lr=9.99024e-05, gnorm=1.98, loss_scale=8, train_wall=10, gb_free=2.8, wall=151940
2021-06-20 12:51:17 | INFO | train_inner | epoch 005: 1272 / 3002 loss=2.54, ppl=5.81, wps=5848.2, ups=0.09, wpb=64859, bsz=128, num_updates=13202, lr=9.99024e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=151951
2021-06-20 12:51:28 | INFO | train_inner | epoch 005: 1273 / 3002 loss=2.572, ppl=5.95, wps=5768, ups=0.09, wpb=64877, bsz=128, num_updates=13203, lr=9.99024e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=151962
2021-06-20 12:51:39 | INFO | train_inner | epoch 005: 1274 / 3002 loss=2.385, ppl=5.22, wps=5725.9, ups=0.09, wpb=64877, bsz=128, num_updates=13204, lr=9.99024e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=151973
2021-06-20 12:51:50 | INFO | train_inner | epoch 005: 1275 / 3002 loss=2.561, ppl=5.9, wps=5804.1, ups=0.09, wpb=64769, bsz=128, num_updates=13205, lr=9.99024e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=151985
2021-06-20 12:52:01 | INFO | train_inner | epoch 005: 1276 / 3002 loss=2.45, ppl=5.46, wps=5918.3, ups=0.09, wpb=64812, bsz=128, num_updates=13206, lr=9.99023e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=151996
2021-06-20 12:52:12 | INFO | train_inner | epoch 005: 1277 / 3002 loss=2.637, ppl=6.22, wps=5766.1, ups=0.09, wpb=64845, bsz=128, num_updates=13207, lr=9.99023e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=152007
2021-06-20 12:52:24 | INFO | train_inner | epoch 005: 1278 / 3002 loss=2.714, ppl=6.56, wps=5805.2, ups=0.09, wpb=64820, bsz=128, num_updates=13208, lr=9.99023e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=152018
2021-06-20 12:52:34 | INFO | train_inner | epoch 005: 1279 / 3002 loss=2.6, ppl=6.06, wps=5998.2, ups=0.09, wpb=64866, bsz=128, num_updates=13209, lr=9.99023e-05, gnorm=1.901, loss_scale=8, train_wall=10, gb_free=2.8, wall=152029
2021-06-20 12:52:46 | INFO | train_inner | epoch 005: 1280 / 3002 loss=2.499, ppl=5.65, wps=5721.2, ups=0.09, wpb=64779, bsz=128, num_updates=13210, lr=9.99023e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=152040
2021-06-20 12:52:57 | INFO | train_inner | epoch 005: 1281 / 3002 loss=2.548, ppl=5.85, wps=5837.3, ups=0.09, wpb=64845, bsz=128, num_updates=13211, lr=9.99023e-05, gnorm=2.26, loss_scale=8, train_wall=11, gb_free=2.8, wall=152051
2021-06-20 12:53:08 | INFO | train_inner | epoch 005: 1282 / 3002 loss=2.547, ppl=5.84, wps=5932.1, ups=0.09, wpb=64893, bsz=128, num_updates=13212, lr=9.99023e-05, gnorm=2.034, loss_scale=8, train_wall=10, gb_free=2.8, wall=152062
2021-06-20 12:53:19 | INFO | train_inner | epoch 005: 1283 / 3002 loss=2.394, ppl=5.26, wps=5843.6, ups=0.09, wpb=64779, bsz=128, num_updates=13213, lr=9.99023e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=152073
2021-06-20 12:53:30 | INFO | train_inner | epoch 005: 1284 / 3002 loss=2.442, ppl=5.44, wps=5831, ups=0.09, wpb=64721, bsz=128, num_updates=13214, lr=9.99023e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=152084
2021-06-20 12:53:41 | INFO | train_inner | epoch 005: 1285 / 3002 loss=2.448, ppl=5.46, wps=6040.5, ups=0.09, wpb=64835, bsz=128, num_updates=13215, lr=9.99023e-05, gnorm=1.972, loss_scale=8, train_wall=10, gb_free=2.8, wall=152095
2021-06-20 12:53:52 | INFO | train_inner | epoch 005: 1286 / 3002 loss=2.458, ppl=5.49, wps=5937, ups=0.09, wpb=64882, bsz=128, num_updates=13216, lr=9.99023e-05, gnorm=1.979, loss_scale=8, train_wall=10, gb_free=2.8, wall=152106
2021-06-20 12:54:03 | INFO | train_inner | epoch 005: 1287 / 3002 loss=2.477, ppl=5.57, wps=5764.4, ups=0.09, wpb=64856, bsz=128, num_updates=13217, lr=9.99023e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=152117
2021-06-20 12:54:14 | INFO | train_inner | epoch 005: 1288 / 3002 loss=2.412, ppl=5.32, wps=5827.9, ups=0.09, wpb=64818, bsz=128, num_updates=13218, lr=9.99022e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=152128
2021-06-20 12:54:25 | INFO | train_inner | epoch 005: 1289 / 3002 loss=2.379, ppl=5.2, wps=5813.5, ups=0.09, wpb=64859, bsz=128, num_updates=13219, lr=9.99022e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=152140
2021-06-20 12:54:36 | INFO | train_inner | epoch 005: 1290 / 3002 loss=2.458, ppl=5.49, wps=5873.5, ups=0.09, wpb=64846, bsz=128, num_updates=13220, lr=9.99022e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=152151
2021-06-20 12:54:47 | INFO | train_inner | epoch 005: 1291 / 3002 loss=2.375, ppl=5.19, wps=5873.5, ups=0.09, wpb=64813, bsz=128, num_updates=13221, lr=9.99022e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=152162
2021-06-20 12:54:59 | INFO | train_inner | epoch 005: 1292 / 3002 loss=2.416, ppl=5.34, wps=5756.3, ups=0.09, wpb=64796, bsz=128, num_updates=13222, lr=9.99022e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=152173
2021-06-20 12:55:10 | INFO | train_inner | epoch 005: 1293 / 3002 loss=2.582, ppl=5.99, wps=5827.7, ups=0.09, wpb=64842, bsz=128, num_updates=13223, lr=9.99022e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=152184
2021-06-20 12:55:21 | INFO | train_inner | epoch 005: 1294 / 3002 loss=2.627, ppl=6.18, wps=5886.6, ups=0.09, wpb=64795, bsz=128, num_updates=13224, lr=9.99022e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=152195
2021-06-20 12:55:32 | INFO | train_inner | epoch 005: 1295 / 3002 loss=2.616, ppl=6.13, wps=5864.3, ups=0.09, wpb=64853, bsz=128, num_updates=13225, lr=9.99022e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=152206
2021-06-20 12:55:43 | INFO | train_inner | epoch 005: 1296 / 3002 loss=2.642, ppl=6.24, wps=5924.1, ups=0.09, wpb=64826, bsz=128, num_updates=13226, lr=9.99022e-05, gnorm=1.946, loss_scale=8, train_wall=10, gb_free=2.8, wall=152217
2021-06-20 12:55:53 | INFO | train_inner | epoch 005: 1297 / 3002 loss=2.489, ppl=5.62, wps=6012.1, ups=0.09, wpb=64741, bsz=128, num_updates=13227, lr=9.99022e-05, gnorm=2, loss_scale=8, train_wall=10, gb_free=2.8, wall=152228
2021-06-20 12:56:04 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-20 12:56:16 | INFO | train_inner | epoch 005: 1299 / 3002 loss=2.601, ppl=6.07, wps=2925, ups=0.05, wpb=64724, bsz=128, num_updates=13228, lr=9.99022e-05, gnorm=2.033, loss_scale=4, train_wall=21, gb_free=2.8, wall=152250
2021-06-20 12:56:27 | INFO | train_inner | epoch 005: 1300 / 3002 loss=2.429, ppl=5.39, wps=5916.9, ups=0.09, wpb=64868, bsz=128, num_updates=13229, lr=9.99022e-05, gnorm=1.881, loss_scale=4, train_wall=10, gb_free=2.8, wall=152261
2021-06-20 12:56:38 | INFO | train_inner | epoch 005: 1301 / 3002 loss=2.642, ppl=6.24, wps=5825.5, ups=0.09, wpb=64891, bsz=128, num_updates=13230, lr=9.99022e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=152272
2021-06-20 12:56:49 | INFO | train_inner | epoch 005: 1302 / 3002 loss=2.545, ppl=5.83, wps=5810, ups=0.09, wpb=64842, bsz=128, num_updates=13231, lr=9.99021e-05, gnorm=1.994, loss_scale=4, train_wall=11, gb_free=2.8, wall=152283
2021-06-20 12:57:00 | INFO | train_inner | epoch 005: 1303 / 3002 loss=2.425, ppl=5.37, wps=5901.2, ups=0.09, wpb=64855, bsz=128, num_updates=13232, lr=9.99021e-05, gnorm=2.406, loss_scale=4, train_wall=11, gb_free=2.8, wall=152294
2021-06-20 12:57:11 | INFO | train_inner | epoch 005: 1304 / 3002 loss=2.462, ppl=5.51, wps=5822.1, ups=0.09, wpb=64823, bsz=128, num_updates=13233, lr=9.99021e-05, gnorm=1.866, loss_scale=4, train_wall=11, gb_free=2.8, wall=152305
2021-06-20 12:57:22 | INFO | train_inner | epoch 005: 1305 / 3002 loss=2.541, ppl=5.82, wps=5854.5, ups=0.09, wpb=64823, bsz=128, num_updates=13234, lr=9.99021e-05, gnorm=1.953, loss_scale=4, train_wall=11, gb_free=2.8, wall=152316
2021-06-20 12:57:33 | INFO | train_inner | epoch 005: 1306 / 3002 loss=2.501, ppl=5.66, wps=5890.4, ups=0.09, wpb=64903, bsz=128, num_updates=13235, lr=9.99021e-05, gnorm=2.082, loss_scale=4, train_wall=11, gb_free=2.8, wall=152327
2021-06-20 12:57:44 | INFO | train_inner | epoch 005: 1307 / 3002 loss=2.467, ppl=5.53, wps=5827.9, ups=0.09, wpb=64922, bsz=128, num_updates=13236, lr=9.99021e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=152339
2021-06-20 12:57:55 | INFO | train_inner | epoch 005: 1308 / 3002 loss=2.415, ppl=5.33, wps=5893.8, ups=0.09, wpb=64884, bsz=128, num_updates=13237, lr=9.99021e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=152350
2021-06-20 12:58:06 | INFO | train_inner | epoch 005: 1309 / 3002 loss=2.538, ppl=5.81, wps=5882.3, ups=0.09, wpb=64865, bsz=128, num_updates=13238, lr=9.99021e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=152361
2021-06-20 12:58:17 | INFO | train_inner | epoch 005: 1310 / 3002 loss=2.482, ppl=5.59, wps=5901.2, ups=0.09, wpb=64848, bsz=128, num_updates=13239, lr=9.99021e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=152372
2021-06-20 12:58:28 | INFO | train_inner | epoch 005: 1311 / 3002 loss=2.526, ppl=5.76, wps=5900.7, ups=0.09, wpb=64785, bsz=128, num_updates=13240, lr=9.99021e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=152383
2021-06-20 12:58:39 | INFO | train_inner | epoch 005: 1312 / 3002 loss=2.442, ppl=5.44, wps=5823.4, ups=0.09, wpb=64865, bsz=128, num_updates=13241, lr=9.99021e-05, gnorm=1.914, loss_scale=4, train_wall=11, gb_free=2.8, wall=152394
2021-06-20 12:58:50 | INFO | train_inner | epoch 005: 1313 / 3002 loss=2.431, ppl=5.39, wps=5802.1, ups=0.09, wpb=64845, bsz=128, num_updates=13242, lr=9.99021e-05, gnorm=2.038, loss_scale=4, train_wall=11, gb_free=2.8, wall=152405
2021-06-20 12:59:01 | INFO | train_inner | epoch 005: 1314 / 3002 loss=2.57, ppl=5.94, wps=5893.5, ups=0.09, wpb=64835, bsz=128, num_updates=13243, lr=9.9902e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=152416
2021-06-20 12:59:13 | INFO | train_inner | epoch 005: 1315 / 3002 loss=2.632, ppl=6.2, wps=5800.3, ups=0.09, wpb=64809, bsz=128, num_updates=13244, lr=9.9902e-05, gnorm=1.914, loss_scale=4, train_wall=11, gb_free=2.8, wall=152427
2021-06-20 12:59:24 | INFO | train_inner | epoch 005: 1316 / 3002 loss=2.425, ppl=5.37, wps=5771.5, ups=0.09, wpb=64771, bsz=128, num_updates=13245, lr=9.9902e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=152438
2021-06-20 12:59:35 | INFO | train_inner | epoch 005: 1317 / 3002 loss=2.662, ppl=6.33, wps=5806.9, ups=0.09, wpb=64837, bsz=128, num_updates=13246, lr=9.9902e-05, gnorm=2.078, loss_scale=4, train_wall=11, gb_free=2.8, wall=152449
2021-06-20 12:59:46 | INFO | train_inner | epoch 005: 1318 / 3002 loss=2.431, ppl=5.39, wps=5840.6, ups=0.09, wpb=64842, bsz=128, num_updates=13247, lr=9.9902e-05, gnorm=2.015, loss_scale=4, train_wall=11, gb_free=2.8, wall=152460
2021-06-20 12:59:57 | INFO | train_inner | epoch 005: 1319 / 3002 loss=2.475, ppl=5.56, wps=5947, ups=0.09, wpb=64856, bsz=128, num_updates=13248, lr=9.9902e-05, gnorm=1.968, loss_scale=4, train_wall=10, gb_free=2.8, wall=152471
2021-06-20 13:00:08 | INFO | train_inner | epoch 005: 1320 / 3002 loss=2.574, ppl=5.96, wps=5769.9, ups=0.09, wpb=64736, bsz=128, num_updates=13249, lr=9.9902e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=152483
2021-06-20 13:00:19 | INFO | train_inner | epoch 005: 1321 / 3002 loss=2.562, ppl=5.9, wps=5917.1, ups=0.09, wpb=64841, bsz=128, num_updates=13250, lr=9.9902e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=152494
2021-06-20 13:00:30 | INFO | train_inner | epoch 005: 1322 / 3002 loss=2.51, ppl=5.7, wps=5811, ups=0.09, wpb=64877, bsz=128, num_updates=13251, lr=9.9902e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=152505
2021-06-20 13:00:42 | INFO | train_inner | epoch 005: 1323 / 3002 loss=2.391, ppl=5.25, wps=5754.7, ups=0.09, wpb=64819, bsz=128, num_updates=13252, lr=9.9902e-05, gnorm=1.981, loss_scale=4, train_wall=11, gb_free=2.8, wall=152516
2021-06-20 13:00:53 | INFO | train_inner | epoch 005: 1324 / 3002 loss=2.512, ppl=5.7, wps=5860.3, ups=0.09, wpb=64809, bsz=128, num_updates=13253, lr=9.9902e-05, gnorm=2.113, loss_scale=4, train_wall=11, gb_free=2.8, wall=152527
2021-06-20 13:01:04 | INFO | train_inner | epoch 005: 1325 / 3002 loss=2.469, ppl=5.54, wps=5687.5, ups=0.09, wpb=64821, bsz=128, num_updates=13254, lr=9.9902e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=152538
2021-06-20 13:01:15 | INFO | train_inner | epoch 005: 1326 / 3002 loss=2.541, ppl=5.82, wps=5778.3, ups=0.09, wpb=64803, bsz=128, num_updates=13255, lr=9.9902e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=152550
2021-06-20 13:01:26 | INFO | train_inner | epoch 005: 1327 / 3002 loss=2.533, ppl=5.79, wps=5952.4, ups=0.09, wpb=64853, bsz=128, num_updates=13256, lr=9.99019e-05, gnorm=1.989, loss_scale=4, train_wall=10, gb_free=2.8, wall=152561
2021-06-20 13:01:37 | INFO | train_inner | epoch 005: 1328 / 3002 loss=2.526, ppl=5.76, wps=5826.4, ups=0.09, wpb=64772, bsz=128, num_updates=13257, lr=9.99019e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=152572
2021-06-20 13:01:49 | INFO | train_inner | epoch 005: 1329 / 3002 loss=2.33, ppl=5.03, wps=5784.6, ups=0.09, wpb=64886, bsz=128, num_updates=13258, lr=9.99019e-05, gnorm=1.857, loss_scale=4, train_wall=11, gb_free=2.8, wall=152583
2021-06-20 13:02:00 | INFO | train_inner | epoch 005: 1330 / 3002 loss=2.532, ppl=5.78, wps=5788.6, ups=0.09, wpb=64793, bsz=128, num_updates=13259, lr=9.99019e-05, gnorm=1.938, loss_scale=4, train_wall=11, gb_free=2.8, wall=152594
2021-06-20 13:02:11 | INFO | train_inner | epoch 005: 1331 / 3002 loss=2.671, ppl=6.37, wps=5831, ups=0.09, wpb=64872, bsz=128, num_updates=13260, lr=9.99019e-05, gnorm=2.323, loss_scale=4, train_wall=11, gb_free=2.8, wall=152605
2021-06-20 13:02:22 | INFO | train_inner | epoch 005: 1332 / 3002 loss=2.632, ppl=6.2, wps=5909.2, ups=0.09, wpb=64831, bsz=128, num_updates=13261, lr=9.99019e-05, gnorm=2.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=152616
2021-06-20 13:02:33 | INFO | train_inner | epoch 005: 1333 / 3002 loss=2.586, ppl=6, wps=5848.2, ups=0.09, wpb=64892, bsz=128, num_updates=13262, lr=9.99019e-05, gnorm=1.899, loss_scale=4, train_wall=11, gb_free=2.8, wall=152627
2021-06-20 13:02:44 | INFO | train_inner | epoch 005: 1334 / 3002 loss=2.592, ppl=6.03, wps=5779.4, ups=0.09, wpb=64781, bsz=128, num_updates=13263, lr=9.99019e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=152638
2021-06-20 13:02:55 | INFO | train_inner | epoch 005: 1335 / 3002 loss=2.473, ppl=5.55, wps=5970.2, ups=0.09, wpb=64836, bsz=128, num_updates=13264, lr=9.99019e-05, gnorm=1.973, loss_scale=4, train_wall=10, gb_free=2.8, wall=152649
2021-06-20 13:03:06 | INFO | train_inner | epoch 005: 1336 / 3002 loss=2.506, ppl=5.68, wps=5947, ups=0.09, wpb=64735, bsz=128, num_updates=13265, lr=9.99019e-05, gnorm=1.869, loss_scale=4, train_wall=10, gb_free=2.8, wall=152660
2021-06-20 13:03:17 | INFO | train_inner | epoch 005: 1337 / 3002 loss=2.585, ppl=6, wps=5831.5, ups=0.09, wpb=64832, bsz=128, num_updates=13266, lr=9.99019e-05, gnorm=2.095, loss_scale=4, train_wall=11, gb_free=2.8, wall=152671
2021-06-20 13:03:28 | INFO | train_inner | epoch 005: 1338 / 3002 loss=2.604, ppl=6.08, wps=5914, ups=0.09, wpb=64800, bsz=128, num_updates=13267, lr=9.99019e-05, gnorm=2.14, loss_scale=4, train_wall=11, gb_free=2.8, wall=152682
2021-06-20 13:03:39 | INFO | train_inner | epoch 005: 1339 / 3002 loss=2.532, ppl=5.78, wps=5825.5, ups=0.09, wpb=64852, bsz=128, num_updates=13268, lr=9.99018e-05, gnorm=1.884, loss_scale=4, train_wall=11, gb_free=2.8, wall=152693
2021-06-20 13:03:50 | INFO | train_inner | epoch 005: 1340 / 3002 loss=2.606, ppl=6.09, wps=5839.8, ups=0.09, wpb=64777, bsz=128, num_updates=13269, lr=9.99018e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=152705
2021-06-20 13:04:01 | INFO | train_inner | epoch 005: 1341 / 3002 loss=2.629, ppl=6.19, wps=5806.4, ups=0.09, wpb=64807, bsz=128, num_updates=13270, lr=9.99018e-05, gnorm=1.997, loss_scale=4, train_wall=11, gb_free=2.8, wall=152716
2021-06-20 13:04:12 | INFO | train_inner | epoch 005: 1342 / 3002 loss=2.557, ppl=5.88, wps=5885.5, ups=0.09, wpb=64838, bsz=128, num_updates=13271, lr=9.99018e-05, gnorm=2.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=152727
2021-06-20 13:04:23 | INFO | train_inner | epoch 005: 1343 / 3002 loss=2.452, ppl=5.47, wps=5935.3, ups=0.09, wpb=64875, bsz=128, num_updates=13272, lr=9.99018e-05, gnorm=1.914, loss_scale=4, train_wall=10, gb_free=2.8, wall=152738
2021-06-20 13:04:35 | INFO | train_inner | epoch 005: 1344 / 3002 loss=2.469, ppl=5.54, wps=5743.7, ups=0.09, wpb=64792, bsz=128, num_updates=13273, lr=9.99018e-05, gnorm=2.197, loss_scale=4, train_wall=11, gb_free=2.8, wall=152749
2021-06-20 13:04:46 | INFO | train_inner | epoch 005: 1345 / 3002 loss=2.49, ppl=5.62, wps=5882.6, ups=0.09, wpb=64796, bsz=128, num_updates=13274, lr=9.99018e-05, gnorm=1.943, loss_scale=4, train_wall=11, gb_free=2.8, wall=152760
2021-06-20 13:04:57 | INFO | train_inner | epoch 005: 1346 / 3002 loss=2.589, ppl=6.02, wps=5939.6, ups=0.09, wpb=64902, bsz=128, num_updates=13275, lr=9.99018e-05, gnorm=1.993, loss_scale=4, train_wall=10, gb_free=2.8, wall=152771
2021-06-20 13:05:08 | INFO | train_inner | epoch 005: 1347 / 3002 loss=2.58, ppl=5.98, wps=5847.3, ups=0.09, wpb=64805, bsz=128, num_updates=13276, lr=9.99018e-05, gnorm=2.04, loss_scale=4, train_wall=11, gb_free=2.8, wall=152782
2021-06-20 13:05:19 | INFO | train_inner | epoch 005: 1348 / 3002 loss=2.489, ppl=5.61, wps=5812.1, ups=0.09, wpb=64878, bsz=128, num_updates=13277, lr=9.99018e-05, gnorm=2.171, loss_scale=4, train_wall=11, gb_free=2.8, wall=152793
2021-06-20 13:05:30 | INFO | train_inner | epoch 005: 1349 / 3002 loss=2.415, ppl=5.33, wps=5793.9, ups=0.09, wpb=64827, bsz=128, num_updates=13278, lr=9.99018e-05, gnorm=1.916, loss_scale=4, train_wall=11, gb_free=2.8, wall=152804
2021-06-20 13:05:41 | INFO | train_inner | epoch 005: 1350 / 3002 loss=2.44, ppl=5.43, wps=5755.7, ups=0.09, wpb=64830, bsz=128, num_updates=13279, lr=9.99018e-05, gnorm=1.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=152816
2021-06-20 13:05:52 | INFO | train_inner | epoch 005: 1351 / 3002 loss=2.472, ppl=5.55, wps=5905.1, ups=0.09, wpb=64795, bsz=128, num_updates=13280, lr=9.99018e-05, gnorm=2.04, loss_scale=4, train_wall=11, gb_free=2.8, wall=152827
2021-06-20 13:06:03 | INFO | train_inner | epoch 005: 1352 / 3002 loss=2.592, ppl=6.03, wps=5789.7, ups=0.09, wpb=64754, bsz=128, num_updates=13281, lr=9.99017e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=152838
2021-06-20 13:06:14 | INFO | train_inner | epoch 005: 1353 / 3002 loss=2.473, ppl=5.55, wps=5877.9, ups=0.09, wpb=64850, bsz=128, num_updates=13282, lr=9.99017e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=152849
2021-06-20 13:06:26 | INFO | train_inner | epoch 005: 1354 / 3002 loss=2.466, ppl=5.52, wps=5799.4, ups=0.09, wpb=64827, bsz=128, num_updates=13283, lr=9.99017e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=152860
2021-06-20 13:06:37 | INFO | train_inner | epoch 005: 1355 / 3002 loss=2.514, ppl=5.71, wps=5841.2, ups=0.09, wpb=64845, bsz=128, num_updates=13284, lr=9.99017e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=152871
2021-06-20 13:06:47 | INFO | train_inner | epoch 005: 1356 / 3002 loss=2.516, ppl=5.72, wps=6013.9, ups=0.09, wpb=64794, bsz=128, num_updates=13285, lr=9.99017e-05, gnorm=1.942, loss_scale=4, train_wall=10, gb_free=2.8, wall=152882
2021-06-20 13:06:59 | INFO | train_inner | epoch 005: 1357 / 3002 loss=2.479, ppl=5.58, wps=5729.2, ups=0.09, wpb=64810, bsz=128, num_updates=13286, lr=9.99017e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=152893
2021-06-20 13:07:10 | INFO | train_inner | epoch 005: 1358 / 3002 loss=2.392, ppl=5.25, wps=5776.9, ups=0.09, wpb=64838, bsz=128, num_updates=13287, lr=9.99017e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=152904
2021-06-20 13:07:21 | INFO | train_inner | epoch 005: 1359 / 3002 loss=2.633, ppl=6.2, wps=5800.3, ups=0.09, wpb=64811, bsz=128, num_updates=13288, lr=9.99017e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=152916
2021-06-20 13:07:32 | INFO | train_inner | epoch 005: 1360 / 3002 loss=2.433, ppl=5.4, wps=5842.5, ups=0.09, wpb=64809, bsz=128, num_updates=13289, lr=9.99017e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=152927
2021-06-20 13:07:43 | INFO | train_inner | epoch 005: 1361 / 3002 loss=2.53, ppl=5.78, wps=5952.2, ups=0.09, wpb=64812, bsz=128, num_updates=13290, lr=9.99017e-05, gnorm=1.921, loss_scale=4, train_wall=10, gb_free=2.8, wall=152938
2021-06-20 13:07:54 | INFO | train_inner | epoch 005: 1362 / 3002 loss=2.565, ppl=5.92, wps=5878.5, ups=0.09, wpb=64901, bsz=128, num_updates=13291, lr=9.99017e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=152949
2021-06-20 13:08:05 | INFO | train_inner | epoch 005: 1363 / 3002 loss=2.592, ppl=6.03, wps=5764.7, ups=0.09, wpb=64741, bsz=128, num_updates=13292, lr=9.99017e-05, gnorm=1.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=152960
2021-06-20 13:08:16 | INFO | train_inner | epoch 005: 1364 / 3002 loss=2.408, ppl=5.31, wps=5926.5, ups=0.09, wpb=64865, bsz=128, num_updates=13293, lr=9.99016e-05, gnorm=1.947, loss_scale=4, train_wall=10, gb_free=2.8, wall=152971
2021-06-20 13:08:28 | INFO | train_inner | epoch 005: 1365 / 3002 loss=2.556, ppl=5.88, wps=5785.8, ups=0.09, wpb=64690, bsz=128, num_updates=13294, lr=9.99016e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=152982
2021-06-20 13:08:39 | INFO | train_inner | epoch 005: 1366 / 3002 loss=2.352, ppl=5.11, wps=5796.6, ups=0.09, wpb=64870, bsz=128, num_updates=13295, lr=9.99016e-05, gnorm=1.892, loss_scale=4, train_wall=11, gb_free=2.8, wall=152993
2021-06-20 13:08:50 | INFO | train_inner | epoch 005: 1367 / 3002 loss=2.569, ppl=5.94, wps=6000.7, ups=0.09, wpb=64870, bsz=128, num_updates=13296, lr=9.99016e-05, gnorm=2.078, loss_scale=4, train_wall=10, gb_free=2.8, wall=153004
2021-06-20 13:09:01 | INFO | train_inner | epoch 005: 1368 / 3002 loss=2.494, ppl=5.63, wps=5709.2, ups=0.09, wpb=64749, bsz=128, num_updates=13297, lr=9.99016e-05, gnorm=2.09, loss_scale=4, train_wall=11, gb_free=2.8, wall=153015
2021-06-20 13:09:12 | INFO | train_inner | epoch 005: 1369 / 3002 loss=2.677, ppl=6.39, wps=5801.1, ups=0.09, wpb=64778, bsz=128, num_updates=13298, lr=9.99016e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=153026
2021-06-20 13:09:23 | INFO | train_inner | epoch 005: 1370 / 3002 loss=2.535, ppl=5.8, wps=5887, ups=0.09, wpb=64838, bsz=128, num_updates=13299, lr=9.99016e-05, gnorm=1.943, loss_scale=4, train_wall=11, gb_free=2.8, wall=153037
2021-06-20 13:09:34 | INFO | train_inner | epoch 005: 1371 / 3002 loss=2.565, ppl=5.92, wps=5892.2, ups=0.09, wpb=64818, bsz=128, num_updates=13300, lr=9.99016e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=153048
2021-06-20 13:09:45 | INFO | train_inner | epoch 005: 1372 / 3002 loss=2.446, ppl=5.45, wps=5878.2, ups=0.09, wpb=64866, bsz=128, num_updates=13301, lr=9.99016e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=153059
2021-06-20 13:09:56 | INFO | train_inner | epoch 005: 1373 / 3002 loss=2.492, ppl=5.63, wps=5896.3, ups=0.09, wpb=64852, bsz=128, num_updates=13302, lr=9.99016e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=153070
2021-06-20 13:10:07 | INFO | train_inner | epoch 005: 1374 / 3002 loss=2.458, ppl=5.5, wps=5898, ups=0.09, wpb=64829, bsz=128, num_updates=13303, lr=9.99016e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=153081
2021-06-20 13:10:18 | INFO | train_inner | epoch 005: 1375 / 3002 loss=2.543, ppl=5.83, wps=5904.9, ups=0.09, wpb=64862, bsz=128, num_updates=13304, lr=9.99016e-05, gnorm=1.959, loss_scale=4, train_wall=11, gb_free=2.8, wall=153092
2021-06-20 13:10:29 | INFO | train_inner | epoch 005: 1376 / 3002 loss=2.5, ppl=5.66, wps=5750, ups=0.09, wpb=64838, bsz=128, num_updates=13305, lr=9.99016e-05, gnorm=2.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=153104
2021-06-20 13:10:40 | INFO | train_inner | epoch 005: 1377 / 3002 loss=2.588, ppl=6.01, wps=5855.6, ups=0.09, wpb=64816, bsz=128, num_updates=13306, lr=9.99015e-05, gnorm=1.949, loss_scale=4, train_wall=11, gb_free=2.8, wall=153115
2021-06-20 13:10:52 | INFO | train_inner | epoch 005: 1378 / 3002 loss=2.51, ppl=5.7, wps=5844.8, ups=0.09, wpb=64846, bsz=128, num_updates=13307, lr=9.99015e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=153126
2021-06-20 13:11:03 | INFO | train_inner | epoch 005: 1379 / 3002 loss=2.438, ppl=5.42, wps=5716.7, ups=0.09, wpb=64849, bsz=128, num_updates=13308, lr=9.99015e-05, gnorm=1.904, loss_scale=4, train_wall=11, gb_free=2.8, wall=153137
2021-06-20 13:11:14 | INFO | train_inner | epoch 005: 1380 / 3002 loss=2.428, ppl=5.38, wps=5906.9, ups=0.09, wpb=64863, bsz=128, num_updates=13309, lr=9.99015e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=153148
2021-06-20 13:11:25 | INFO | train_inner | epoch 005: 1381 / 3002 loss=2.455, ppl=5.48, wps=5893.8, ups=0.09, wpb=64798, bsz=128, num_updates=13310, lr=9.99015e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=153159
2021-06-20 13:11:36 | INFO | train_inner | epoch 005: 1382 / 3002 loss=2.442, ppl=5.43, wps=5730.4, ups=0.09, wpb=64853, bsz=128, num_updates=13311, lr=9.99015e-05, gnorm=1.964, loss_scale=4, train_wall=11, gb_free=2.8, wall=153171
2021-06-20 13:11:47 | INFO | train_inner | epoch 005: 1383 / 3002 loss=2.543, ppl=5.83, wps=5892.5, ups=0.09, wpb=64819, bsz=128, num_updates=13312, lr=9.99015e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=153182
2021-06-20 13:11:59 | INFO | train_inner | epoch 005: 1384 / 3002 loss=2.681, ppl=6.41, wps=5704.2, ups=0.09, wpb=64800, bsz=128, num_updates=13313, lr=9.99015e-05, gnorm=2.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=153193
2021-06-20 13:12:10 | INFO | train_inner | epoch 005: 1385 / 3002 loss=2.605, ppl=6.08, wps=5789.3, ups=0.09, wpb=64721, bsz=128, num_updates=13314, lr=9.99015e-05, gnorm=2.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=153204
2021-06-20 13:12:21 | INFO | train_inner | epoch 005: 1386 / 3002 loss=2.433, ppl=5.4, wps=5775.8, ups=0.09, wpb=64839, bsz=128, num_updates=13315, lr=9.99015e-05, gnorm=2.045, loss_scale=4, train_wall=11, gb_free=2.8, wall=153215
2021-06-20 13:12:32 | INFO | train_inner | epoch 005: 1387 / 3002 loss=2.476, ppl=5.56, wps=5971.7, ups=0.09, wpb=64843, bsz=128, num_updates=13316, lr=9.99015e-05, gnorm=1.983, loss_scale=4, train_wall=10, gb_free=2.8, wall=153226
2021-06-20 13:12:43 | INFO | train_inner | epoch 005: 1388 / 3002 loss=2.599, ppl=6.06, wps=5826.7, ups=0.09, wpb=64824, bsz=128, num_updates=13317, lr=9.99015e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=153237
2021-06-20 13:12:54 | INFO | train_inner | epoch 005: 1389 / 3002 loss=2.405, ppl=5.3, wps=5941.5, ups=0.09, wpb=64851, bsz=128, num_updates=13318, lr=9.99014e-05, gnorm=1.976, loss_scale=4, train_wall=10, gb_free=2.8, wall=153248
2021-06-20 13:13:05 | INFO | train_inner | epoch 005: 1390 / 3002 loss=2.567, ppl=5.93, wps=5696.5, ups=0.09, wpb=64757, bsz=128, num_updates=13319, lr=9.99014e-05, gnorm=1.962, loss_scale=4, train_wall=11, gb_free=2.8, wall=153260
2021-06-20 13:13:16 | INFO | train_inner | epoch 005: 1391 / 3002 loss=2.369, ppl=5.17, wps=5840.5, ups=0.09, wpb=64843, bsz=128, num_updates=13320, lr=9.99014e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=153271
2021-06-20 13:13:27 | INFO | train_inner | epoch 005: 1392 / 3002 loss=2.462, ppl=5.51, wps=5929.9, ups=0.09, wpb=64924, bsz=128, num_updates=13321, lr=9.99014e-05, gnorm=1.915, loss_scale=4, train_wall=11, gb_free=2.8, wall=153282
2021-06-20 13:13:38 | INFO | train_inner | epoch 005: 1393 / 3002 loss=2.483, ppl=5.59, wps=5992.5, ups=0.09, wpb=64834, bsz=128, num_updates=13322, lr=9.99014e-05, gnorm=2.031, loss_scale=4, train_wall=10, gb_free=2.8, wall=153292
2021-06-20 13:13:49 | INFO | train_inner | epoch 005: 1394 / 3002 loss=2.357, ppl=5.12, wps=5848, ups=0.09, wpb=64878, bsz=128, num_updates=13323, lr=9.99014e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=153304
2021-06-20 13:14:00 | INFO | train_inner | epoch 005: 1395 / 3002 loss=2.451, ppl=5.47, wps=5792.3, ups=0.09, wpb=64873, bsz=128, num_updates=13324, lr=9.99014e-05, gnorm=2.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=153315
2021-06-20 13:14:12 | INFO | train_inner | epoch 005: 1396 / 3002 loss=2.558, ppl=5.89, wps=5751.9, ups=0.09, wpb=64800, bsz=128, num_updates=13325, lr=9.99014e-05, gnorm=2.076, loss_scale=4, train_wall=11, gb_free=2.8, wall=153326
2021-06-20 13:14:23 | INFO | train_inner | epoch 005: 1397 / 3002 loss=2.593, ppl=6.03, wps=5853.7, ups=0.09, wpb=64857, bsz=128, num_updates=13326, lr=9.99014e-05, gnorm=2.076, loss_scale=4, train_wall=11, gb_free=2.8, wall=153337
2021-06-20 13:14:34 | INFO | train_inner | epoch 005: 1398 / 3002 loss=2.561, ppl=5.9, wps=6009.9, ups=0.09, wpb=64869, bsz=128, num_updates=13327, lr=9.99014e-05, gnorm=2.036, loss_scale=4, train_wall=10, gb_free=2.8, wall=153348
2021-06-20 13:14:45 | INFO | train_inner | epoch 005: 1399 / 3002 loss=2.487, ppl=5.6, wps=5877.9, ups=0.09, wpb=64771, bsz=128, num_updates=13328, lr=9.99014e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=153359
2021-06-20 13:14:56 | INFO | train_inner | epoch 005: 1400 / 3002 loss=2.616, ppl=6.13, wps=5760.3, ups=0.09, wpb=64853, bsz=128, num_updates=13329, lr=9.99014e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=153370
2021-06-20 13:15:07 | INFO | train_inner | epoch 005: 1401 / 3002 loss=2.387, ppl=5.23, wps=5881.7, ups=0.09, wpb=64906, bsz=128, num_updates=13330, lr=9.99014e-05, gnorm=1.974, loss_scale=4, train_wall=11, gb_free=2.8, wall=153381
2021-06-20 13:15:18 | INFO | train_inner | epoch 005: 1402 / 3002 loss=2.375, ppl=5.19, wps=5810, ups=0.09, wpb=64842, bsz=128, num_updates=13331, lr=9.99013e-05, gnorm=1.894, loss_scale=4, train_wall=11, gb_free=2.8, wall=153392
2021-06-20 13:15:29 | INFO | train_inner | epoch 005: 1403 / 3002 loss=2.393, ppl=5.25, wps=5823.1, ups=0.09, wpb=64863, bsz=128, num_updates=13332, lr=9.99013e-05, gnorm=1.875, loss_scale=4, train_wall=11, gb_free=2.8, wall=153403
2021-06-20 13:15:40 | INFO | train_inner | epoch 005: 1404 / 3002 loss=2.446, ppl=5.45, wps=5843.6, ups=0.09, wpb=64793, bsz=128, num_updates=13333, lr=9.99013e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=153415
2021-06-20 13:15:51 | INFO | train_inner | epoch 005: 1405 / 3002 loss=2.405, ppl=5.3, wps=5866.3, ups=0.09, wpb=64911, bsz=128, num_updates=13334, lr=9.99013e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=153426
2021-06-20 13:16:02 | INFO | train_inner | epoch 005: 1406 / 3002 loss=2.573, ppl=5.95, wps=5839.3, ups=0.09, wpb=64867, bsz=128, num_updates=13335, lr=9.99013e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=153437
2021-06-20 13:16:13 | INFO | train_inner | epoch 005: 1407 / 3002 loss=2.667, ppl=6.35, wps=5861.6, ups=0.09, wpb=64800, bsz=128, num_updates=13336, lr=9.99013e-05, gnorm=1.981, loss_scale=4, train_wall=11, gb_free=2.8, wall=153448
2021-06-20 13:16:25 | INFO | train_inner | epoch 005: 1408 / 3002 loss=2.528, ppl=5.77, wps=5786.3, ups=0.09, wpb=64822, bsz=128, num_updates=13337, lr=9.99013e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=153459
2021-06-20 13:16:36 | INFO | train_inner | epoch 005: 1409 / 3002 loss=2.515, ppl=5.72, wps=5858, ups=0.09, wpb=64725, bsz=128, num_updates=13338, lr=9.99013e-05, gnorm=1.899, loss_scale=4, train_wall=11, gb_free=2.8, wall=153470
2021-06-20 13:16:47 | INFO | train_inner | epoch 005: 1410 / 3002 loss=2.478, ppl=5.57, wps=5695.7, ups=0.09, wpb=64709, bsz=128, num_updates=13339, lr=9.99013e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=153481
2021-06-20 13:16:58 | INFO | train_inner | epoch 005: 1411 / 3002 loss=2.466, ppl=5.52, wps=5902.2, ups=0.09, wpb=64783, bsz=128, num_updates=13340, lr=9.99013e-05, gnorm=1.888, loss_scale=4, train_wall=11, gb_free=2.8, wall=153492
2021-06-20 13:17:09 | INFO | train_inner | epoch 005: 1412 / 3002 loss=2.459, ppl=5.5, wps=5826.1, ups=0.09, wpb=64816, bsz=128, num_updates=13341, lr=9.99013e-05, gnorm=1.874, loss_scale=4, train_wall=11, gb_free=2.8, wall=153504
2021-06-20 13:17:20 | INFO | train_inner | epoch 005: 1413 / 3002 loss=2.434, ppl=5.4, wps=5881.5, ups=0.09, wpb=64832, bsz=128, num_updates=13342, lr=9.99013e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=153515
2021-06-20 13:17:31 | INFO | train_inner | epoch 005: 1414 / 3002 loss=2.556, ppl=5.88, wps=5885, ups=0.09, wpb=64917, bsz=128, num_updates=13343, lr=9.99012e-05, gnorm=2.122, loss_scale=4, train_wall=11, gb_free=2.8, wall=153526
2021-06-20 13:17:42 | INFO | train_inner | epoch 005: 1415 / 3002 loss=2.528, ppl=5.77, wps=5821, ups=0.09, wpb=64856, bsz=128, num_updates=13344, lr=9.99012e-05, gnorm=1.937, loss_scale=4, train_wall=11, gb_free=2.8, wall=153537
2021-06-20 13:17:54 | INFO | train_inner | epoch 005: 1416 / 3002 loss=2.703, ppl=6.51, wps=5803.5, ups=0.09, wpb=64757, bsz=128, num_updates=13345, lr=9.99012e-05, gnorm=2.109, loss_scale=4, train_wall=11, gb_free=2.8, wall=153548
2021-06-20 13:18:05 | INFO | train_inner | epoch 005: 1417 / 3002 loss=2.447, ppl=5.45, wps=5848.5, ups=0.09, wpb=64960, bsz=128, num_updates=13346, lr=9.99012e-05, gnorm=7.369, loss_scale=4, train_wall=11, gb_free=2.8, wall=153559
2021-06-20 13:18:16 | INFO | train_inner | epoch 005: 1418 / 3002 loss=2.635, ppl=6.21, wps=5852.7, ups=0.09, wpb=64854, bsz=128, num_updates=13347, lr=9.99012e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=153570
2021-06-20 13:18:27 | INFO | train_inner | epoch 005: 1419 / 3002 loss=2.467, ppl=5.53, wps=5864.1, ups=0.09, wpb=64813, bsz=128, num_updates=13348, lr=9.99012e-05, gnorm=1.956, loss_scale=4, train_wall=11, gb_free=2.8, wall=153581
2021-06-20 13:18:38 | INFO | train_inner | epoch 005: 1420 / 3002 loss=2.528, ppl=5.77, wps=5750.8, ups=0.09, wpb=64786, bsz=128, num_updates=13349, lr=9.99012e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=153592
2021-06-20 13:18:49 | INFO | train_inner | epoch 005: 1421 / 3002 loss=2.599, ppl=6.06, wps=5858.4, ups=0.09, wpb=64854, bsz=128, num_updates=13350, lr=9.99012e-05, gnorm=2.073, loss_scale=4, train_wall=11, gb_free=2.8, wall=153603
2021-06-20 13:19:00 | INFO | train_inner | epoch 005: 1422 / 3002 loss=2.549, ppl=5.85, wps=5882.6, ups=0.09, wpb=64845, bsz=128, num_updates=13351, lr=9.99012e-05, gnorm=2.023, loss_scale=4, train_wall=11, gb_free=2.8, wall=153614
2021-06-20 13:19:11 | INFO | train_inner | epoch 005: 1423 / 3002 loss=2.707, ppl=6.53, wps=5806.4, ups=0.09, wpb=64821, bsz=128, num_updates=13352, lr=9.99012e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=153626
2021-06-20 13:19:22 | INFO | train_inner | epoch 005: 1424 / 3002 loss=2.626, ppl=6.17, wps=5852.5, ups=0.09, wpb=64822, bsz=128, num_updates=13353, lr=9.99012e-05, gnorm=2.041, loss_scale=4, train_wall=11, gb_free=2.8, wall=153637
2021-06-20 13:19:33 | INFO | train_inner | epoch 005: 1425 / 3002 loss=2.574, ppl=5.96, wps=5853.2, ups=0.09, wpb=64810, bsz=128, num_updates=13354, lr=9.99012e-05, gnorm=1.93, loss_scale=4, train_wall=11, gb_free=2.8, wall=153648
2021-06-20 13:19:45 | INFO | train_inner | epoch 005: 1426 / 3002 loss=2.472, ppl=5.55, wps=5759.3, ups=0.09, wpb=64870, bsz=128, num_updates=13355, lr=9.99012e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=153659
2021-06-20 13:19:56 | INFO | train_inner | epoch 005: 1427 / 3002 loss=2.565, ppl=5.92, wps=5942.6, ups=0.09, wpb=64815, bsz=128, num_updates=13356, lr=9.99011e-05, gnorm=1.993, loss_scale=8, train_wall=10, gb_free=2.8, wall=153670
2021-06-20 13:20:07 | INFO | train_inner | epoch 005: 1428 / 3002 loss=2.601, ppl=6.07, wps=5819, ups=0.09, wpb=64817, bsz=128, num_updates=13357, lr=9.99011e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=153681
2021-06-20 13:20:18 | INFO | train_inner | epoch 005: 1429 / 3002 loss=2.686, ppl=6.44, wps=5776.3, ups=0.09, wpb=64864, bsz=128, num_updates=13358, lr=9.99011e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=153692
2021-06-20 13:20:29 | INFO | train_inner | epoch 005: 1430 / 3002 loss=2.708, ppl=6.53, wps=5830.6, ups=0.09, wpb=64814, bsz=128, num_updates=13359, lr=9.99011e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=153703
2021-06-20 13:20:40 | INFO | train_inner | epoch 005: 1431 / 3002 loss=2.465, ppl=5.52, wps=6028.2, ups=0.09, wpb=64877, bsz=128, num_updates=13360, lr=9.99011e-05, gnorm=2.115, loss_scale=8, train_wall=10, gb_free=2.8, wall=153714
2021-06-20 13:20:51 | INFO | train_inner | epoch 005: 1432 / 3002 loss=2.397, ppl=5.27, wps=5796.3, ups=0.09, wpb=64845, bsz=128, num_updates=13361, lr=9.99011e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=153725
2021-06-20 13:21:02 | INFO | train_inner | epoch 005: 1433 / 3002 loss=2.649, ppl=6.27, wps=5899.4, ups=0.09, wpb=64863, bsz=128, num_updates=13362, lr=9.99011e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=153736
2021-06-20 13:21:13 | INFO | train_inner | epoch 005: 1434 / 3002 loss=2.528, ppl=5.77, wps=5908.1, ups=0.09, wpb=64862, bsz=128, num_updates=13363, lr=9.99011e-05, gnorm=1.96, loss_scale=8, train_wall=10, gb_free=2.8, wall=153747
2021-06-20 13:21:24 | INFO | train_inner | epoch 005: 1435 / 3002 loss=2.554, ppl=5.87, wps=5865.9, ups=0.09, wpb=64935, bsz=128, num_updates=13364, lr=9.99011e-05, gnorm=2.011, loss_scale=8, train_wall=11, gb_free=2.8, wall=153758
2021-06-20 13:21:35 | INFO | train_inner | epoch 005: 1436 / 3002 loss=2.639, ppl=6.23, wps=5886.6, ups=0.09, wpb=64888, bsz=128, num_updates=13365, lr=9.99011e-05, gnorm=2.449, loss_scale=8, train_wall=11, gb_free=2.8, wall=153769
2021-06-20 13:21:46 | INFO | train_inner | epoch 005: 1437 / 3002 loss=2.619, ppl=6.14, wps=5821.7, ups=0.09, wpb=64790, bsz=128, num_updates=13366, lr=9.99011e-05, gnorm=3.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=153781
2021-06-20 13:21:57 | INFO | train_inner | epoch 005: 1438 / 3002 loss=2.591, ppl=6.03, wps=5879.8, ups=0.09, wpb=64767, bsz=128, num_updates=13367, lr=9.99011e-05, gnorm=2.055, loss_scale=8, train_wall=11, gb_free=2.8, wall=153792
2021-06-20 13:22:08 | INFO | train_inner | epoch 005: 1439 / 3002 loss=2.466, ppl=5.53, wps=5884.4, ups=0.09, wpb=64873, bsz=128, num_updates=13368, lr=9.9901e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=153803
2021-06-20 13:22:20 | INFO | train_inner | epoch 005: 1440 / 3002 loss=2.485, ppl=5.6, wps=5778.1, ups=0.09, wpb=64846, bsz=128, num_updates=13369, lr=9.9901e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=153814
2021-06-20 13:22:31 | INFO | train_inner | epoch 005: 1441 / 3002 loss=2.652, ppl=6.28, wps=5839.5, ups=0.09, wpb=64788, bsz=128, num_updates=13370, lr=9.9901e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=153825
2021-06-20 13:22:42 | INFO | train_inner | epoch 005: 1442 / 3002 loss=2.499, ppl=5.65, wps=5802.6, ups=0.09, wpb=64905, bsz=128, num_updates=13371, lr=9.9901e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=153836
2021-06-20 13:22:53 | INFO | train_inner | epoch 005: 1443 / 3002 loss=2.54, ppl=5.81, wps=5973.4, ups=0.09, wpb=64885, bsz=128, num_updates=13372, lr=9.9901e-05, gnorm=3.715, loss_scale=8, train_wall=10, gb_free=2.8, wall=153847
2021-06-20 13:23:04 | INFO | train_inner | epoch 005: 1444 / 3002 loss=2.409, ppl=5.31, wps=5851.8, ups=0.09, wpb=64881, bsz=128, num_updates=13373, lr=9.9901e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=153858
2021-06-20 13:23:15 | INFO | train_inner | epoch 005: 1445 / 3002 loss=2.447, ppl=5.45, wps=5821.1, ups=0.09, wpb=64840, bsz=128, num_updates=13374, lr=9.9901e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=153869
2021-06-20 13:23:26 | INFO | train_inner | epoch 005: 1446 / 3002 loss=2.424, ppl=5.37, wps=5953.6, ups=0.09, wpb=64862, bsz=128, num_updates=13375, lr=9.9901e-05, gnorm=2.002, loss_scale=8, train_wall=10, gb_free=2.8, wall=153880
2021-06-20 13:23:37 | INFO | train_inner | epoch 005: 1447 / 3002 loss=2.506, ppl=5.68, wps=5849, ups=0.09, wpb=64839, bsz=128, num_updates=13376, lr=9.9901e-05, gnorm=2.123, loss_scale=8, train_wall=11, gb_free=2.8, wall=153891
2021-06-20 13:23:48 | INFO | train_inner | epoch 005: 1448 / 3002 loss=2.578, ppl=5.97, wps=5769.9, ups=0.09, wpb=64885, bsz=128, num_updates=13377, lr=9.9901e-05, gnorm=2.085, loss_scale=8, train_wall=11, gb_free=2.8, wall=153902
2021-06-20 13:23:59 | INFO | train_inner | epoch 005: 1449 / 3002 loss=2.405, ppl=5.3, wps=6004.4, ups=0.09, wpb=64830, bsz=128, num_updates=13378, lr=9.9901e-05, gnorm=2.061, loss_scale=8, train_wall=10, gb_free=2.8, wall=153913
2021-06-20 13:24:10 | INFO | train_inner | epoch 005: 1450 / 3002 loss=2.544, ppl=5.83, wps=5810.1, ups=0.09, wpb=64836, bsz=128, num_updates=13379, lr=9.9901e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=153924
2021-06-20 13:24:21 | INFO | train_inner | epoch 005: 1451 / 3002 loss=2.389, ppl=5.24, wps=5791.3, ups=0.09, wpb=64786, bsz=128, num_updates=13380, lr=9.9901e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=153936
2021-06-20 13:24:32 | INFO | train_inner | epoch 005: 1452 / 3002 loss=2.616, ppl=6.13, wps=5884, ups=0.09, wpb=64928, bsz=128, num_updates=13381, lr=9.99009e-05, gnorm=2.029, loss_scale=8, train_wall=11, gb_free=2.8, wall=153947
2021-06-20 13:24:43 | INFO | train_inner | epoch 005: 1453 / 3002 loss=2.5, ppl=5.66, wps=5856.4, ups=0.09, wpb=64824, bsz=128, num_updates=13382, lr=9.99009e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=153958
2021-06-20 13:24:54 | INFO | train_inner | epoch 005: 1454 / 3002 loss=2.568, ppl=5.93, wps=5944.5, ups=0.09, wpb=64880, bsz=128, num_updates=13383, lr=9.99009e-05, gnorm=1.946, loss_scale=8, train_wall=10, gb_free=2.8, wall=153969
2021-06-20 13:25:06 | INFO | train_inner | epoch 005: 1455 / 3002 loss=2.575, ppl=5.96, wps=5709, ups=0.09, wpb=64763, bsz=128, num_updates=13384, lr=9.99009e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=153980
2021-06-20 13:25:17 | INFO | train_inner | epoch 005: 1456 / 3002 loss=2.424, ppl=5.37, wps=5811.8, ups=0.09, wpb=64888, bsz=128, num_updates=13385, lr=9.99009e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=153991
2021-06-20 13:25:28 | INFO | train_inner | epoch 005: 1457 / 3002 loss=2.594, ppl=6.04, wps=5926.5, ups=0.09, wpb=64831, bsz=128, num_updates=13386, lr=9.99009e-05, gnorm=1.995, loss_scale=8, train_wall=10, gb_free=2.8, wall=154002
2021-06-20 13:25:39 | INFO | train_inner | epoch 005: 1458 / 3002 loss=2.421, ppl=5.36, wps=6014.3, ups=0.09, wpb=64875, bsz=128, num_updates=13387, lr=9.99009e-05, gnorm=2.054, loss_scale=8, train_wall=10, gb_free=2.8, wall=154013
2021-06-20 13:25:50 | INFO | train_inner | epoch 005: 1459 / 3002 loss=2.43, ppl=5.39, wps=5799, ups=0.09, wpb=64844, bsz=128, num_updates=13388, lr=9.99009e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=154024
2021-06-20 13:26:01 | INFO | train_inner | epoch 005: 1460 / 3002 loss=2.499, ppl=5.65, wps=5922.6, ups=0.09, wpb=64868, bsz=128, num_updates=13389, lr=9.99009e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=154035
2021-06-20 13:26:12 | INFO | train_inner | epoch 005: 1461 / 3002 loss=2.448, ppl=5.46, wps=5840.8, ups=0.09, wpb=64800, bsz=128, num_updates=13390, lr=9.99009e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=154046
2021-06-20 13:26:23 | INFO | train_inner | epoch 005: 1462 / 3002 loss=2.548, ppl=5.85, wps=5869.4, ups=0.09, wpb=64780, bsz=128, num_updates=13391, lr=9.99009e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=154057
2021-06-20 13:26:34 | INFO | train_inner | epoch 005: 1463 / 3002 loss=2.491, ppl=5.62, wps=5826.7, ups=0.09, wpb=64915, bsz=128, num_updates=13392, lr=9.99009e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=154068
2021-06-20 13:26:45 | INFO | train_inner | epoch 005: 1464 / 3002 loss=2.487, ppl=5.61, wps=5824.3, ups=0.09, wpb=64845, bsz=128, num_updates=13393, lr=9.99008e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=154079
2021-06-20 13:26:56 | INFO | train_inner | epoch 005: 1465 / 3002 loss=2.494, ppl=5.63, wps=5837.1, ups=0.09, wpb=64905, bsz=128, num_updates=13394, lr=9.99008e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=154091
2021-06-20 13:27:07 | INFO | train_inner | epoch 005: 1466 / 3002 loss=2.534, ppl=5.79, wps=5828.2, ups=0.09, wpb=64779, bsz=128, num_updates=13395, lr=9.99008e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=154102
2021-06-20 13:27:18 | INFO | train_inner | epoch 005: 1467 / 3002 loss=2.399, ppl=5.27, wps=5921.2, ups=0.09, wpb=64824, bsz=128, num_updates=13396, lr=9.99008e-05, gnorm=1.972, loss_scale=8, train_wall=10, gb_free=2.8, wall=154113
2021-06-20 13:27:29 | INFO | train_inner | epoch 005: 1468 / 3002 loss=2.542, ppl=5.82, wps=5836.1, ups=0.09, wpb=64858, bsz=128, num_updates=13397, lr=9.99008e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=154124
2021-06-20 13:27:40 | INFO | train_inner | epoch 005: 1469 / 3002 loss=2.483, ppl=5.59, wps=5846.2, ups=0.09, wpb=64817, bsz=128, num_updates=13398, lr=9.99008e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=154135
2021-06-20 13:27:51 | INFO | train_inner | epoch 005: 1470 / 3002 loss=2.451, ppl=5.47, wps=5904.9, ups=0.09, wpb=64849, bsz=128, num_updates=13399, lr=9.99008e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=154146
2021-06-20 13:28:03 | INFO | train_inner | epoch 005: 1471 / 3002 loss=2.522, ppl=5.74, wps=5776.2, ups=0.09, wpb=64825, bsz=128, num_updates=13400, lr=9.99008e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=154157
2021-06-20 13:28:14 | INFO | train_inner | epoch 005: 1472 / 3002 loss=2.495, ppl=5.64, wps=5821.8, ups=0.09, wpb=64813, bsz=128, num_updates=13401, lr=9.99008e-05, gnorm=1.878, loss_scale=8, train_wall=11, gb_free=2.8, wall=154168
2021-06-20 13:28:25 | INFO | train_inner | epoch 005: 1473 / 3002 loss=2.573, ppl=5.95, wps=5780.7, ups=0.09, wpb=64821, bsz=128, num_updates=13402, lr=9.99008e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=154179
2021-06-20 13:28:36 | INFO | train_inner | epoch 005: 1474 / 3002 loss=2.452, ppl=5.47, wps=5877.5, ups=0.09, wpb=64837, bsz=128, num_updates=13403, lr=9.99008e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=154190
2021-06-20 13:28:47 | INFO | train_inner | epoch 005: 1475 / 3002 loss=2.592, ppl=6.03, wps=5846.9, ups=0.09, wpb=64821, bsz=128, num_updates=13404, lr=9.99008e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=154201
2021-06-20 13:28:58 | INFO | train_inner | epoch 005: 1476 / 3002 loss=2.548, ppl=5.85, wps=5861.6, ups=0.09, wpb=64710, bsz=128, num_updates=13405, lr=9.99008e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=154212
2021-06-20 13:29:09 | INFO | train_inner | epoch 005: 1477 / 3002 loss=2.647, ppl=6.26, wps=5921.2, ups=0.09, wpb=64818, bsz=128, num_updates=13406, lr=9.99007e-05, gnorm=1.962, loss_scale=8, train_wall=10, gb_free=2.8, wall=154223
2021-06-20 13:29:20 | INFO | train_inner | epoch 005: 1478 / 3002 loss=2.405, ppl=5.29, wps=5799.5, ups=0.09, wpb=64829, bsz=128, num_updates=13407, lr=9.99007e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=154235
2021-06-20 13:29:31 | INFO | train_inner | epoch 005: 1479 / 3002 loss=2.68, ppl=6.41, wps=5851.6, ups=0.09, wpb=64827, bsz=128, num_updates=13408, lr=9.99007e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=154246
2021-06-20 13:29:42 | INFO | train_inner | epoch 005: 1480 / 3002 loss=2.473, ppl=5.55, wps=5829.1, ups=0.09, wpb=64799, bsz=128, num_updates=13409, lr=9.99007e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=154257
2021-06-20 13:29:54 | INFO | train_inner | epoch 005: 1481 / 3002 loss=2.381, ppl=5.21, wps=5753.4, ups=0.09, wpb=64830, bsz=128, num_updates=13410, lr=9.99007e-05, gnorm=2.178, loss_scale=8, train_wall=11, gb_free=2.8, wall=154268
2021-06-20 13:30:05 | INFO | train_inner | epoch 005: 1482 / 3002 loss=2.582, ppl=5.99, wps=5785.7, ups=0.09, wpb=64814, bsz=128, num_updates=13411, lr=9.99007e-05, gnorm=1.994, loss_scale=8, train_wall=11, gb_free=2.8, wall=154279
2021-06-20 13:30:16 | INFO | train_inner | epoch 005: 1483 / 3002 loss=2.341, ppl=5.06, wps=5843.7, ups=0.09, wpb=64884, bsz=128, num_updates=13412, lr=9.99007e-05, gnorm=2.086, loss_scale=8, train_wall=11, gb_free=2.8, wall=154290
2021-06-20 13:30:27 | INFO | train_inner | epoch 005: 1484 / 3002 loss=2.613, ppl=6.12, wps=5864.5, ups=0.09, wpb=64851, bsz=128, num_updates=13413, lr=9.99007e-05, gnorm=2.01, loss_scale=8, train_wall=11, gb_free=2.8, wall=154301
2021-06-20 13:30:38 | INFO | train_inner | epoch 005: 1485 / 3002 loss=2.667, ppl=6.35, wps=5885.7, ups=0.09, wpb=64855, bsz=128, num_updates=13414, lr=9.99007e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=154312
2021-06-20 13:30:49 | INFO | train_inner | epoch 005: 1486 / 3002 loss=2.481, ppl=5.58, wps=5811.3, ups=0.09, wpb=64803, bsz=128, num_updates=13415, lr=9.99007e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=154324
2021-06-20 13:31:00 | INFO | train_inner | epoch 005: 1487 / 3002 loss=2.491, ppl=5.62, wps=5799.1, ups=0.09, wpb=64822, bsz=128, num_updates=13416, lr=9.99007e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=154335
2021-06-20 13:31:11 | INFO | train_inner | epoch 005: 1488 / 3002 loss=2.545, ppl=5.83, wps=5885.7, ups=0.09, wpb=64837, bsz=128, num_updates=13417, lr=9.99007e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=154346
2021-06-20 13:31:23 | INFO | train_inner | epoch 005: 1489 / 3002 loss=2.548, ppl=5.85, wps=5848.3, ups=0.09, wpb=64808, bsz=128, num_updates=13418, lr=9.99006e-05, gnorm=2.01, loss_scale=8, train_wall=11, gb_free=2.8, wall=154357
2021-06-20 13:31:34 | INFO | train_inner | epoch 005: 1490 / 3002 loss=2.429, ppl=5.39, wps=5876.8, ups=0.09, wpb=64913, bsz=128, num_updates=13419, lr=9.99006e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=154368
2021-06-20 13:31:45 | INFO | train_inner | epoch 005: 1491 / 3002 loss=2.452, ppl=5.47, wps=5746, ups=0.09, wpb=64846, bsz=128, num_updates=13420, lr=9.99006e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=154379
2021-06-20 13:31:56 | INFO | train_inner | epoch 005: 1492 / 3002 loss=2.542, ppl=5.82, wps=5905.5, ups=0.09, wpb=64828, bsz=128, num_updates=13421, lr=9.99006e-05, gnorm=1.944, loss_scale=8, train_wall=10, gb_free=2.8, wall=154390
2021-06-20 13:32:07 | INFO | train_inner | epoch 005: 1493 / 3002 loss=2.571, ppl=5.94, wps=5906.2, ups=0.09, wpb=64831, bsz=128, num_updates=13422, lr=9.99006e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=154401
2021-06-20 13:32:18 | INFO | train_inner | epoch 005: 1494 / 3002 loss=2.478, ppl=5.57, wps=5778.1, ups=0.09, wpb=64872, bsz=128, num_updates=13423, lr=9.99006e-05, gnorm=1.897, loss_scale=8, train_wall=11, gb_free=2.8, wall=154412
2021-06-20 13:32:29 | INFO | train_inner | epoch 005: 1495 / 3002 loss=2.297, ppl=4.92, wps=5875.9, ups=0.09, wpb=64847, bsz=128, num_updates=13424, lr=9.99006e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=154423
2021-06-20 13:32:40 | INFO | train_inner | epoch 005: 1496 / 3002 loss=2.555, ppl=5.88, wps=5790.1, ups=0.09, wpb=64831, bsz=128, num_updates=13425, lr=9.99006e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=154435
2021-06-20 13:32:51 | INFO | train_inner | epoch 005: 1497 / 3002 loss=2.467, ppl=5.53, wps=5804.2, ups=0.09, wpb=64899, bsz=128, num_updates=13426, lr=9.99006e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=154446
2021-06-20 13:33:03 | INFO | train_inner | epoch 005: 1498 / 3002 loss=2.444, ppl=5.44, wps=5723.7, ups=0.09, wpb=64878, bsz=128, num_updates=13427, lr=9.99006e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=154457
2021-06-20 13:33:14 | INFO | train_inner | epoch 005: 1499 / 3002 loss=2.456, ppl=5.49, wps=5786.4, ups=0.09, wpb=64804, bsz=128, num_updates=13428, lr=9.99006e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=154468
2021-06-20 13:33:25 | INFO | train_inner | epoch 005: 1500 / 3002 loss=2.458, ppl=5.5, wps=5869.9, ups=0.09, wpb=64818, bsz=128, num_updates=13429, lr=9.99006e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=154479
2021-06-20 13:33:36 | INFO | train_inner | epoch 005: 1501 / 3002 loss=2.459, ppl=5.5, wps=5869.6, ups=0.09, wpb=64874, bsz=128, num_updates=13430, lr=9.99006e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=154490
2021-06-20 13:33:47 | INFO | train_inner | epoch 005: 1502 / 3002 loss=2.467, ppl=5.53, wps=5783, ups=0.09, wpb=64796, bsz=128, num_updates=13431, lr=9.99005e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=154502
2021-06-20 13:33:59 | INFO | train_inner | epoch 005: 1503 / 3002 loss=2.651, ppl=6.28, wps=5791.7, ups=0.09, wpb=64859, bsz=128, num_updates=13432, lr=9.99005e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=154513
2021-06-20 13:34:10 | INFO | train_inner | epoch 005: 1504 / 3002 loss=2.379, ppl=5.2, wps=5785.6, ups=0.09, wpb=64834, bsz=128, num_updates=13433, lr=9.99005e-05, gnorm=2.243, loss_scale=8, train_wall=11, gb_free=2.8, wall=154524
2021-06-20 13:34:21 | INFO | train_inner | epoch 005: 1505 / 3002 loss=2.459, ppl=5.5, wps=5771.2, ups=0.09, wpb=64848, bsz=128, num_updates=13434, lr=9.99005e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=154535
2021-06-20 13:34:32 | INFO | train_inner | epoch 005: 1506 / 3002 loss=2.544, ppl=5.83, wps=5835.7, ups=0.09, wpb=64824, bsz=128, num_updates=13435, lr=9.99005e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=154546
2021-06-20 13:34:43 | INFO | train_inner | epoch 005: 1507 / 3002 loss=2.635, ppl=6.21, wps=5901.4, ups=0.09, wpb=64855, bsz=128, num_updates=13436, lr=9.99005e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=154557
2021-06-20 13:34:54 | INFO | train_inner | epoch 005: 1508 / 3002 loss=2.682, ppl=6.42, wps=5813.4, ups=0.09, wpb=64826, bsz=128, num_updates=13437, lr=9.99005e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=154569
2021-06-20 13:35:05 | INFO | train_inner | epoch 005: 1509 / 3002 loss=2.387, ppl=5.23, wps=5842.3, ups=0.09, wpb=64803, bsz=128, num_updates=13438, lr=9.99005e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=154580
2021-06-20 13:35:17 | INFO | train_inner | epoch 005: 1510 / 3002 loss=2.557, ppl=5.88, wps=5781.3, ups=0.09, wpb=64882, bsz=128, num_updates=13439, lr=9.99005e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=154591
2021-06-20 13:35:28 | INFO | train_inner | epoch 005: 1511 / 3002 loss=2.387, ppl=5.23, wps=5817.9, ups=0.09, wpb=64751, bsz=128, num_updates=13440, lr=9.99005e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=154602
2021-06-20 13:35:39 | INFO | train_inner | epoch 005: 1512 / 3002 loss=2.379, ppl=5.2, wps=5904.1, ups=0.09, wpb=64818, bsz=128, num_updates=13441, lr=9.99005e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=154613
2021-06-20 13:35:50 | INFO | train_inner | epoch 005: 1513 / 3002 loss=2.392, ppl=5.25, wps=5829.1, ups=0.09, wpb=64926, bsz=128, num_updates=13442, lr=9.99005e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=154624
2021-06-20 13:36:01 | INFO | train_inner | epoch 005: 1514 / 3002 loss=2.444, ppl=5.44, wps=5823, ups=0.09, wpb=64815, bsz=128, num_updates=13443, lr=9.99004e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=154635
2021-06-20 13:36:12 | INFO | train_inner | epoch 005: 1515 / 3002 loss=2.45, ppl=5.46, wps=5861.4, ups=0.09, wpb=64865, bsz=128, num_updates=13444, lr=9.99004e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=154646
2021-06-20 13:36:23 | INFO | train_inner | epoch 005: 1516 / 3002 loss=2.592, ppl=6.03, wps=5763.5, ups=0.09, wpb=64872, bsz=128, num_updates=13445, lr=9.99004e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=154658
2021-06-20 13:36:34 | INFO | train_inner | epoch 005: 1517 / 3002 loss=2.514, ppl=5.71, wps=5781, ups=0.09, wpb=64818, bsz=128, num_updates=13446, lr=9.99004e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=154669
2021-06-20 13:36:46 | INFO | train_inner | epoch 005: 1518 / 3002 loss=2.403, ppl=5.29, wps=5754.2, ups=0.09, wpb=64807, bsz=128, num_updates=13447, lr=9.99004e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=154680
2021-06-20 13:36:57 | INFO | train_inner | epoch 005: 1519 / 3002 loss=2.604, ppl=6.08, wps=5748.3, ups=0.09, wpb=64825, bsz=128, num_updates=13448, lr=9.99004e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=154691
2021-06-20 13:37:08 | INFO | train_inner | epoch 005: 1520 / 3002 loss=2.581, ppl=5.99, wps=5831.9, ups=0.09, wpb=64826, bsz=128, num_updates=13449, lr=9.99004e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=154702
2021-06-20 13:37:19 | INFO | train_inner | epoch 005: 1521 / 3002 loss=2.466, ppl=5.52, wps=5822.8, ups=0.09, wpb=64917, bsz=128, num_updates=13450, lr=9.99004e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=154714
2021-06-20 13:37:30 | INFO | train_inner | epoch 005: 1522 / 3002 loss=2.47, ppl=5.54, wps=5788.9, ups=0.09, wpb=64796, bsz=128, num_updates=13451, lr=9.99004e-05, gnorm=2.011, loss_scale=8, train_wall=11, gb_free=2.8, wall=154725
2021-06-20 13:37:42 | INFO | train_inner | epoch 005: 1523 / 3002 loss=2.579, ppl=5.97, wps=5808.3, ups=0.09, wpb=64833, bsz=128, num_updates=13452, lr=9.99004e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=154736
2021-06-20 13:37:53 | INFO | train_inner | epoch 005: 1524 / 3002 loss=2.503, ppl=5.67, wps=5913.3, ups=0.09, wpb=64887, bsz=128, num_updates=13453, lr=9.99004e-05, gnorm=2.109, loss_scale=8, train_wall=11, gb_free=2.8, wall=154747
2021-06-20 13:38:04 | INFO | train_inner | epoch 005: 1525 / 3002 loss=2.694, ppl=6.47, wps=5920.1, ups=0.09, wpb=64905, bsz=128, num_updates=13454, lr=9.99004e-05, gnorm=2.051, loss_scale=8, train_wall=10, gb_free=2.8, wall=154758
2021-06-20 13:38:15 | INFO | train_inner | epoch 005: 1526 / 3002 loss=2.465, ppl=5.52, wps=5816.6, ups=0.09, wpb=64856, bsz=128, num_updates=13455, lr=9.99004e-05, gnorm=1.857, loss_scale=8, train_wall=11, gb_free=2.8, wall=154769
2021-06-20 13:38:26 | INFO | train_inner | epoch 005: 1527 / 3002 loss=2.355, ppl=5.12, wps=5736.4, ups=0.09, wpb=64860, bsz=128, num_updates=13456, lr=9.99003e-05, gnorm=1.868, loss_scale=8, train_wall=11, gb_free=2.8, wall=154780
2021-06-20 13:38:37 | INFO | train_inner | epoch 005: 1528 / 3002 loss=2.51, ppl=5.7, wps=5740.1, ups=0.09, wpb=64854, bsz=128, num_updates=13457, lr=9.99003e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=154792
2021-06-20 13:38:48 | INFO | train_inner | epoch 005: 1529 / 3002 loss=2.591, ppl=6.03, wps=5821, ups=0.09, wpb=64790, bsz=128, num_updates=13458, lr=9.99003e-05, gnorm=2.247, loss_scale=8, train_wall=11, gb_free=2.8, wall=154803
2021-06-20 13:38:59 | INFO | train_inner | epoch 005: 1530 / 3002 loss=2.433, ppl=5.4, wps=5942.2, ups=0.09, wpb=64843, bsz=128, num_updates=13459, lr=9.99003e-05, gnorm=1.973, loss_scale=8, train_wall=10, gb_free=2.8, wall=154814
2021-06-20 13:39:11 | INFO | train_inner | epoch 005: 1531 / 3002 loss=2.499, ppl=5.65, wps=5801.3, ups=0.09, wpb=64861, bsz=128, num_updates=13460, lr=9.99003e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=154825
2021-06-20 13:39:21 | INFO | train_inner | epoch 005: 1532 / 3002 loss=2.538, ppl=5.81, wps=5962.9, ups=0.09, wpb=64865, bsz=128, num_updates=13461, lr=9.99003e-05, gnorm=1.966, loss_scale=8, train_wall=10, gb_free=2.8, wall=154836
2021-06-20 13:39:32 | INFO | train_inner | epoch 005: 1533 / 3002 loss=2.6, ppl=6.06, wps=5907, ups=0.09, wpb=64877, bsz=128, num_updates=13462, lr=9.99003e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=154847
2021-06-20 13:39:43 | INFO | train_inner | epoch 005: 1534 / 3002 loss=2.426, ppl=5.37, wps=5942.3, ups=0.09, wpb=64916, bsz=128, num_updates=13463, lr=9.99003e-05, gnorm=2.007, loss_scale=8, train_wall=10, gb_free=2.8, wall=154858
2021-06-20 13:39:54 | INFO | train_inner | epoch 005: 1535 / 3002 loss=2.585, ppl=6, wps=5884.9, ups=0.09, wpb=64820, bsz=128, num_updates=13464, lr=9.99003e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=154869
2021-06-20 13:40:05 | INFO | train_inner | epoch 005: 1536 / 3002 loss=2.59, ppl=6.02, wps=5803.1, ups=0.09, wpb=64785, bsz=128, num_updates=13465, lr=9.99003e-05, gnorm=2.111, loss_scale=8, train_wall=11, gb_free=2.8, wall=154880
2021-06-20 13:40:17 | INFO | train_inner | epoch 005: 1537 / 3002 loss=2.574, ppl=5.95, wps=5849.9, ups=0.09, wpb=64810, bsz=128, num_updates=13466, lr=9.99003e-05, gnorm=2.069, loss_scale=8, train_wall=11, gb_free=2.8, wall=154891
2021-06-20 13:40:28 | INFO | train_inner | epoch 005: 1538 / 3002 loss=2.649, ppl=6.27, wps=5813.8, ups=0.09, wpb=64842, bsz=128, num_updates=13467, lr=9.99003e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=154902
2021-06-20 13:40:39 | INFO | train_inner | epoch 005: 1539 / 3002 loss=2.589, ppl=6.01, wps=5905, ups=0.09, wpb=64781, bsz=128, num_updates=13468, lr=9.99002e-05, gnorm=2.028, loss_scale=8, train_wall=10, gb_free=2.8, wall=154913
2021-06-20 13:40:50 | INFO | train_inner | epoch 005: 1540 / 3002 loss=2.637, ppl=6.22, wps=5902.1, ups=0.09, wpb=64826, bsz=128, num_updates=13469, lr=9.99002e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=154924
2021-06-20 13:41:01 | INFO | train_inner | epoch 005: 1541 / 3002 loss=2.507, ppl=5.68, wps=5929.5, ups=0.09, wpb=64871, bsz=128, num_updates=13470, lr=9.99002e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=154935
2021-06-20 13:41:12 | INFO | train_inner | epoch 005: 1542 / 3002 loss=2.643, ppl=6.25, wps=5858.4, ups=0.09, wpb=64911, bsz=128, num_updates=13471, lr=9.99002e-05, gnorm=2.052, loss_scale=8, train_wall=11, gb_free=2.8, wall=154946
2021-06-20 13:41:23 | INFO | train_inner | epoch 005: 1543 / 3002 loss=2.457, ppl=5.49, wps=5795.5, ups=0.09, wpb=64749, bsz=128, num_updates=13472, lr=9.99002e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=154957
2021-06-20 13:41:34 | INFO | train_inner | epoch 005: 1544 / 3002 loss=2.464, ppl=5.52, wps=5975.8, ups=0.09, wpb=64856, bsz=128, num_updates=13473, lr=9.99002e-05, gnorm=2.022, loss_scale=8, train_wall=10, gb_free=2.8, wall=154968
2021-06-20 13:41:45 | INFO | train_inner | epoch 005: 1545 / 3002 loss=2.446, ppl=5.45, wps=5761.9, ups=0.09, wpb=64884, bsz=128, num_updates=13474, lr=9.99002e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=154979
2021-06-20 13:41:56 | INFO | train_inner | epoch 005: 1546 / 3002 loss=2.497, ppl=5.64, wps=5862.8, ups=0.09, wpb=64785, bsz=128, num_updates=13475, lr=9.99002e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=154990
2021-06-20 13:42:07 | INFO | train_inner | epoch 005: 1547 / 3002 loss=2.491, ppl=5.62, wps=5876.9, ups=0.09, wpb=64820, bsz=128, num_updates=13476, lr=9.99002e-05, gnorm=2.029, loss_scale=8, train_wall=11, gb_free=2.8, wall=155001
2021-06-20 13:42:18 | INFO | train_inner | epoch 005: 1548 / 3002 loss=2.64, ppl=6.23, wps=5889.5, ups=0.09, wpb=64844, bsz=128, num_updates=13477, lr=9.99002e-05, gnorm=2.067, loss_scale=8, train_wall=11, gb_free=2.8, wall=155012
2021-06-20 13:42:29 | INFO | train_inner | epoch 005: 1549 / 3002 loss=2.407, ppl=5.3, wps=5855.5, ups=0.09, wpb=64804, bsz=128, num_updates=13478, lr=9.99002e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=155023
2021-06-20 13:42:40 | INFO | train_inner | epoch 005: 1550 / 3002 loss=2.611, ppl=6.11, wps=5909.1, ups=0.09, wpb=64841, bsz=128, num_updates=13479, lr=9.99002e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=155034
2021-06-20 13:42:51 | INFO | train_inner | epoch 005: 1551 / 3002 loss=2.433, ppl=5.4, wps=5917, ups=0.09, wpb=64794, bsz=128, num_updates=13480, lr=9.99002e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=155045
2021-06-20 13:43:02 | INFO | train_inner | epoch 005: 1552 / 3002 loss=2.432, ppl=5.4, wps=5830.9, ups=0.09, wpb=64826, bsz=128, num_updates=13481, lr=9.99001e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=155057
2021-06-20 13:43:13 | INFO | train_inner | epoch 005: 1553 / 3002 loss=2.485, ppl=5.6, wps=5853.1, ups=0.09, wpb=64902, bsz=128, num_updates=13482, lr=9.99001e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=155068
2021-06-20 13:43:24 | INFO | train_inner | epoch 005: 1554 / 3002 loss=2.6, ppl=6.06, wps=5812.8, ups=0.09, wpb=64741, bsz=128, num_updates=13483, lr=9.99001e-05, gnorm=1.975, loss_scale=16, train_wall=11, gb_free=2.8, wall=155079
2021-06-20 13:43:35 | INFO | train_inner | epoch 005: 1555 / 3002 loss=2.561, ppl=5.9, wps=5929.9, ups=0.09, wpb=64881, bsz=128, num_updates=13484, lr=9.99001e-05, gnorm=1.967, loss_scale=16, train_wall=10, gb_free=2.8, wall=155090
2021-06-20 13:43:46 | INFO | train_inner | epoch 005: 1556 / 3002 loss=2.384, ppl=5.22, wps=5950.5, ups=0.09, wpb=64916, bsz=128, num_updates=13485, lr=9.99001e-05, gnorm=1.993, loss_scale=16, train_wall=10, gb_free=2.8, wall=155101
2021-06-20 13:43:57 | INFO | train_inner | epoch 005: 1557 / 3002 loss=2.663, ppl=6.33, wps=5795.1, ups=0.09, wpb=64825, bsz=128, num_updates=13486, lr=9.99001e-05, gnorm=1.97, loss_scale=16, train_wall=11, gb_free=2.8, wall=155112
2021-06-20 13:44:08 | INFO | train_inner | epoch 005: 1558 / 3002 loss=2.561, ppl=5.9, wps=5972.7, ups=0.09, wpb=64895, bsz=128, num_updates=13487, lr=9.99001e-05, gnorm=1.972, loss_scale=16, train_wall=10, gb_free=2.8, wall=155123
2021-06-20 13:44:19 | INFO | train_inner | epoch 005: 1559 / 3002 loss=2.384, ppl=5.22, wps=5959.5, ups=0.09, wpb=64854, bsz=128, num_updates=13488, lr=9.99001e-05, gnorm=2.143, loss_scale=16, train_wall=10, gb_free=2.8, wall=155134
2021-06-20 13:44:30 | INFO | train_inner | epoch 005: 1560 / 3002 loss=2.426, ppl=5.37, wps=5824.7, ups=0.09, wpb=64814, bsz=128, num_updates=13489, lr=9.99001e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=155145
2021-06-20 13:44:42 | INFO | train_inner | epoch 005: 1561 / 3002 loss=2.666, ppl=6.35, wps=5709.3, ups=0.09, wpb=64774, bsz=128, num_updates=13490, lr=9.99001e-05, gnorm=1.909, loss_scale=16, train_wall=11, gb_free=2.8, wall=155156
2021-06-20 13:44:53 | INFO | train_inner | epoch 005: 1562 / 3002 loss=2.597, ppl=6.05, wps=5746.1, ups=0.09, wpb=64841, bsz=128, num_updates=13491, lr=9.99001e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=155167
2021-06-20 13:45:04 | INFO | train_inner | epoch 005: 1563 / 3002 loss=2.568, ppl=5.93, wps=5922.5, ups=0.09, wpb=64918, bsz=128, num_updates=13492, lr=9.99001e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=155178
2021-06-20 13:45:15 | INFO | train_inner | epoch 005: 1564 / 3002 loss=2.519, ppl=5.73, wps=5945.3, ups=0.09, wpb=64599, bsz=128, num_updates=13493, lr=9.99e-05, gnorm=1.999, loss_scale=16, train_wall=10, gb_free=2.8, wall=155189
2021-06-20 13:45:26 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 13:45:37 | INFO | train_inner | epoch 005: 1566 / 3002 loss=2.557, ppl=5.88, wps=2926.5, ups=0.05, wpb=64846, bsz=128, num_updates=13494, lr=9.99e-05, gnorm=1.995, loss_scale=8, train_wall=21, gb_free=2.8, wall=155211
2021-06-20 13:45:48 | INFO | train_inner | epoch 005: 1567 / 3002 loss=2.556, ppl=5.88, wps=5822.3, ups=0.09, wpb=64906, bsz=128, num_updates=13495, lr=9.99e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=155222
2021-06-20 13:45:59 | INFO | train_inner | epoch 005: 1568 / 3002 loss=2.503, ppl=5.67, wps=5814.7, ups=0.09, wpb=64847, bsz=128, num_updates=13496, lr=9.99e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=155234
2021-06-20 13:46:10 | INFO | train_inner | epoch 005: 1569 / 3002 loss=2.429, ppl=5.39, wps=5912.8, ups=0.09, wpb=64893, bsz=128, num_updates=13497, lr=9.99e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=155245
2021-06-20 13:46:21 | INFO | train_inner | epoch 005: 1570 / 3002 loss=2.259, ppl=4.79, wps=5980.5, ups=0.09, wpb=64804, bsz=128, num_updates=13498, lr=9.99e-05, gnorm=2.02, loss_scale=8, train_wall=10, gb_free=2.8, wall=155255
2021-06-20 13:46:32 | INFO | train_inner | epoch 005: 1571 / 3002 loss=2.584, ppl=6, wps=5840.6, ups=0.09, wpb=64832, bsz=128, num_updates=13499, lr=9.99e-05, gnorm=2.066, loss_scale=8, train_wall=11, gb_free=2.8, wall=155266
2021-06-20 13:46:43 | INFO | train_inner | epoch 005: 1572 / 3002 loss=2.707, ppl=6.53, wps=5763.3, ups=0.09, wpb=64828, bsz=128, num_updates=13500, lr=9.99e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=155278
2021-06-20 13:46:55 | INFO | train_inner | epoch 005: 1573 / 3002 loss=2.595, ppl=6.04, wps=5766.9, ups=0.09, wpb=64867, bsz=128, num_updates=13501, lr=9.99e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=155289
2021-06-20 13:47:06 | INFO | train_inner | epoch 005: 1574 / 3002 loss=2.483, ppl=5.59, wps=5730.9, ups=0.09, wpb=64765, bsz=128, num_updates=13502, lr=9.99e-05, gnorm=2.056, loss_scale=8, train_wall=11, gb_free=2.8, wall=155300
2021-06-20 13:47:17 | INFO | train_inner | epoch 005: 1575 / 3002 loss=2.461, ppl=5.51, wps=5993.8, ups=0.09, wpb=64889, bsz=128, num_updates=13503, lr=9.99e-05, gnorm=1.914, loss_scale=8, train_wall=10, gb_free=2.8, wall=155311
2021-06-20 13:47:28 | INFO | train_inner | epoch 005: 1576 / 3002 loss=2.534, ppl=5.79, wps=5719.6, ups=0.09, wpb=64840, bsz=128, num_updates=13504, lr=9.99e-05, gnorm=1.872, loss_scale=8, train_wall=11, gb_free=2.8, wall=155322
2021-06-20 13:47:39 | INFO | train_inner | epoch 005: 1577 / 3002 loss=2.46, ppl=5.5, wps=5831.2, ups=0.09, wpb=64902, bsz=128, num_updates=13505, lr=9.99e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=155334
2021-06-20 13:47:50 | INFO | train_inner | epoch 005: 1578 / 3002 loss=2.656, ppl=6.3, wps=5845.1, ups=0.09, wpb=64826, bsz=128, num_updates=13506, lr=9.98999e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=155345
2021-06-20 13:48:02 | INFO | train_inner | epoch 005: 1579 / 3002 loss=2.41, ppl=5.31, wps=5787.5, ups=0.09, wpb=64804, bsz=128, num_updates=13507, lr=9.98999e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=155356
2021-06-20 13:48:13 | INFO | train_inner | epoch 005: 1580 / 3002 loss=2.729, ppl=6.63, wps=5683.5, ups=0.09, wpb=64795, bsz=128, num_updates=13508, lr=9.98999e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=155367
2021-06-20 13:48:24 | INFO | train_inner | epoch 005: 1581 / 3002 loss=2.461, ppl=5.5, wps=5932.7, ups=0.09, wpb=64854, bsz=128, num_updates=13509, lr=9.98999e-05, gnorm=1.989, loss_scale=8, train_wall=10, gb_free=2.8, wall=155378
2021-06-20 13:48:35 | INFO | train_inner | epoch 005: 1582 / 3002 loss=2.352, ppl=5.1, wps=5906.3, ups=0.09, wpb=64834, bsz=128, num_updates=13510, lr=9.98999e-05, gnorm=2.177, loss_scale=8, train_wall=10, gb_free=2.8, wall=155389
2021-06-20 13:48:46 | INFO | train_inner | epoch 005: 1583 / 3002 loss=2.531, ppl=5.78, wps=5763.8, ups=0.09, wpb=64810, bsz=128, num_updates=13511, lr=9.98999e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=155400
2021-06-20 13:48:57 | INFO | train_inner | epoch 005: 1584 / 3002 loss=2.439, ppl=5.42, wps=5859.9, ups=0.09, wpb=64820, bsz=128, num_updates=13512, lr=9.98999e-05, gnorm=1.885, loss_scale=8, train_wall=11, gb_free=2.8, wall=155411
2021-06-20 13:49:08 | INFO | train_inner | epoch 005: 1585 / 3002 loss=2.395, ppl=5.26, wps=5766.7, ups=0.09, wpb=64765, bsz=128, num_updates=13513, lr=9.98999e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=155423
2021-06-20 13:49:20 | INFO | train_inner | epoch 005: 1586 / 3002 loss=2.616, ppl=6.13, wps=5824, ups=0.09, wpb=64824, bsz=128, num_updates=13514, lr=9.98999e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=155434
2021-06-20 13:49:31 | INFO | train_inner | epoch 005: 1587 / 3002 loss=2.58, ppl=5.98, wps=5821.8, ups=0.09, wpb=64811, bsz=128, num_updates=13515, lr=9.98999e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=155445
2021-06-20 13:49:42 | INFO | train_inner | epoch 005: 1588 / 3002 loss=2.519, ppl=5.73, wps=5853.5, ups=0.09, wpb=64756, bsz=128, num_updates=13516, lr=9.98999e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=155456
2021-06-20 13:49:53 | INFO | train_inner | epoch 005: 1589 / 3002 loss=2.407, ppl=5.3, wps=5780.2, ups=0.09, wpb=64868, bsz=128, num_updates=13517, lr=9.98999e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=155467
2021-06-20 13:50:04 | INFO | train_inner | epoch 005: 1590 / 3002 loss=2.57, ppl=5.94, wps=5782.3, ups=0.09, wpb=64704, bsz=128, num_updates=13518, lr=9.98998e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=155478
2021-06-20 13:50:15 | INFO | train_inner | epoch 005: 1591 / 3002 loss=2.586, ppl=6, wps=5729.8, ups=0.09, wpb=64781, bsz=128, num_updates=13519, lr=9.98998e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=155490
2021-06-20 13:50:26 | INFO | train_inner | epoch 005: 1592 / 3002 loss=2.503, ppl=5.67, wps=5892.8, ups=0.09, wpb=64847, bsz=128, num_updates=13520, lr=9.98998e-05, gnorm=2.092, loss_scale=8, train_wall=11, gb_free=2.8, wall=155501
2021-06-20 13:50:37 | INFO | train_inner | epoch 005: 1593 / 3002 loss=2.53, ppl=5.78, wps=5922.4, ups=0.09, wpb=64859, bsz=128, num_updates=13521, lr=9.98998e-05, gnorm=2.087, loss_scale=8, train_wall=10, gb_free=2.8, wall=155512
2021-06-20 13:50:48 | INFO | train_inner | epoch 005: 1594 / 3002 loss=2.429, ppl=5.38, wps=5942, ups=0.09, wpb=64889, bsz=128, num_updates=13522, lr=9.98998e-05, gnorm=1.892, loss_scale=8, train_wall=10, gb_free=2.8, wall=155523
2021-06-20 13:50:59 | INFO | train_inner | epoch 005: 1595 / 3002 loss=2.414, ppl=5.33, wps=5888.1, ups=0.09, wpb=64733, bsz=128, num_updates=13523, lr=9.98998e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=155534
2021-06-20 13:51:10 | INFO | train_inner | epoch 005: 1596 / 3002 loss=2.498, ppl=5.65, wps=5838.3, ups=0.09, wpb=64818, bsz=128, num_updates=13524, lr=9.98998e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=155545
2021-06-20 13:51:21 | INFO | train_inner | epoch 005: 1597 / 3002 loss=2.407, ppl=5.31, wps=6035.5, ups=0.09, wpb=64899, bsz=128, num_updates=13525, lr=9.98998e-05, gnorm=1.933, loss_scale=8, train_wall=10, gb_free=2.8, wall=155555
2021-06-20 13:51:32 | INFO | train_inner | epoch 005: 1598 / 3002 loss=2.511, ppl=5.7, wps=5879, ups=0.09, wpb=64853, bsz=128, num_updates=13526, lr=9.98998e-05, gnorm=2.221, loss_scale=8, train_wall=11, gb_free=2.8, wall=155567
2021-06-20 13:51:43 | INFO | train_inner | epoch 005: 1599 / 3002 loss=2.426, ppl=5.37, wps=5928, ups=0.09, wpb=64762, bsz=128, num_updates=13527, lr=9.98998e-05, gnorm=1.962, loss_scale=8, train_wall=10, gb_free=2.8, wall=155577
2021-06-20 13:51:54 | INFO | train_inner | epoch 005: 1600 / 3002 loss=2.365, ppl=5.15, wps=5797.3, ups=0.09, wpb=64807, bsz=128, num_updates=13528, lr=9.98998e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=155589
2021-06-20 13:52:05 | INFO | train_inner | epoch 005: 1601 / 3002 loss=2.651, ppl=6.28, wps=5822.8, ups=0.09, wpb=64832, bsz=128, num_updates=13529, lr=9.98998e-05, gnorm=2.049, loss_scale=8, train_wall=11, gb_free=2.8, wall=155600
2021-06-20 13:52:17 | INFO | train_inner | epoch 005: 1602 / 3002 loss=2.672, ppl=6.38, wps=5755.7, ups=0.09, wpb=64853, bsz=128, num_updates=13530, lr=9.98998e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=155611
2021-06-20 13:52:28 | INFO | train_inner | epoch 005: 1603 / 3002 loss=2.531, ppl=5.78, wps=5748.6, ups=0.09, wpb=64822, bsz=128, num_updates=13531, lr=9.98997e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=155622
2021-06-20 13:52:39 | INFO | train_inner | epoch 005: 1604 / 3002 loss=2.598, ppl=6.05, wps=5757.1, ups=0.09, wpb=64842, bsz=128, num_updates=13532, lr=9.98997e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=155634
2021-06-20 13:52:50 | INFO | train_inner | epoch 005: 1605 / 3002 loss=2.302, ppl=4.93, wps=5990.6, ups=0.09, wpb=64877, bsz=128, num_updates=13533, lr=9.98997e-05, gnorm=1.852, loss_scale=8, train_wall=10, gb_free=2.8, wall=155644
2021-06-20 13:53:01 | INFO | train_inner | epoch 005: 1606 / 3002 loss=2.528, ppl=5.77, wps=5948.8, ups=0.09, wpb=64820, bsz=128, num_updates=13534, lr=9.98997e-05, gnorm=1.915, loss_scale=8, train_wall=10, gb_free=2.8, wall=155655
2021-06-20 13:53:12 | INFO | train_inner | epoch 005: 1607 / 3002 loss=2.576, ppl=5.96, wps=5853.6, ups=0.09, wpb=64851, bsz=128, num_updates=13535, lr=9.98997e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=155666
2021-06-20 13:53:23 | INFO | train_inner | epoch 005: 1608 / 3002 loss=2.467, ppl=5.53, wps=5801.4, ups=0.09, wpb=64824, bsz=128, num_updates=13536, lr=9.98997e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=155678
2021-06-20 13:53:34 | INFO | train_inner | epoch 005: 1609 / 3002 loss=2.419, ppl=5.35, wps=5826.8, ups=0.09, wpb=64837, bsz=128, num_updates=13537, lr=9.98997e-05, gnorm=1.923, loss_scale=8, train_wall=11, gb_free=2.8, wall=155689
2021-06-20 13:53:46 | INFO | train_inner | epoch 005: 1610 / 3002 loss=2.659, ppl=6.31, wps=5684.3, ups=0.09, wpb=64792, bsz=128, num_updates=13538, lr=9.98997e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=155700
2021-06-20 13:53:57 | INFO | train_inner | epoch 005: 1611 / 3002 loss=2.474, ppl=5.55, wps=5803.3, ups=0.09, wpb=64746, bsz=128, num_updates=13539, lr=9.98997e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=155711
2021-06-20 13:54:08 | INFO | train_inner | epoch 005: 1612 / 3002 loss=2.319, ppl=4.99, wps=5803.9, ups=0.09, wpb=64794, bsz=128, num_updates=13540, lr=9.98997e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=155722
2021-06-20 13:54:19 | INFO | train_inner | epoch 005: 1613 / 3002 loss=2.558, ppl=5.89, wps=5763, ups=0.09, wpb=64820, bsz=128, num_updates=13541, lr=9.98997e-05, gnorm=2.457, loss_scale=8, train_wall=11, gb_free=2.8, wall=155734
2021-06-20 13:54:30 | INFO | train_inner | epoch 005: 1614 / 3002 loss=2.643, ppl=6.25, wps=5810.8, ups=0.09, wpb=64759, bsz=128, num_updates=13542, lr=9.98997e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=155745
2021-06-20 13:54:42 | INFO | train_inner | epoch 005: 1615 / 3002 loss=2.53, ppl=5.78, wps=5764.8, ups=0.09, wpb=64812, bsz=128, num_updates=13543, lr=9.98996e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=155756
2021-06-20 13:54:53 | INFO | train_inner | epoch 005: 1616 / 3002 loss=2.505, ppl=5.67, wps=5996.3, ups=0.09, wpb=64910, bsz=128, num_updates=13544, lr=9.98996e-05, gnorm=1.944, loss_scale=8, train_wall=10, gb_free=2.8, wall=155767
2021-06-20 13:55:04 | INFO | train_inner | epoch 005: 1617 / 3002 loss=2.46, ppl=5.5, wps=5786.7, ups=0.09, wpb=64779, bsz=128, num_updates=13545, lr=9.98996e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=155778
2021-06-20 13:55:15 | INFO | train_inner | epoch 005: 1618 / 3002 loss=2.482, ppl=5.59, wps=5761.1, ups=0.09, wpb=64852, bsz=128, num_updates=13546, lr=9.98996e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=155789
2021-06-20 13:55:26 | INFO | train_inner | epoch 005: 1619 / 3002 loss=2.422, ppl=5.36, wps=5707.9, ups=0.09, wpb=64864, bsz=128, num_updates=13547, lr=9.98996e-05, gnorm=2.042, loss_scale=8, train_wall=11, gb_free=2.8, wall=155801
2021-06-20 13:55:38 | INFO | train_inner | epoch 005: 1620 / 3002 loss=2.501, ppl=5.66, wps=5750.7, ups=0.09, wpb=64775, bsz=128, num_updates=13548, lr=9.98996e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=155812
2021-06-20 13:55:49 | INFO | train_inner | epoch 005: 1621 / 3002 loss=2.585, ppl=6, wps=5855, ups=0.09, wpb=64835, bsz=128, num_updates=13549, lr=9.98996e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=155823
2021-06-20 13:56:00 | INFO | train_inner | epoch 005: 1622 / 3002 loss=2.547, ppl=5.85, wps=5882.4, ups=0.09, wpb=64794, bsz=128, num_updates=13550, lr=9.98996e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=155834
2021-06-20 13:56:11 | INFO | train_inner | epoch 005: 1623 / 3002 loss=2.488, ppl=5.61, wps=5816.2, ups=0.09, wpb=64818, bsz=128, num_updates=13551, lr=9.98996e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=155845
2021-06-20 13:56:22 | INFO | train_inner | epoch 005: 1624 / 3002 loss=2.54, ppl=5.82, wps=5860, ups=0.09, wpb=64774, bsz=128, num_updates=13552, lr=9.98996e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=155856
2021-06-20 13:56:33 | INFO | train_inner | epoch 005: 1625 / 3002 loss=2.56, ppl=5.9, wps=5750.1, ups=0.09, wpb=64811, bsz=128, num_updates=13553, lr=9.98996e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=155867
2021-06-20 13:56:44 | INFO | train_inner | epoch 005: 1626 / 3002 loss=2.647, ppl=6.26, wps=5890.5, ups=0.09, wpb=64807, bsz=128, num_updates=13554, lr=9.98996e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=155878
2021-06-20 13:56:55 | INFO | train_inner | epoch 005: 1627 / 3002 loss=2.591, ppl=6.03, wps=5756.1, ups=0.09, wpb=64791, bsz=128, num_updates=13555, lr=9.98996e-05, gnorm=4.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=155890
2021-06-20 13:57:07 | INFO | train_inner | epoch 005: 1628 / 3002 loss=2.518, ppl=5.73, wps=5773.2, ups=0.09, wpb=64828, bsz=128, num_updates=13556, lr=9.98995e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=155901
2021-06-20 13:57:18 | INFO | train_inner | epoch 005: 1629 / 3002 loss=2.612, ppl=6.11, wps=5865.9, ups=0.09, wpb=64910, bsz=128, num_updates=13557, lr=9.98995e-05, gnorm=2.075, loss_scale=8, train_wall=11, gb_free=2.8, wall=155912
2021-06-20 13:57:29 | INFO | train_inner | epoch 005: 1630 / 3002 loss=2.483, ppl=5.59, wps=5799.9, ups=0.09, wpb=64858, bsz=128, num_updates=13558, lr=9.98995e-05, gnorm=3.089, loss_scale=8, train_wall=11, gb_free=2.8, wall=155923
2021-06-20 13:57:40 | INFO | train_inner | epoch 005: 1631 / 3002 loss=2.397, ppl=5.27, wps=5955.4, ups=0.09, wpb=64826, bsz=128, num_updates=13559, lr=9.98995e-05, gnorm=2.084, loss_scale=8, train_wall=10, gb_free=2.8, wall=155934
2021-06-20 13:57:51 | INFO | train_inner | epoch 005: 1632 / 3002 loss=2.613, ppl=6.12, wps=5870, ups=0.09, wpb=64852, bsz=128, num_updates=13560, lr=9.98995e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=155945
2021-06-20 13:58:02 | INFO | train_inner | epoch 005: 1633 / 3002 loss=2.474, ppl=5.55, wps=5806, ups=0.09, wpb=64777, bsz=128, num_updates=13561, lr=9.98995e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=155956
2021-06-20 13:58:13 | INFO | train_inner | epoch 005: 1634 / 3002 loss=2.58, ppl=5.98, wps=5791.6, ups=0.09, wpb=64794, bsz=128, num_updates=13562, lr=9.98995e-05, gnorm=2.08, loss_scale=8, train_wall=11, gb_free=2.8, wall=155968
2021-06-20 13:58:24 | INFO | train_inner | epoch 005: 1635 / 3002 loss=2.438, ppl=5.42, wps=5719.1, ups=0.09, wpb=64752, bsz=128, num_updates=13563, lr=9.98995e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=155979
2021-06-20 13:58:36 | INFO | train_inner | epoch 005: 1636 / 3002 loss=2.467, ppl=5.53, wps=5878.7, ups=0.09, wpb=64777, bsz=128, num_updates=13564, lr=9.98995e-05, gnorm=1.88, loss_scale=8, train_wall=11, gb_free=2.8, wall=155990
2021-06-20 13:58:47 | INFO | train_inner | epoch 005: 1637 / 3002 loss=2.618, ppl=6.14, wps=5836.4, ups=0.09, wpb=64816, bsz=128, num_updates=13565, lr=9.98995e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=156001
2021-06-20 13:58:58 | INFO | train_inner | epoch 005: 1638 / 3002 loss=2.441, ppl=5.43, wps=5796.9, ups=0.09, wpb=64769, bsz=128, num_updates=13566, lr=9.98995e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=156012
2021-06-20 13:59:09 | INFO | train_inner | epoch 005: 1639 / 3002 loss=2.423, ppl=5.36, wps=5835.2, ups=0.09, wpb=64790, bsz=128, num_updates=13567, lr=9.98995e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=156023
2021-06-20 13:59:20 | INFO | train_inner | epoch 005: 1640 / 3002 loss=2.658, ppl=6.31, wps=5879.4, ups=0.09, wpb=64817, bsz=128, num_updates=13568, lr=9.98994e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=156034
2021-06-20 13:59:31 | INFO | train_inner | epoch 005: 1641 / 3002 loss=2.657, ppl=6.31, wps=5886.4, ups=0.09, wpb=64746, bsz=128, num_updates=13569, lr=9.98994e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=156045
2021-06-20 13:59:42 | INFO | train_inner | epoch 005: 1642 / 3002 loss=2.547, ppl=5.84, wps=5835.1, ups=0.09, wpb=64727, bsz=128, num_updates=13570, lr=9.98994e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=156056
2021-06-20 13:59:53 | INFO | train_inner | epoch 005: 1643 / 3002 loss=2.43, ppl=5.39, wps=5766.7, ups=0.09, wpb=64850, bsz=128, num_updates=13571, lr=9.98994e-05, gnorm=5.728, loss_scale=8, train_wall=11, gb_free=2.8, wall=156068
2021-06-20 14:00:04 | INFO | train_inner | epoch 005: 1644 / 3002 loss=2.433, ppl=5.4, wps=5840.9, ups=0.09, wpb=64837, bsz=128, num_updates=13572, lr=9.98994e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=156079
2021-06-20 14:00:15 | INFO | train_inner | epoch 005: 1645 / 3002 loss=2.406, ppl=5.3, wps=5835.4, ups=0.09, wpb=64807, bsz=128, num_updates=13573, lr=9.98994e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=156090
2021-06-20 14:00:27 | INFO | train_inner | epoch 005: 1646 / 3002 loss=2.536, ppl=5.8, wps=5784, ups=0.09, wpb=64877, bsz=128, num_updates=13574, lr=9.98994e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=156101
2021-06-20 14:00:38 | INFO | train_inner | epoch 005: 1647 / 3002 loss=2.566, ppl=5.92, wps=5788, ups=0.09, wpb=64790, bsz=128, num_updates=13575, lr=9.98994e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=156112
2021-06-20 14:00:49 | INFO | train_inner | epoch 005: 1648 / 3002 loss=2.504, ppl=5.67, wps=5783.6, ups=0.09, wpb=64822, bsz=128, num_updates=13576, lr=9.98994e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=156123
2021-06-20 14:01:00 | INFO | train_inner | epoch 005: 1649 / 3002 loss=2.489, ppl=5.61, wps=5784.5, ups=0.09, wpb=64739, bsz=128, num_updates=13577, lr=9.98994e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=156135
2021-06-20 14:01:11 | INFO | train_inner | epoch 005: 1650 / 3002 loss=2.513, ppl=5.71, wps=5777.7, ups=0.09, wpb=64783, bsz=128, num_updates=13578, lr=9.98994e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=156146
2021-06-20 14:01:23 | INFO | train_inner | epoch 005: 1651 / 3002 loss=2.582, ppl=5.99, wps=5822.7, ups=0.09, wpb=64732, bsz=128, num_updates=13579, lr=9.98994e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=156157
2021-06-20 14:01:34 | INFO | train_inner | epoch 005: 1652 / 3002 loss=2.496, ppl=5.64, wps=5921.2, ups=0.09, wpb=64889, bsz=128, num_updates=13580, lr=9.98994e-05, gnorm=2.258, loss_scale=8, train_wall=10, gb_free=2.8, wall=156168
2021-06-20 14:01:45 | INFO | train_inner | epoch 005: 1653 / 3002 loss=2.457, ppl=5.49, wps=5863.9, ups=0.09, wpb=64903, bsz=128, num_updates=13581, lr=9.98993e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=156179
2021-06-20 14:01:56 | INFO | train_inner | epoch 005: 1654 / 3002 loss=2.411, ppl=5.32, wps=5927.9, ups=0.09, wpb=64854, bsz=128, num_updates=13582, lr=9.98993e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=156190
2021-06-20 14:02:07 | INFO | train_inner | epoch 005: 1655 / 3002 loss=2.521, ppl=5.74, wps=5854.2, ups=0.09, wpb=64858, bsz=128, num_updates=13583, lr=9.98993e-05, gnorm=2.494, loss_scale=8, train_wall=11, gb_free=2.8, wall=156201
2021-06-20 14:02:18 | INFO | train_inner | epoch 005: 1656 / 3002 loss=2.581, ppl=5.99, wps=5812.2, ups=0.09, wpb=64751, bsz=128, num_updates=13584, lr=9.98993e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=156212
2021-06-20 14:02:29 | INFO | train_inner | epoch 005: 1657 / 3002 loss=2.351, ppl=5.1, wps=5755.9, ups=0.09, wpb=64749, bsz=128, num_updates=13585, lr=9.98993e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=156223
2021-06-20 14:02:40 | INFO | train_inner | epoch 005: 1658 / 3002 loss=2.489, ppl=5.61, wps=5837.8, ups=0.09, wpb=64854, bsz=128, num_updates=13586, lr=9.98993e-05, gnorm=1.89, loss_scale=8, train_wall=11, gb_free=2.8, wall=156234
2021-06-20 14:02:51 | INFO | train_inner | epoch 005: 1659 / 3002 loss=2.398, ppl=5.27, wps=5829.6, ups=0.09, wpb=64810, bsz=128, num_updates=13587, lr=9.98993e-05, gnorm=2.075, loss_scale=8, train_wall=11, gb_free=2.8, wall=156246
2021-06-20 14:03:02 | INFO | train_inner | epoch 005: 1660 / 3002 loss=2.423, ppl=5.36, wps=5867.4, ups=0.09, wpb=64862, bsz=128, num_updates=13588, lr=9.98993e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=156257
2021-06-20 14:03:13 | INFO | train_inner | epoch 005: 1661 / 3002 loss=2.63, ppl=6.19, wps=5920.4, ups=0.09, wpb=64842, bsz=128, num_updates=13589, lr=9.98993e-05, gnorm=1.85, loss_scale=8, train_wall=11, gb_free=2.8, wall=156268
2021-06-20 14:03:24 | INFO | train_inner | epoch 005: 1662 / 3002 loss=2.546, ppl=5.84, wps=5862, ups=0.09, wpb=64808, bsz=128, num_updates=13590, lr=9.98993e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=156279
2021-06-20 14:03:35 | INFO | train_inner | epoch 005: 1663 / 3002 loss=2.428, ppl=5.38, wps=5865.6, ups=0.09, wpb=64810, bsz=128, num_updates=13591, lr=9.98993e-05, gnorm=6.096, loss_scale=8, train_wall=11, gb_free=2.8, wall=156290
2021-06-20 14:03:46 | INFO | train_inner | epoch 005: 1664 / 3002 loss=2.452, ppl=5.47, wps=5834, ups=0.09, wpb=64834, bsz=128, num_updates=13592, lr=9.98993e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=156301
2021-06-20 14:03:57 | INFO | train_inner | epoch 005: 1665 / 3002 loss=2.694, ppl=6.47, wps=5928.8, ups=0.09, wpb=64855, bsz=128, num_updates=13593, lr=9.98992e-05, gnorm=1.982, loss_scale=8, train_wall=10, gb_free=2.8, wall=156312
2021-06-20 14:04:09 | INFO | train_inner | epoch 005: 1666 / 3002 loss=2.431, ppl=5.39, wps=5855.4, ups=0.09, wpb=64884, bsz=128, num_updates=13594, lr=9.98992e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=156323
2021-06-20 14:04:20 | INFO | train_inner | epoch 005: 1667 / 3002 loss=2.531, ppl=5.78, wps=5805.6, ups=0.09, wpb=64774, bsz=128, num_updates=13595, lr=9.98992e-05, gnorm=2.162, loss_scale=8, train_wall=11, gb_free=2.8, wall=156334
2021-06-20 14:04:31 | INFO | train_inner | epoch 005: 1668 / 3002 loss=2.591, ppl=6.03, wps=5765.8, ups=0.09, wpb=64789, bsz=128, num_updates=13596, lr=9.98992e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=156345
2021-06-20 14:04:42 | INFO | train_inner | epoch 005: 1669 / 3002 loss=2.614, ppl=6.12, wps=5776.4, ups=0.09, wpb=64848, bsz=128, num_updates=13597, lr=9.98992e-05, gnorm=8.566, loss_scale=8, train_wall=11, gb_free=2.8, wall=156356
2021-06-20 14:04:53 | INFO | train_inner | epoch 005: 1670 / 3002 loss=2.463, ppl=5.51, wps=5874.5, ups=0.09, wpb=64823, bsz=128, num_updates=13598, lr=9.98992e-05, gnorm=2.274, loss_scale=8, train_wall=11, gb_free=2.8, wall=156368
2021-06-20 14:05:04 | INFO | train_inner | epoch 005: 1671 / 3002 loss=2.632, ppl=6.2, wps=5838.4, ups=0.09, wpb=64769, bsz=128, num_updates=13599, lr=9.98992e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=156379
2021-06-20 14:05:15 | INFO | train_inner | epoch 005: 1672 / 3002 loss=2.46, ppl=5.5, wps=5856.6, ups=0.09, wpb=64884, bsz=128, num_updates=13600, lr=9.98992e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=156390
2021-06-20 14:05:27 | INFO | train_inner | epoch 005: 1673 / 3002 loss=2.554, ppl=5.87, wps=5802.1, ups=0.09, wpb=64824, bsz=128, num_updates=13601, lr=9.98992e-05, gnorm=2.611, loss_scale=8, train_wall=11, gb_free=2.8, wall=156401
2021-06-20 14:05:38 | INFO | train_inner | epoch 005: 1674 / 3002 loss=2.628, ppl=6.18, wps=5896.2, ups=0.09, wpb=64863, bsz=128, num_updates=13602, lr=9.98992e-05, gnorm=2.147, loss_scale=8, train_wall=11, gb_free=2.8, wall=156412
2021-06-20 14:05:49 | INFO | train_inner | epoch 005: 1675 / 3002 loss=2.547, ppl=5.84, wps=5819.4, ups=0.09, wpb=64815, bsz=128, num_updates=13603, lr=9.98992e-05, gnorm=2.835, loss_scale=8, train_wall=11, gb_free=2.8, wall=156423
2021-06-20 14:06:00 | INFO | train_inner | epoch 005: 1676 / 3002 loss=2.544, ppl=5.83, wps=5862.5, ups=0.09, wpb=64813, bsz=128, num_updates=13604, lr=9.98992e-05, gnorm=2.16, loss_scale=8, train_wall=11, gb_free=2.8, wall=156434
2021-06-20 14:06:11 | INFO | train_inner | epoch 005: 1677 / 3002 loss=2.512, ppl=5.7, wps=5803.2, ups=0.09, wpb=64807, bsz=128, num_updates=13605, lr=9.98992e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=156445
2021-06-20 14:06:22 | INFO | train_inner | epoch 005: 1678 / 3002 loss=2.544, ppl=5.83, wps=5800.3, ups=0.09, wpb=64795, bsz=128, num_updates=13606, lr=9.98991e-05, gnorm=2.812, loss_scale=8, train_wall=11, gb_free=2.8, wall=156456
2021-06-20 14:06:33 | INFO | train_inner | epoch 005: 1679 / 3002 loss=2.429, ppl=5.38, wps=5793, ups=0.09, wpb=64895, bsz=128, num_updates=13607, lr=9.98991e-05, gnorm=2.046, loss_scale=8, train_wall=11, gb_free=2.8, wall=156468
2021-06-20 14:06:44 | INFO | train_inner | epoch 005: 1680 / 3002 loss=2.587, ppl=6.01, wps=5833.1, ups=0.09, wpb=64824, bsz=128, num_updates=13608, lr=9.98991e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=156479
2021-06-20 14:06:56 | INFO | train_inner | epoch 005: 1681 / 3002 loss=2.586, ppl=6.01, wps=5786.5, ups=0.09, wpb=64751, bsz=128, num_updates=13609, lr=9.98991e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=156490
2021-06-20 14:07:07 | INFO | train_inner | epoch 005: 1682 / 3002 loss=2.459, ppl=5.5, wps=5916.8, ups=0.09, wpb=64813, bsz=128, num_updates=13610, lr=9.98991e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=156501
2021-06-20 14:07:18 | INFO | train_inner | epoch 005: 1683 / 3002 loss=2.355, ppl=5.11, wps=5764.5, ups=0.09, wpb=64797, bsz=128, num_updates=13611, lr=9.98991e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=156512
2021-06-20 14:07:29 | INFO | train_inner | epoch 005: 1684 / 3002 loss=2.646, ppl=6.26, wps=5772.1, ups=0.09, wpb=64813, bsz=128, num_updates=13612, lr=9.98991e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=156523
2021-06-20 14:07:40 | INFO | train_inner | epoch 005: 1685 / 3002 loss=2.575, ppl=5.96, wps=5946.6, ups=0.09, wpb=64831, bsz=128, num_updates=13613, lr=9.98991e-05, gnorm=2.174, loss_scale=8, train_wall=10, gb_free=2.8, wall=156534
2021-06-20 14:07:51 | INFO | train_inner | epoch 005: 1686 / 3002 loss=2.566, ppl=5.92, wps=5688.6, ups=0.09, wpb=64843, bsz=128, num_updates=13614, lr=9.98991e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=156546
2021-06-20 14:08:02 | INFO | train_inner | epoch 005: 1687 / 3002 loss=2.572, ppl=5.95, wps=5903.7, ups=0.09, wpb=64835, bsz=128, num_updates=13615, lr=9.98991e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=156557
2021-06-20 14:08:14 | INFO | train_inner | epoch 005: 1688 / 3002 loss=2.699, ppl=6.49, wps=5746.9, ups=0.09, wpb=64753, bsz=128, num_updates=13616, lr=9.98991e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=156568
2021-06-20 14:08:25 | INFO | train_inner | epoch 005: 1689 / 3002 loss=2.506, ppl=5.68, wps=5854.4, ups=0.09, wpb=64859, bsz=128, num_updates=13617, lr=9.98991e-05, gnorm=1.878, loss_scale=8, train_wall=11, gb_free=2.8, wall=156579
2021-06-20 14:08:36 | INFO | train_inner | epoch 005: 1690 / 3002 loss=2.488, ppl=5.61, wps=5916.6, ups=0.09, wpb=64863, bsz=128, num_updates=13618, lr=9.9899e-05, gnorm=1.889, loss_scale=8, train_wall=10, gb_free=2.8, wall=156590
2021-06-20 14:08:47 | INFO | train_inner | epoch 005: 1691 / 3002 loss=2.37, ppl=5.17, wps=5892.3, ups=0.09, wpb=64824, bsz=128, num_updates=13619, lr=9.9899e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=156601
2021-06-20 14:08:58 | INFO | train_inner | epoch 005: 1692 / 3002 loss=2.518, ppl=5.73, wps=5743.3, ups=0.09, wpb=64842, bsz=128, num_updates=13620, lr=9.9899e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=156612
2021-06-20 14:09:09 | INFO | train_inner | epoch 005: 1693 / 3002 loss=2.621, ppl=6.15, wps=5736.8, ups=0.09, wpb=64822, bsz=128, num_updates=13621, lr=9.9899e-05, gnorm=1.925, loss_scale=16, train_wall=11, gb_free=2.8, wall=156624
2021-06-20 14:09:20 | INFO | train_inner | epoch 005: 1694 / 3002 loss=2.634, ppl=6.21, wps=5795.5, ups=0.09, wpb=64779, bsz=128, num_updates=13622, lr=9.9899e-05, gnorm=2.139, loss_scale=16, train_wall=11, gb_free=2.8, wall=156635
2021-06-20 14:09:31 | INFO | train_inner | epoch 005: 1695 / 3002 loss=2.572, ppl=5.95, wps=5832.1, ups=0.09, wpb=64810, bsz=128, num_updates=13623, lr=9.9899e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=156646
2021-06-20 14:09:42 | INFO | train_inner | epoch 005: 1696 / 3002 loss=2.527, ppl=5.76, wps=5909.2, ups=0.09, wpb=64828, bsz=128, num_updates=13624, lr=9.9899e-05, gnorm=2.065, loss_scale=16, train_wall=11, gb_free=2.8, wall=156657
2021-06-20 14:09:54 | INFO | train_inner | epoch 005: 1697 / 3002 loss=2.561, ppl=5.9, wps=5809.9, ups=0.09, wpb=64799, bsz=128, num_updates=13625, lr=9.9899e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=156668
2021-06-20 14:10:05 | INFO | train_inner | epoch 005: 1698 / 3002 loss=2.467, ppl=5.53, wps=5891.4, ups=0.09, wpb=64754, bsz=128, num_updates=13626, lr=9.9899e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=156679
2021-06-20 14:10:16 | INFO | train_inner | epoch 005: 1699 / 3002 loss=2.337, ppl=5.05, wps=5900.8, ups=0.09, wpb=64862, bsz=128, num_updates=13627, lr=9.9899e-05, gnorm=1.95, loss_scale=16, train_wall=11, gb_free=2.8, wall=156690
2021-06-20 14:10:27 | INFO | train_inner | epoch 005: 1700 / 3002 loss=2.516, ppl=5.72, wps=5822.2, ups=0.09, wpb=64805, bsz=128, num_updates=13628, lr=9.9899e-05, gnorm=1.981, loss_scale=16, train_wall=11, gb_free=2.8, wall=156701
2021-06-20 14:10:38 | INFO | train_inner | epoch 005: 1701 / 3002 loss=2.373, ppl=5.18, wps=5896.5, ups=0.09, wpb=64868, bsz=128, num_updates=13629, lr=9.9899e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=156712
2021-06-20 14:10:49 | INFO | train_inner | epoch 005: 1702 / 3002 loss=2.45, ppl=5.47, wps=5817.1, ups=0.09, wpb=64767, bsz=128, num_updates=13630, lr=9.9899e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=156723
2021-06-20 14:11:00 | INFO | train_inner | epoch 005: 1703 / 3002 loss=2.342, ppl=5.07, wps=5877.1, ups=0.09, wpb=64854, bsz=128, num_updates=13631, lr=9.98989e-05, gnorm=1.943, loss_scale=16, train_wall=11, gb_free=2.8, wall=156734
2021-06-20 14:11:11 | INFO | train_inner | epoch 005: 1704 / 3002 loss=2.507, ppl=5.69, wps=5796.1, ups=0.09, wpb=64878, bsz=128, num_updates=13632, lr=9.98989e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=156745
2021-06-20 14:11:22 | INFO | train_inner | epoch 005: 1705 / 3002 loss=2.455, ppl=5.48, wps=5919.7, ups=0.09, wpb=64796, bsz=128, num_updates=13633, lr=9.98989e-05, gnorm=1.9, loss_scale=16, train_wall=10, gb_free=2.8, wall=156756
2021-06-20 14:11:33 | INFO | train_inner | epoch 005: 1706 / 3002 loss=2.638, ppl=6.22, wps=5858.9, ups=0.09, wpb=64810, bsz=128, num_updates=13634, lr=9.98989e-05, gnorm=1.996, loss_scale=16, train_wall=11, gb_free=2.8, wall=156767
2021-06-20 14:11:44 | INFO | train_inner | epoch 005: 1707 / 3002 loss=2.618, ppl=6.14, wps=5972.2, ups=0.09, wpb=64891, bsz=128, num_updates=13635, lr=9.98989e-05, gnorm=1.993, loss_scale=16, train_wall=10, gb_free=2.8, wall=156778
2021-06-20 14:11:55 | INFO | train_inner | epoch 005: 1708 / 3002 loss=2.493, ppl=5.63, wps=5823.4, ups=0.09, wpb=64861, bsz=128, num_updates=13636, lr=9.98989e-05, gnorm=1.926, loss_scale=16, train_wall=11, gb_free=2.8, wall=156789
2021-06-20 14:12:06 | INFO | train_inner | epoch 005: 1709 / 3002 loss=2.457, ppl=5.49, wps=5884, ups=0.09, wpb=64851, bsz=128, num_updates=13637, lr=9.98989e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=156800
2021-06-20 14:12:17 | INFO | train_inner | epoch 005: 1710 / 3002 loss=2.483, ppl=5.59, wps=5752.5, ups=0.09, wpb=64859, bsz=128, num_updates=13638, lr=9.98989e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=156812
2021-06-20 14:12:29 | INFO | train_inner | epoch 005: 1711 / 3002 loss=2.4, ppl=5.28, wps=5820, ups=0.09, wpb=64810, bsz=128, num_updates=13639, lr=9.98989e-05, gnorm=2.007, loss_scale=16, train_wall=11, gb_free=2.8, wall=156823
2021-06-20 14:12:40 | INFO | train_inner | epoch 005: 1712 / 3002 loss=2.703, ppl=6.51, wps=5765.5, ups=0.09, wpb=64784, bsz=128, num_updates=13640, lr=9.98989e-05, gnorm=2.104, loss_scale=16, train_wall=11, gb_free=2.8, wall=156834
2021-06-20 14:12:51 | INFO | train_inner | epoch 005: 1713 / 3002 loss=2.495, ppl=5.64, wps=5782.4, ups=0.09, wpb=64869, bsz=128, num_updates=13641, lr=9.98989e-05, gnorm=1.889, loss_scale=16, train_wall=11, gb_free=2.8, wall=156845
2021-06-20 14:13:02 | INFO | train_inner | epoch 005: 1714 / 3002 loss=2.479, ppl=5.58, wps=5781.4, ups=0.09, wpb=64849, bsz=128, num_updates=13642, lr=9.98989e-05, gnorm=1.925, loss_scale=16, train_wall=11, gb_free=2.8, wall=156857
2021-06-20 14:13:13 | INFO | train_inner | epoch 005: 1715 / 3002 loss=2.54, ppl=5.81, wps=5871.4, ups=0.09, wpb=64779, bsz=128, num_updates=13643, lr=9.98988e-05, gnorm=1.929, loss_scale=16, train_wall=11, gb_free=2.8, wall=156868
2021-06-20 14:13:24 | INFO | train_inner | epoch 005: 1716 / 3002 loss=2.452, ppl=5.47, wps=5781.2, ups=0.09, wpb=64724, bsz=128, num_updates=13644, lr=9.98988e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=156879
2021-06-20 14:13:35 | INFO | train_inner | epoch 005: 1717 / 3002 loss=2.609, ppl=6.1, wps=5870.6, ups=0.09, wpb=64810, bsz=128, num_updates=13645, lr=9.98988e-05, gnorm=2.008, loss_scale=16, train_wall=11, gb_free=2.8, wall=156890
2021-06-20 14:13:47 | INFO | train_inner | epoch 005: 1718 / 3002 loss=2.569, ppl=5.93, wps=5693.9, ups=0.09, wpb=64852, bsz=128, num_updates=13646, lr=9.98988e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=156901
2021-06-20 14:13:58 | INFO | train_inner | epoch 005: 1719 / 3002 loss=2.479, ppl=5.58, wps=5935, ups=0.09, wpb=64831, bsz=128, num_updates=13647, lr=9.98988e-05, gnorm=1.952, loss_scale=16, train_wall=10, gb_free=2.8, wall=156912
2021-06-20 14:14:09 | INFO | train_inner | epoch 005: 1720 / 3002 loss=2.45, ppl=5.46, wps=5836.1, ups=0.09, wpb=64886, bsz=128, num_updates=13648, lr=9.98988e-05, gnorm=2.066, loss_scale=16, train_wall=11, gb_free=2.8, wall=156923
2021-06-20 14:14:20 | INFO | train_inner | epoch 005: 1721 / 3002 loss=2.44, ppl=5.43, wps=5773.6, ups=0.09, wpb=64737, bsz=128, num_updates=13649, lr=9.98988e-05, gnorm=2.004, loss_scale=16, train_wall=11, gb_free=2.8, wall=156934
2021-06-20 14:14:31 | INFO | train_inner | epoch 005: 1722 / 3002 loss=2.652, ppl=6.28, wps=6043, ups=0.09, wpb=64896, bsz=128, num_updates=13650, lr=9.98988e-05, gnorm=1.931, loss_scale=16, train_wall=10, gb_free=2.8, wall=156945
2021-06-20 14:14:42 | INFO | train_inner | epoch 005: 1723 / 3002 loss=2.553, ppl=5.87, wps=5844.7, ups=0.09, wpb=64861, bsz=128, num_updates=13651, lr=9.98988e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=156956
2021-06-20 14:14:53 | INFO | train_inner | epoch 005: 1724 / 3002 loss=2.535, ppl=5.8, wps=5728.9, ups=0.09, wpb=64794, bsz=128, num_updates=13652, lr=9.98988e-05, gnorm=2.055, loss_scale=16, train_wall=11, gb_free=2.8, wall=156968
2021-06-20 14:15:04 | INFO | train_inner | epoch 005: 1725 / 3002 loss=2.474, ppl=5.56, wps=5842.6, ups=0.09, wpb=64830, bsz=128, num_updates=13653, lr=9.98988e-05, gnorm=1.937, loss_scale=16, train_wall=11, gb_free=2.8, wall=156979
2021-06-20 14:15:15 | INFO | train_inner | epoch 005: 1726 / 3002 loss=2.448, ppl=5.46, wps=5826.5, ups=0.09, wpb=64912, bsz=128, num_updates=13654, lr=9.98988e-05, gnorm=2.034, loss_scale=16, train_wall=11, gb_free=2.8, wall=156990
2021-06-20 14:15:27 | INFO | train_inner | epoch 005: 1727 / 3002 loss=2.378, ppl=5.2, wps=5872.1, ups=0.09, wpb=64846, bsz=128, num_updates=13655, lr=9.98988e-05, gnorm=1.964, loss_scale=16, train_wall=11, gb_free=2.8, wall=157001
2021-06-20 14:15:37 | INFO | train_inner | epoch 005: 1728 / 3002 loss=2.539, ppl=5.81, wps=5923.4, ups=0.09, wpb=64839, bsz=128, num_updates=13656, lr=9.98987e-05, gnorm=1.921, loss_scale=16, train_wall=10, gb_free=2.8, wall=157012
2021-06-20 14:15:49 | INFO | train_inner | epoch 005: 1729 / 3002 loss=2.562, ppl=5.91, wps=5850.8, ups=0.09, wpb=64778, bsz=128, num_updates=13657, lr=9.98987e-05, gnorm=1.888, loss_scale=16, train_wall=11, gb_free=2.8, wall=157023
2021-06-20 14:16:00 | INFO | train_inner | epoch 005: 1730 / 3002 loss=2.459, ppl=5.5, wps=5881.2, ups=0.09, wpb=64827, bsz=128, num_updates=13658, lr=9.98987e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=157034
2021-06-20 14:16:11 | INFO | train_inner | epoch 005: 1731 / 3002 loss=2.542, ppl=5.83, wps=5845.1, ups=0.09, wpb=64748, bsz=128, num_updates=13659, lr=9.98987e-05, gnorm=2.267, loss_scale=16, train_wall=11, gb_free=2.8, wall=157045
2021-06-20 14:16:22 | INFO | train_inner | epoch 005: 1732 / 3002 loss=2.708, ppl=6.54, wps=5785.7, ups=0.09, wpb=64704, bsz=128, num_updates=13660, lr=9.98987e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=157056
2021-06-20 14:16:33 | INFO | train_inner | epoch 005: 1733 / 3002 loss=2.407, ppl=5.3, wps=5749.8, ups=0.09, wpb=64772, bsz=128, num_updates=13661, lr=9.98987e-05, gnorm=1.882, loss_scale=16, train_wall=11, gb_free=2.8, wall=157067
2021-06-20 14:16:44 | INFO | train_inner | epoch 005: 1734 / 3002 loss=2.421, ppl=5.36, wps=5826.5, ups=0.09, wpb=64738, bsz=128, num_updates=13662, lr=9.98987e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=157079
2021-06-20 14:16:55 | INFO | train_inner | epoch 005: 1735 / 3002 loss=2.464, ppl=5.52, wps=5838.8, ups=0.09, wpb=64952, bsz=128, num_updates=13663, lr=9.98987e-05, gnorm=1.887, loss_scale=16, train_wall=11, gb_free=2.8, wall=157090
2021-06-20 14:17:06 | INFO | train_inner | epoch 005: 1736 / 3002 loss=2.462, ppl=5.51, wps=5816, ups=0.09, wpb=64867, bsz=128, num_updates=13664, lr=9.98987e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=157101
2021-06-20 14:17:18 | INFO | train_inner | epoch 005: 1737 / 3002 loss=2.497, ppl=5.65, wps=5801, ups=0.09, wpb=64789, bsz=128, num_updates=13665, lr=9.98987e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=157112
2021-06-20 14:17:29 | INFO | train_inner | epoch 005: 1738 / 3002 loss=2.517, ppl=5.73, wps=5959.9, ups=0.09, wpb=64868, bsz=128, num_updates=13666, lr=9.98987e-05, gnorm=2.039, loss_scale=16, train_wall=10, gb_free=2.8, wall=157123
2021-06-20 14:17:40 | INFO | train_inner | epoch 005: 1739 / 3002 loss=2.483, ppl=5.59, wps=5907.6, ups=0.09, wpb=64878, bsz=128, num_updates=13667, lr=9.98987e-05, gnorm=2.021, loss_scale=16, train_wall=11, gb_free=2.8, wall=157134
2021-06-20 14:17:51 | INFO | train_inner | epoch 005: 1740 / 3002 loss=2.534, ppl=5.79, wps=5827.3, ups=0.09, wpb=64722, bsz=128, num_updates=13668, lr=9.98986e-05, gnorm=1.995, loss_scale=16, train_wall=11, gb_free=2.8, wall=157145
2021-06-20 14:18:02 | INFO | train_inner | epoch 005: 1741 / 3002 loss=2.628, ppl=6.18, wps=5756.5, ups=0.09, wpb=64812, bsz=128, num_updates=13669, lr=9.98986e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=157156
2021-06-20 14:18:13 | INFO | train_inner | epoch 005: 1742 / 3002 loss=2.558, ppl=5.89, wps=5884.1, ups=0.09, wpb=64939, bsz=128, num_updates=13670, lr=9.98986e-05, gnorm=1.967, loss_scale=16, train_wall=11, gb_free=2.8, wall=157167
2021-06-20 14:18:24 | INFO | train_inner | epoch 005: 1743 / 3002 loss=2.464, ppl=5.52, wps=5825.4, ups=0.09, wpb=64866, bsz=128, num_updates=13671, lr=9.98986e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=157178
2021-06-20 14:18:35 | INFO | train_inner | epoch 005: 1744 / 3002 loss=2.525, ppl=5.76, wps=5858.4, ups=0.09, wpb=64808, bsz=128, num_updates=13672, lr=9.98986e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=157189
2021-06-20 14:18:46 | INFO | train_inner | epoch 005: 1745 / 3002 loss=2.465, ppl=5.52, wps=5901, ups=0.09, wpb=64812, bsz=128, num_updates=13673, lr=9.98986e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=157200
2021-06-20 14:18:57 | INFO | train_inner | epoch 005: 1746 / 3002 loss=2.419, ppl=5.35, wps=5926.2, ups=0.09, wpb=64799, bsz=128, num_updates=13674, lr=9.98986e-05, gnorm=1.945, loss_scale=16, train_wall=10, gb_free=2.8, wall=157211
2021-06-20 14:19:08 | INFO | train_inner | epoch 005: 1747 / 3002 loss=2.44, ppl=5.43, wps=5810.9, ups=0.09, wpb=64771, bsz=128, num_updates=13675, lr=9.98986e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=157223
2021-06-20 14:19:19 | INFO | train_inner | epoch 005: 1748 / 3002 loss=2.408, ppl=5.31, wps=5826.8, ups=0.09, wpb=64831, bsz=128, num_updates=13676, lr=9.98986e-05, gnorm=1.981, loss_scale=16, train_wall=11, gb_free=2.8, wall=157234
2021-06-20 14:19:30 | INFO | train_inner | epoch 005: 1749 / 3002 loss=2.491, ppl=5.62, wps=5805.5, ups=0.09, wpb=64859, bsz=128, num_updates=13677, lr=9.98986e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=157245
2021-06-20 14:19:42 | INFO | train_inner | epoch 005: 1750 / 3002 loss=2.535, ppl=5.8, wps=5848.4, ups=0.09, wpb=64874, bsz=128, num_updates=13678, lr=9.98986e-05, gnorm=2.097, loss_scale=16, train_wall=11, gb_free=2.8, wall=157256
2021-06-20 14:19:53 | INFO | train_inner | epoch 005: 1751 / 3002 loss=2.423, ppl=5.36, wps=5732.9, ups=0.09, wpb=64828, bsz=128, num_updates=13679, lr=9.98986e-05, gnorm=1.963, loss_scale=16, train_wall=11, gb_free=2.8, wall=157267
2021-06-20 14:20:04 | INFO | train_inner | epoch 005: 1752 / 3002 loss=2.469, ppl=5.54, wps=5822.9, ups=0.09, wpb=64825, bsz=128, num_updates=13680, lr=9.98986e-05, gnorm=1.981, loss_scale=16, train_wall=11, gb_free=2.8, wall=157278
2021-06-20 14:20:15 | INFO | train_inner | epoch 005: 1753 / 3002 loss=2.529, ppl=5.77, wps=5788.1, ups=0.09, wpb=64858, bsz=128, num_updates=13681, lr=9.98985e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=157290
2021-06-20 14:20:26 | INFO | train_inner | epoch 005: 1754 / 3002 loss=2.754, ppl=6.75, wps=5843.9, ups=0.09, wpb=64843, bsz=128, num_updates=13682, lr=9.98985e-05, gnorm=2.073, loss_scale=16, train_wall=11, gb_free=2.8, wall=157301
2021-06-20 14:20:37 | INFO | train_inner | epoch 005: 1755 / 3002 loss=2.456, ppl=5.49, wps=5849.6, ups=0.09, wpb=64728, bsz=128, num_updates=13683, lr=9.98985e-05, gnorm=2.014, loss_scale=16, train_wall=11, gb_free=2.8, wall=157312
2021-06-20 14:20:49 | INFO | train_inner | epoch 005: 1756 / 3002 loss=2.364, ppl=5.15, wps=5830.5, ups=0.09, wpb=64916, bsz=128, num_updates=13684, lr=9.98985e-05, gnorm=1.882, loss_scale=16, train_wall=11, gb_free=2.8, wall=157323
2021-06-20 14:21:00 | INFO | train_inner | epoch 005: 1757 / 3002 loss=2.499, ppl=5.65, wps=5881.1, ups=0.09, wpb=64815, bsz=128, num_updates=13685, lr=9.98985e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=157334
2021-06-20 14:21:11 | INFO | train_inner | epoch 005: 1758 / 3002 loss=2.485, ppl=5.6, wps=5891.1, ups=0.09, wpb=64755, bsz=128, num_updates=13686, lr=9.98985e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=157345
2021-06-20 14:21:22 | INFO | train_inner | epoch 005: 1759 / 3002 loss=2.603, ppl=6.08, wps=5835.5, ups=0.09, wpb=64853, bsz=128, num_updates=13687, lr=9.98985e-05, gnorm=2.091, loss_scale=16, train_wall=11, gb_free=2.8, wall=157356
2021-06-20 14:21:33 | INFO | train_inner | epoch 005: 1760 / 3002 loss=2.45, ppl=5.47, wps=5741, ups=0.09, wpb=64756, bsz=128, num_updates=13688, lr=9.98985e-05, gnorm=1.942, loss_scale=16, train_wall=11, gb_free=2.8, wall=157367
2021-06-20 14:21:44 | INFO | train_inner | epoch 005: 1761 / 3002 loss=2.479, ppl=5.58, wps=5821.8, ups=0.09, wpb=64850, bsz=128, num_updates=13689, lr=9.98985e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=157378
2021-06-20 14:21:55 | INFO | train_inner | epoch 005: 1762 / 3002 loss=2.357, ppl=5.12, wps=5792.5, ups=0.09, wpb=64869, bsz=128, num_updates=13690, lr=9.98985e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=157390
2021-06-20 14:22:06 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 14:22:17 | INFO | train_inner | epoch 005: 1764 / 3002 loss=2.537, ppl=5.8, wps=2930.5, ups=0.05, wpb=64845, bsz=128, num_updates=13691, lr=9.98985e-05, gnorm=1.99, loss_scale=8, train_wall=21, gb_free=2.8, wall=157412
2021-06-20 14:22:28 | INFO | train_inner | epoch 005: 1765 / 3002 loss=2.498, ppl=5.65, wps=5899.9, ups=0.09, wpb=64794, bsz=128, num_updates=13692, lr=9.98985e-05, gnorm=2.018, loss_scale=8, train_wall=11, gb_free=2.8, wall=157423
2021-06-20 14:22:40 | INFO | train_inner | epoch 005: 1766 / 3002 loss=2.658, ppl=6.31, wps=5817.3, ups=0.09, wpb=64754, bsz=128, num_updates=13693, lr=9.98984e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=157434
2021-06-20 14:22:51 | INFO | train_inner | epoch 005: 1767 / 3002 loss=2.473, ppl=5.55, wps=5768.6, ups=0.09, wpb=64727, bsz=128, num_updates=13694, lr=9.98984e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=157445
2021-06-20 14:23:02 | INFO | train_inner | epoch 005: 1768 / 3002 loss=2.657, ppl=6.31, wps=5818.7, ups=0.09, wpb=64692, bsz=128, num_updates=13695, lr=9.98984e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=157456
2021-06-20 14:23:13 | INFO | train_inner | epoch 005: 1769 / 3002 loss=2.561, ppl=5.9, wps=5757.4, ups=0.09, wpb=64805, bsz=128, num_updates=13696, lr=9.98984e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=157467
2021-06-20 14:23:24 | INFO | train_inner | epoch 005: 1770 / 3002 loss=2.419, ppl=5.35, wps=5882.2, ups=0.09, wpb=64839, bsz=128, num_updates=13697, lr=9.98984e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=157478
2021-06-20 14:23:35 | INFO | train_inner | epoch 005: 1771 / 3002 loss=2.632, ppl=6.2, wps=5805.5, ups=0.09, wpb=64806, bsz=128, num_updates=13698, lr=9.98984e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=157490
2021-06-20 14:23:46 | INFO | train_inner | epoch 005: 1772 / 3002 loss=2.647, ppl=6.26, wps=5802.5, ups=0.09, wpb=64829, bsz=128, num_updates=13699, lr=9.98984e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=157501
2021-06-20 14:23:57 | INFO | train_inner | epoch 005: 1773 / 3002 loss=2.405, ppl=5.3, wps=5922.1, ups=0.09, wpb=64883, bsz=128, num_updates=13700, lr=9.98984e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=157512
2021-06-20 14:24:08 | INFO | train_inner | epoch 005: 1774 / 3002 loss=2.48, ppl=5.58, wps=5931.3, ups=0.09, wpb=64826, bsz=128, num_updates=13701, lr=9.98984e-05, gnorm=1.965, loss_scale=8, train_wall=10, gb_free=2.8, wall=157523
2021-06-20 14:24:19 | INFO | train_inner | epoch 005: 1775 / 3002 loss=2.554, ppl=5.87, wps=5986, ups=0.09, wpb=64854, bsz=128, num_updates=13702, lr=9.98984e-05, gnorm=1.899, loss_scale=8, train_wall=10, gb_free=2.8, wall=157534
2021-06-20 14:24:30 | INFO | train_inner | epoch 005: 1776 / 3002 loss=2.535, ppl=5.8, wps=5783.8, ups=0.09, wpb=64826, bsz=128, num_updates=13703, lr=9.98984e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=157545
2021-06-20 14:24:41 | INFO | train_inner | epoch 005: 1777 / 3002 loss=2.461, ppl=5.5, wps=5872.3, ups=0.09, wpb=64900, bsz=128, num_updates=13704, lr=9.98984e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=157556
2021-06-20 14:24:53 | INFO | train_inner | epoch 005: 1778 / 3002 loss=2.471, ppl=5.55, wps=5804.9, ups=0.09, wpb=64805, bsz=128, num_updates=13705, lr=9.98984e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=157567
2021-06-20 14:25:04 | INFO | train_inner | epoch 005: 1779 / 3002 loss=2.576, ppl=5.96, wps=5688.1, ups=0.09, wpb=64696, bsz=128, num_updates=13706, lr=9.98983e-05, gnorm=2.281, loss_scale=8, train_wall=11, gb_free=2.8, wall=157578
2021-06-20 14:25:15 | INFO | train_inner | epoch 005: 1780 / 3002 loss=2.457, ppl=5.49, wps=5934.6, ups=0.09, wpb=64772, bsz=128, num_updates=13707, lr=9.98983e-05, gnorm=1.864, loss_scale=8, train_wall=10, gb_free=2.8, wall=157589
2021-06-20 14:25:26 | INFO | train_inner | epoch 005: 1781 / 3002 loss=2.609, ppl=6.1, wps=5865.9, ups=0.09, wpb=64821, bsz=128, num_updates=13708, lr=9.98983e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=157600
2021-06-20 14:25:37 | INFO | train_inner | epoch 005: 1782 / 3002 loss=2.436, ppl=5.41, wps=5688, ups=0.09, wpb=64793, bsz=128, num_updates=13709, lr=9.98983e-05, gnorm=2.127, loss_scale=8, train_wall=11, gb_free=2.8, wall=157612
2021-06-20 14:25:48 | INFO | train_inner | epoch 005: 1783 / 3002 loss=2.459, ppl=5.5, wps=5839.6, ups=0.09, wpb=64833, bsz=128, num_updates=13710, lr=9.98983e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=157623
2021-06-20 14:25:59 | INFO | train_inner | epoch 005: 1784 / 3002 loss=2.516, ppl=5.72, wps=5868.7, ups=0.09, wpb=64856, bsz=128, num_updates=13711, lr=9.98983e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=157634
2021-06-20 14:26:11 | INFO | train_inner | epoch 005: 1785 / 3002 loss=2.449, ppl=5.46, wps=5831.5, ups=0.09, wpb=64933, bsz=128, num_updates=13712, lr=9.98983e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=157645
2021-06-20 14:26:21 | INFO | train_inner | epoch 005: 1786 / 3002 loss=2.544, ppl=5.83, wps=5978.3, ups=0.09, wpb=64841, bsz=128, num_updates=13713, lr=9.98983e-05, gnorm=1.978, loss_scale=8, train_wall=10, gb_free=2.8, wall=157656
2021-06-20 14:26:33 | INFO | train_inner | epoch 005: 1787 / 3002 loss=2.508, ppl=5.69, wps=5775.7, ups=0.09, wpb=64846, bsz=128, num_updates=13714, lr=9.98983e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=157667
2021-06-20 14:26:44 | INFO | train_inner | epoch 005: 1788 / 3002 loss=2.491, ppl=5.62, wps=5999.3, ups=0.09, wpb=64818, bsz=128, num_updates=13715, lr=9.98983e-05, gnorm=1.94, loss_scale=8, train_wall=10, gb_free=2.8, wall=157678
2021-06-20 14:26:55 | INFO | train_inner | epoch 005: 1789 / 3002 loss=2.531, ppl=5.78, wps=5865.8, ups=0.09, wpb=64873, bsz=128, num_updates=13716, lr=9.98983e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=157689
2021-06-20 14:27:06 | INFO | train_inner | epoch 005: 1790 / 3002 loss=2.645, ppl=6.25, wps=5911.6, ups=0.09, wpb=64794, bsz=128, num_updates=13717, lr=9.98983e-05, gnorm=1.979, loss_scale=8, train_wall=10, gb_free=2.8, wall=157700
2021-06-20 14:27:17 | INFO | train_inner | epoch 005: 1791 / 3002 loss=2.607, ppl=6.09, wps=5857.9, ups=0.09, wpb=64801, bsz=128, num_updates=13718, lr=9.98982e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=157711
2021-06-20 14:27:28 | INFO | train_inner | epoch 005: 1792 / 3002 loss=2.618, ppl=6.14, wps=5873.9, ups=0.09, wpb=64767, bsz=128, num_updates=13719, lr=9.98982e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=157722
2021-06-20 14:27:39 | INFO | train_inner | epoch 005: 1793 / 3002 loss=2.464, ppl=5.52, wps=5893.1, ups=0.09, wpb=64830, bsz=128, num_updates=13720, lr=9.98982e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=157733
2021-06-20 14:27:50 | INFO | train_inner | epoch 005: 1794 / 3002 loss=2.512, ppl=5.7, wps=5739.5, ups=0.09, wpb=64829, bsz=128, num_updates=13721, lr=9.98982e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=157744
2021-06-20 14:28:01 | INFO | train_inner | epoch 005: 1795 / 3002 loss=2.428, ppl=5.38, wps=5754.5, ups=0.09, wpb=64791, bsz=128, num_updates=13722, lr=9.98982e-05, gnorm=2.01, loss_scale=8, train_wall=11, gb_free=2.8, wall=157756
2021-06-20 14:28:12 | INFO | train_inner | epoch 005: 1796 / 3002 loss=2.555, ppl=5.88, wps=5934.6, ups=0.09, wpb=64876, bsz=128, num_updates=13723, lr=9.98982e-05, gnorm=1.943, loss_scale=8, train_wall=10, gb_free=2.8, wall=157766
2021-06-20 14:28:23 | INFO | train_inner | epoch 005: 1797 / 3002 loss=2.454, ppl=5.48, wps=5825.9, ups=0.09, wpb=64787, bsz=128, num_updates=13724, lr=9.98982e-05, gnorm=2.053, loss_scale=8, train_wall=11, gb_free=2.8, wall=157778
2021-06-20 14:28:34 | INFO | train_inner | epoch 005: 1798 / 3002 loss=2.46, ppl=5.5, wps=5875.7, ups=0.09, wpb=64898, bsz=128, num_updates=13725, lr=9.98982e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=157789
2021-06-20 14:28:45 | INFO | train_inner | epoch 005: 1799 / 3002 loss=2.41, ppl=5.32, wps=5902.3, ups=0.09, wpb=64865, bsz=128, num_updates=13726, lr=9.98982e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=157800
2021-06-20 14:28:56 | INFO | train_inner | epoch 005: 1800 / 3002 loss=2.399, ppl=5.27, wps=5973.5, ups=0.09, wpb=64922, bsz=128, num_updates=13727, lr=9.98982e-05, gnorm=1.964, loss_scale=8, train_wall=10, gb_free=2.8, wall=157810
2021-06-20 14:29:07 | INFO | train_inner | epoch 005: 1801 / 3002 loss=2.463, ppl=5.51, wps=5785.7, ups=0.09, wpb=64768, bsz=128, num_updates=13728, lr=9.98982e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=157822
2021-06-20 14:29:18 | INFO | train_inner | epoch 005: 1802 / 3002 loss=2.644, ppl=6.25, wps=5867.1, ups=0.09, wpb=64839, bsz=128, num_updates=13729, lr=9.98982e-05, gnorm=2.068, loss_scale=8, train_wall=11, gb_free=2.8, wall=157833
2021-06-20 14:29:29 | INFO | train_inner | epoch 005: 1803 / 3002 loss=2.618, ppl=6.14, wps=5947, ups=0.09, wpb=64851, bsz=128, num_updates=13730, lr=9.98982e-05, gnorm=2.005, loss_scale=8, train_wall=10, gb_free=2.8, wall=157844
2021-06-20 14:29:40 | INFO | train_inner | epoch 005: 1804 / 3002 loss=2.478, ppl=5.57, wps=5825.9, ups=0.09, wpb=64866, bsz=128, num_updates=13731, lr=9.98981e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=157855
2021-06-20 14:29:52 | INFO | train_inner | epoch 005: 1805 / 3002 loss=2.49, ppl=5.62, wps=5798.2, ups=0.09, wpb=64883, bsz=128, num_updates=13732, lr=9.98981e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=157866
2021-06-20 14:30:03 | INFO | train_inner | epoch 005: 1806 / 3002 loss=2.375, ppl=5.19, wps=5921.6, ups=0.09, wpb=64861, bsz=128, num_updates=13733, lr=9.98981e-05, gnorm=2.098, loss_scale=8, train_wall=11, gb_free=2.8, wall=157877
2021-06-20 14:30:14 | INFO | train_inner | epoch 005: 1807 / 3002 loss=2.43, ppl=5.39, wps=5731.1, ups=0.09, wpb=64794, bsz=128, num_updates=13734, lr=9.98981e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=157888
2021-06-20 14:30:25 | INFO | train_inner | epoch 005: 1808 / 3002 loss=2.455, ppl=5.48, wps=5835.9, ups=0.09, wpb=64877, bsz=128, num_updates=13735, lr=9.98981e-05, gnorm=2.423, loss_scale=8, train_wall=11, gb_free=2.8, wall=157899
2021-06-20 14:30:36 | INFO | train_inner | epoch 005: 1809 / 3002 loss=2.474, ppl=5.56, wps=5935, ups=0.09, wpb=64811, bsz=128, num_updates=13736, lr=9.98981e-05, gnorm=1.875, loss_scale=8, train_wall=10, gb_free=2.8, wall=157910
2021-06-20 14:30:47 | INFO | train_inner | epoch 005: 1810 / 3002 loss=2.439, ppl=5.42, wps=5918.9, ups=0.09, wpb=64827, bsz=128, num_updates=13737, lr=9.98981e-05, gnorm=1.889, loss_scale=8, train_wall=10, gb_free=2.8, wall=157921
2021-06-20 14:30:58 | INFO | train_inner | epoch 005: 1811 / 3002 loss=2.468, ppl=5.53, wps=5777.6, ups=0.09, wpb=64807, bsz=128, num_updates=13738, lr=9.98981e-05, gnorm=2.045, loss_scale=8, train_wall=11, gb_free=2.8, wall=157932
2021-06-20 14:31:09 | INFO | train_inner | epoch 005: 1812 / 3002 loss=2.549, ppl=5.85, wps=5809.8, ups=0.09, wpb=64830, bsz=128, num_updates=13739, lr=9.98981e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=157944
2021-06-20 14:31:20 | INFO | train_inner | epoch 005: 1813 / 3002 loss=2.47, ppl=5.54, wps=5799.7, ups=0.09, wpb=64858, bsz=128, num_updates=13740, lr=9.98981e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=157955
2021-06-20 14:31:32 | INFO | train_inner | epoch 005: 1814 / 3002 loss=2.444, ppl=5.44, wps=5815.3, ups=0.09, wpb=64827, bsz=128, num_updates=13741, lr=9.98981e-05, gnorm=1.865, loss_scale=8, train_wall=11, gb_free=2.8, wall=157966
2021-06-20 14:31:43 | INFO | train_inner | epoch 005: 1815 / 3002 loss=2.482, ppl=5.59, wps=5729.5, ups=0.09, wpb=64842, bsz=128, num_updates=13742, lr=9.98981e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=157977
2021-06-20 14:31:54 | INFO | train_inner | epoch 005: 1816 / 3002 loss=2.525, ppl=5.76, wps=5828.3, ups=0.09, wpb=64905, bsz=128, num_updates=13743, lr=9.9898e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=157988
2021-06-20 14:32:05 | INFO | train_inner | epoch 005: 1817 / 3002 loss=2.707, ppl=6.53, wps=5834.4, ups=0.09, wpb=64856, bsz=128, num_updates=13744, lr=9.9898e-05, gnorm=2.08, loss_scale=8, train_wall=11, gb_free=2.8, wall=157999
2021-06-20 14:32:16 | INFO | train_inner | epoch 005: 1818 / 3002 loss=2.514, ppl=5.71, wps=5743, ups=0.09, wpb=64738, bsz=128, num_updates=13745, lr=9.9898e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=158011
2021-06-20 14:32:28 | INFO | train_inner | epoch 005: 1819 / 3002 loss=2.625, ppl=6.17, wps=5842.5, ups=0.09, wpb=64840, bsz=128, num_updates=13746, lr=9.9898e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=158022
2021-06-20 14:32:39 | INFO | train_inner | epoch 005: 1820 / 3002 loss=2.503, ppl=5.67, wps=5880.9, ups=0.09, wpb=64822, bsz=128, num_updates=13747, lr=9.9898e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=158033
2021-06-20 14:32:50 | INFO | train_inner | epoch 005: 1821 / 3002 loss=2.415, ppl=5.33, wps=5802.5, ups=0.09, wpb=64830, bsz=128, num_updates=13748, lr=9.9898e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=158044
2021-06-20 14:33:01 | INFO | train_inner | epoch 005: 1822 / 3002 loss=2.482, ppl=5.58, wps=5796.3, ups=0.09, wpb=64752, bsz=128, num_updates=13749, lr=9.9898e-05, gnorm=2.153, loss_scale=8, train_wall=11, gb_free=2.8, wall=158055
2021-06-20 14:33:12 | INFO | train_inner | epoch 005: 1823 / 3002 loss=2.58, ppl=5.98, wps=5889.3, ups=0.09, wpb=64845, bsz=128, num_updates=13750, lr=9.9898e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=158066
2021-06-20 14:33:23 | INFO | train_inner | epoch 005: 1824 / 3002 loss=2.574, ppl=5.95, wps=5867.1, ups=0.09, wpb=64864, bsz=128, num_updates=13751, lr=9.9898e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=158077
2021-06-20 14:33:34 | INFO | train_inner | epoch 005: 1825 / 3002 loss=2.321, ppl=5, wps=5752.9, ups=0.09, wpb=64815, bsz=128, num_updates=13752, lr=9.9898e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=158089
2021-06-20 14:33:45 | INFO | train_inner | epoch 005: 1826 / 3002 loss=2.568, ppl=5.93, wps=5765.6, ups=0.09, wpb=64844, bsz=128, num_updates=13753, lr=9.9898e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=158100
2021-06-20 14:33:56 | INFO | train_inner | epoch 005: 1827 / 3002 loss=2.536, ppl=5.8, wps=5894.8, ups=0.09, wpb=64891, bsz=128, num_updates=13754, lr=9.9898e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=158111
2021-06-20 14:34:08 | INFO | train_inner | epoch 005: 1828 / 3002 loss=2.405, ppl=5.3, wps=5848.2, ups=0.09, wpb=64821, bsz=128, num_updates=13755, lr=9.9898e-05, gnorm=2.01, loss_scale=8, train_wall=11, gb_free=2.8, wall=158122
2021-06-20 14:34:19 | INFO | train_inner | epoch 005: 1829 / 3002 loss=2.474, ppl=5.55, wps=5846.6, ups=0.09, wpb=64845, bsz=128, num_updates=13756, lr=9.98979e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=158133
2021-06-20 14:34:30 | INFO | train_inner | epoch 005: 1830 / 3002 loss=2.492, ppl=5.63, wps=5794.2, ups=0.09, wpb=64735, bsz=128, num_updates=13757, lr=9.98979e-05, gnorm=2.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=158144
2021-06-20 14:34:41 | INFO | train_inner | epoch 005: 1831 / 3002 loss=2.433, ppl=5.4, wps=5879.2, ups=0.09, wpb=64913, bsz=128, num_updates=13758, lr=9.98979e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=158155
2021-06-20 14:34:52 | INFO | train_inner | epoch 005: 1832 / 3002 loss=2.598, ppl=6.05, wps=5870.5, ups=0.09, wpb=64791, bsz=128, num_updates=13759, lr=9.98979e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=158166
2021-06-20 14:35:03 | INFO | train_inner | epoch 005: 1833 / 3002 loss=2.323, ppl=5, wps=5769.6, ups=0.09, wpb=64802, bsz=128, num_updates=13760, lr=9.98979e-05, gnorm=1.876, loss_scale=8, train_wall=11, gb_free=2.8, wall=158177
2021-06-20 14:35:14 | INFO | train_inner | epoch 005: 1834 / 3002 loss=2.579, ppl=5.97, wps=5723.1, ups=0.09, wpb=64777, bsz=128, num_updates=13761, lr=9.98979e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=158189
2021-06-20 14:35:26 | INFO | train_inner | epoch 005: 1835 / 3002 loss=2.437, ppl=5.42, wps=5789.5, ups=0.09, wpb=64750, bsz=128, num_updates=13762, lr=9.98979e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=158200
2021-06-20 14:35:37 | INFO | train_inner | epoch 005: 1836 / 3002 loss=2.392, ppl=5.25, wps=5829.5, ups=0.09, wpb=64908, bsz=128, num_updates=13763, lr=9.98979e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=158211
2021-06-20 14:35:48 | INFO | train_inner | epoch 005: 1837 / 3002 loss=2.596, ppl=6.05, wps=5877.2, ups=0.09, wpb=64901, bsz=128, num_updates=13764, lr=9.98979e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=158222
2021-06-20 14:35:59 | INFO | train_inner | epoch 005: 1838 / 3002 loss=2.427, ppl=5.38, wps=5882.6, ups=0.09, wpb=64865, bsz=128, num_updates=13765, lr=9.98979e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=158233
2021-06-20 14:36:10 | INFO | train_inner | epoch 005: 1839 / 3002 loss=2.558, ppl=5.89, wps=5924.9, ups=0.09, wpb=64932, bsz=128, num_updates=13766, lr=9.98979e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=158244
2021-06-20 14:36:21 | INFO | train_inner | epoch 005: 1840 / 3002 loss=2.627, ppl=6.18, wps=5794.4, ups=0.09, wpb=64905, bsz=128, num_updates=13767, lr=9.98979e-05, gnorm=2.033, loss_scale=8, train_wall=11, gb_free=2.8, wall=158255
2021-06-20 14:36:32 | INFO | train_inner | epoch 005: 1841 / 3002 loss=2.359, ppl=5.13, wps=5829.9, ups=0.09, wpb=64772, bsz=128, num_updates=13768, lr=9.98978e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=158266
2021-06-20 14:36:43 | INFO | train_inner | epoch 005: 1842 / 3002 loss=2.436, ppl=5.41, wps=5738.2, ups=0.09, wpb=64729, bsz=128, num_updates=13769, lr=9.98978e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=158278
2021-06-20 14:36:55 | INFO | train_inner | epoch 005: 1843 / 3002 loss=2.448, ppl=5.46, wps=5737.4, ups=0.09, wpb=64765, bsz=128, num_updates=13770, lr=9.98978e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=158289
2021-06-20 14:37:06 | INFO | train_inner | epoch 005: 1844 / 3002 loss=2.571, ppl=5.94, wps=5847.5, ups=0.09, wpb=64822, bsz=128, num_updates=13771, lr=9.98978e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=158300
2021-06-20 14:37:17 | INFO | train_inner | epoch 005: 1845 / 3002 loss=2.499, ppl=5.65, wps=5793.9, ups=0.09, wpb=64815, bsz=128, num_updates=13772, lr=9.98978e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=158311
2021-06-20 14:37:28 | INFO | train_inner | epoch 005: 1846 / 3002 loss=2.388, ppl=5.24, wps=5868.7, ups=0.09, wpb=64874, bsz=128, num_updates=13773, lr=9.98978e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=158322
2021-06-20 14:37:39 | INFO | train_inner | epoch 005: 1847 / 3002 loss=2.523, ppl=5.75, wps=5905.7, ups=0.09, wpb=64809, bsz=128, num_updates=13774, lr=9.98978e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=158333
2021-06-20 14:37:50 | INFO | train_inner | epoch 005: 1848 / 3002 loss=2.371, ppl=5.17, wps=5862.1, ups=0.09, wpb=64828, bsz=128, num_updates=13775, lr=9.98978e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=158344
2021-06-20 14:38:01 | INFO | train_inner | epoch 005: 1849 / 3002 loss=2.629, ppl=6.19, wps=5916.8, ups=0.09, wpb=64874, bsz=128, num_updates=13776, lr=9.98978e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=158355
2021-06-20 14:38:12 | INFO | train_inner | epoch 005: 1850 / 3002 loss=2.394, ppl=5.26, wps=5843.2, ups=0.09, wpb=64902, bsz=128, num_updates=13777, lr=9.98978e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=158366
2021-06-20 14:38:23 | INFO | train_inner | epoch 005: 1851 / 3002 loss=2.487, ppl=5.61, wps=5794.5, ups=0.09, wpb=64815, bsz=128, num_updates=13778, lr=9.98978e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=158378
2021-06-20 14:38:35 | INFO | train_inner | epoch 005: 1852 / 3002 loss=2.474, ppl=5.56, wps=5775.9, ups=0.09, wpb=64777, bsz=128, num_updates=13779, lr=9.98978e-05, gnorm=2.088, loss_scale=8, train_wall=11, gb_free=2.8, wall=158389
2021-06-20 14:38:45 | INFO | train_inner | epoch 005: 1853 / 3002 loss=2.535, ppl=5.79, wps=5965.1, ups=0.09, wpb=64827, bsz=128, num_updates=13780, lr=9.98978e-05, gnorm=1.946, loss_scale=8, train_wall=10, gb_free=2.8, wall=158400
2021-06-20 14:38:57 | INFO | train_inner | epoch 005: 1854 / 3002 loss=2.526, ppl=5.76, wps=5815.9, ups=0.09, wpb=64806, bsz=128, num_updates=13781, lr=9.98977e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=158411
2021-06-20 14:39:08 | INFO | train_inner | epoch 005: 1855 / 3002 loss=2.337, ppl=5.05, wps=5839.5, ups=0.09, wpb=64894, bsz=128, num_updates=13782, lr=9.98977e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=158422
2021-06-20 14:39:19 | INFO | train_inner | epoch 005: 1856 / 3002 loss=2.604, ppl=6.08, wps=5808.2, ups=0.09, wpb=64809, bsz=128, num_updates=13783, lr=9.98977e-05, gnorm=2.138, loss_scale=8, train_wall=11, gb_free=2.8, wall=158433
2021-06-20 14:39:30 | INFO | train_inner | epoch 005: 1857 / 3002 loss=2.505, ppl=5.68, wps=5783.4, ups=0.09, wpb=64847, bsz=128, num_updates=13784, lr=9.98977e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=158444
2021-06-20 14:39:41 | INFO | train_inner | epoch 005: 1858 / 3002 loss=2.631, ppl=6.2, wps=5858.1, ups=0.09, wpb=64848, bsz=128, num_updates=13785, lr=9.98977e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=158455
2021-06-20 14:39:52 | INFO | train_inner | epoch 005: 1859 / 3002 loss=2.584, ppl=5.99, wps=5862.1, ups=0.09, wpb=64842, bsz=128, num_updates=13786, lr=9.98977e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=158466
2021-06-20 14:40:03 | INFO | train_inner | epoch 005: 1860 / 3002 loss=2.443, ppl=5.44, wps=5788.3, ups=0.09, wpb=64846, bsz=128, num_updates=13787, lr=9.98977e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=158478
2021-06-20 14:40:14 | INFO | train_inner | epoch 005: 1861 / 3002 loss=2.703, ppl=6.51, wps=5894, ups=0.09, wpb=64830, bsz=128, num_updates=13788, lr=9.98977e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=158489
2021-06-20 14:40:25 | INFO | train_inner | epoch 005: 1862 / 3002 loss=2.522, ppl=5.74, wps=5827.6, ups=0.09, wpb=64869, bsz=128, num_updates=13789, lr=9.98977e-05, gnorm=2.363, loss_scale=8, train_wall=11, gb_free=2.8, wall=158500
2021-06-20 14:40:36 | INFO | train_inner | epoch 005: 1863 / 3002 loss=2.478, ppl=5.57, wps=5888, ups=0.09, wpb=64851, bsz=128, num_updates=13790, lr=9.98977e-05, gnorm=1.825, loss_scale=8, train_wall=11, gb_free=2.8, wall=158511
2021-06-20 14:40:48 | INFO | train_inner | epoch 005: 1864 / 3002 loss=2.394, ppl=5.26, wps=5789.7, ups=0.09, wpb=64801, bsz=128, num_updates=13791, lr=9.98977e-05, gnorm=1.855, loss_scale=8, train_wall=11, gb_free=2.8, wall=158522
2021-06-20 14:40:59 | INFO | train_inner | epoch 005: 1865 / 3002 loss=2.592, ppl=6.03, wps=5859.6, ups=0.09, wpb=64770, bsz=128, num_updates=13792, lr=9.98977e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=158533
2021-06-20 14:41:10 | INFO | train_inner | epoch 005: 1866 / 3002 loss=2.462, ppl=5.51, wps=5827.2, ups=0.09, wpb=64862, bsz=128, num_updates=13793, lr=9.98976e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=158544
2021-06-20 14:41:21 | INFO | train_inner | epoch 005: 1867 / 3002 loss=2.511, ppl=5.7, wps=5916.5, ups=0.09, wpb=64821, bsz=128, num_updates=13794, lr=9.98976e-05, gnorm=2.178, loss_scale=8, train_wall=11, gb_free=2.8, wall=158555
2021-06-20 14:41:32 | INFO | train_inner | epoch 005: 1868 / 3002 loss=2.319, ppl=4.99, wps=5860.1, ups=0.09, wpb=64868, bsz=128, num_updates=13795, lr=9.98976e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=158566
2021-06-20 14:41:43 | INFO | train_inner | epoch 005: 1869 / 3002 loss=2.55, ppl=5.86, wps=5781.7, ups=0.09, wpb=64768, bsz=128, num_updates=13796, lr=9.98976e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=158577
2021-06-20 14:41:54 | INFO | train_inner | epoch 005: 1870 / 3002 loss=2.548, ppl=5.85, wps=5845.1, ups=0.09, wpb=64876, bsz=128, num_updates=13797, lr=9.98976e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=158589
2021-06-20 14:42:05 | INFO | train_inner | epoch 005: 1871 / 3002 loss=2.7, ppl=6.5, wps=5823.2, ups=0.09, wpb=64658, bsz=128, num_updates=13798, lr=9.98976e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=158600
2021-06-20 14:42:16 | INFO | train_inner | epoch 005: 1872 / 3002 loss=2.525, ppl=5.76, wps=5933.8, ups=0.09, wpb=64836, bsz=128, num_updates=13799, lr=9.98976e-05, gnorm=1.929, loss_scale=8, train_wall=10, gb_free=2.8, wall=158611
2021-06-20 14:42:27 | INFO | train_inner | epoch 005: 1873 / 3002 loss=2.573, ppl=5.95, wps=5880, ups=0.09, wpb=64796, bsz=128, num_updates=13800, lr=9.98976e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=158622
2021-06-20 14:42:39 | INFO | train_inner | epoch 005: 1874 / 3002 loss=2.403, ppl=5.29, wps=5747.9, ups=0.09, wpb=64825, bsz=128, num_updates=13801, lr=9.98976e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=158633
2021-06-20 14:42:50 | INFO | train_inner | epoch 005: 1875 / 3002 loss=2.575, ppl=5.96, wps=5822.9, ups=0.09, wpb=64784, bsz=128, num_updates=13802, lr=9.98976e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=158644
2021-06-20 14:43:01 | INFO | train_inner | epoch 005: 1876 / 3002 loss=2.379, ppl=5.2, wps=5829.8, ups=0.09, wpb=64836, bsz=128, num_updates=13803, lr=9.98976e-05, gnorm=2.158, loss_scale=8, train_wall=11, gb_free=2.8, wall=158655
2021-06-20 14:43:12 | INFO | train_inner | epoch 005: 1877 / 3002 loss=2.438, ppl=5.42, wps=5925.2, ups=0.09, wpb=64860, bsz=128, num_updates=13804, lr=9.98976e-05, gnorm=2.041, loss_scale=8, train_wall=10, gb_free=2.8, wall=158666
2021-06-20 14:43:23 | INFO | train_inner | epoch 005: 1878 / 3002 loss=2.407, ppl=5.31, wps=5865.2, ups=0.09, wpb=64870, bsz=128, num_updates=13805, lr=9.98976e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=158677
2021-06-20 14:43:34 | INFO | train_inner | epoch 005: 1879 / 3002 loss=2.575, ppl=5.96, wps=5762.9, ups=0.09, wpb=64861, bsz=128, num_updates=13806, lr=9.98975e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=158688
2021-06-20 14:43:45 | INFO | train_inner | epoch 005: 1880 / 3002 loss=2.588, ppl=6.01, wps=5918.4, ups=0.09, wpb=64790, bsz=128, num_updates=13807, lr=9.98975e-05, gnorm=1.92, loss_scale=8, train_wall=10, gb_free=2.8, wall=158699
2021-06-20 14:43:56 | INFO | train_inner | epoch 005: 1881 / 3002 loss=2.523, ppl=5.75, wps=5715.6, ups=0.09, wpb=64806, bsz=128, num_updates=13808, lr=9.98975e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=158711
2021-06-20 14:44:08 | INFO | train_inner | epoch 005: 1882 / 3002 loss=2.432, ppl=5.39, wps=5751.2, ups=0.09, wpb=64819, bsz=128, num_updates=13809, lr=9.98975e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=158722
2021-06-20 14:44:19 | INFO | train_inner | epoch 005: 1883 / 3002 loss=2.436, ppl=5.41, wps=5921.4, ups=0.09, wpb=64814, bsz=128, num_updates=13810, lr=9.98975e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=158733
2021-06-20 14:44:30 | INFO | train_inner | epoch 005: 1884 / 3002 loss=2.477, ppl=5.57, wps=5839.4, ups=0.09, wpb=64904, bsz=128, num_updates=13811, lr=9.98975e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=158744
2021-06-20 14:44:41 | INFO | train_inner | epoch 005: 1885 / 3002 loss=2.452, ppl=5.47, wps=5817.2, ups=0.09, wpb=64847, bsz=128, num_updates=13812, lr=9.98975e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=158755
2021-06-20 14:44:52 | INFO | train_inner | epoch 005: 1886 / 3002 loss=2.552, ppl=5.86, wps=5780.7, ups=0.09, wpb=64867, bsz=128, num_updates=13813, lr=9.98975e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=158766
2021-06-20 14:45:03 | INFO | train_inner | epoch 005: 1887 / 3002 loss=2.316, ppl=4.98, wps=5715.5, ups=0.09, wpb=64781, bsz=128, num_updates=13814, lr=9.98975e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=158778
2021-06-20 14:45:14 | INFO | train_inner | epoch 005: 1888 / 3002 loss=2.475, ppl=5.56, wps=5918.3, ups=0.09, wpb=64871, bsz=128, num_updates=13815, lr=9.98975e-05, gnorm=1.939, loss_scale=8, train_wall=10, gb_free=2.8, wall=158789
2021-06-20 14:45:25 | INFO | train_inner | epoch 005: 1889 / 3002 loss=2.456, ppl=5.49, wps=5796.1, ups=0.09, wpb=64792, bsz=128, num_updates=13816, lr=9.98975e-05, gnorm=2.626, loss_scale=8, train_wall=11, gb_free=2.8, wall=158800
2021-06-20 14:45:37 | INFO | train_inner | epoch 005: 1890 / 3002 loss=2.48, ppl=5.58, wps=5828, ups=0.09, wpb=64866, bsz=128, num_updates=13817, lr=9.98975e-05, gnorm=2.23, loss_scale=8, train_wall=11, gb_free=2.8, wall=158811
2021-06-20 14:45:48 | INFO | train_inner | epoch 005: 1891 / 3002 loss=2.548, ppl=5.85, wps=5914.5, ups=0.09, wpb=64796, bsz=128, num_updates=13818, lr=9.98974e-05, gnorm=1.941, loss_scale=16, train_wall=10, gb_free=2.8, wall=158822
2021-06-20 14:45:59 | INFO | train_inner | epoch 005: 1892 / 3002 loss=2.567, ppl=5.93, wps=5904.8, ups=0.09, wpb=64757, bsz=128, num_updates=13819, lr=9.98974e-05, gnorm=1.946, loss_scale=16, train_wall=10, gb_free=2.8, wall=158833
2021-06-20 14:46:10 | INFO | train_inner | epoch 005: 1893 / 3002 loss=2.416, ppl=5.34, wps=5864.5, ups=0.09, wpb=64822, bsz=128, num_updates=13820, lr=9.98974e-05, gnorm=2.001, loss_scale=16, train_wall=11, gb_free=2.8, wall=158844
2021-06-20 14:46:21 | INFO | train_inner | epoch 005: 1894 / 3002 loss=2.525, ppl=5.76, wps=5790.2, ups=0.09, wpb=64738, bsz=128, num_updates=13821, lr=9.98974e-05, gnorm=2.044, loss_scale=16, train_wall=11, gb_free=2.8, wall=158855
2021-06-20 14:46:32 | INFO | train_inner | epoch 005: 1895 / 3002 loss=2.591, ppl=6.03, wps=5938.5, ups=0.09, wpb=64845, bsz=128, num_updates=13822, lr=9.98974e-05, gnorm=2.023, loss_scale=16, train_wall=10, gb_free=2.8, wall=158866
2021-06-20 14:46:43 | INFO | train_inner | epoch 005: 1896 / 3002 loss=2.47, ppl=5.54, wps=5891, ups=0.09, wpb=64819, bsz=128, num_updates=13823, lr=9.98974e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=158877
2021-06-20 14:46:54 | INFO | train_inner | epoch 005: 1897 / 3002 loss=2.37, ppl=5.17, wps=5840.9, ups=0.09, wpb=64777, bsz=128, num_updates=13824, lr=9.98974e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=158888
2021-06-20 14:47:05 | INFO | train_inner | epoch 005: 1898 / 3002 loss=2.594, ppl=6.04, wps=5922.7, ups=0.09, wpb=64847, bsz=128, num_updates=13825, lr=9.98974e-05, gnorm=1.98, loss_scale=16, train_wall=10, gb_free=2.8, wall=158899
2021-06-20 14:47:16 | INFO | train_inner | epoch 005: 1899 / 3002 loss=2.548, ppl=5.85, wps=5829.6, ups=0.09, wpb=64785, bsz=128, num_updates=13826, lr=9.98974e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=158910
2021-06-20 14:47:27 | INFO | train_inner | epoch 005: 1900 / 3002 loss=2.532, ppl=5.79, wps=5833.3, ups=0.09, wpb=64807, bsz=128, num_updates=13827, lr=9.98974e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=158921
2021-06-20 14:47:38 | INFO | train_inner | epoch 005: 1901 / 3002 loss=2.592, ppl=6.03, wps=5795.4, ups=0.09, wpb=64828, bsz=128, num_updates=13828, lr=9.98974e-05, gnorm=1.926, loss_scale=16, train_wall=11, gb_free=2.8, wall=158932
2021-06-20 14:47:49 | INFO | train_inner | epoch 005: 1902 / 3002 loss=2.43, ppl=5.39, wps=5936.7, ups=0.09, wpb=64850, bsz=128, num_updates=13829, lr=9.98974e-05, gnorm=1.974, loss_scale=16, train_wall=10, gb_free=2.8, wall=158943
2021-06-20 14:48:00 | INFO | train_inner | epoch 005: 1903 / 3002 loss=2.535, ppl=5.8, wps=5853.8, ups=0.09, wpb=64786, bsz=128, num_updates=13830, lr=9.98974e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=158954
2021-06-20 14:48:11 | INFO | train_inner | epoch 005: 1904 / 3002 loss=2.55, ppl=5.86, wps=5925.4, ups=0.09, wpb=64881, bsz=128, num_updates=13831, lr=9.98973e-05, gnorm=1.912, loss_scale=16, train_wall=10, gb_free=2.8, wall=158965
2021-06-20 14:48:22 | INFO | train_inner | epoch 005: 1905 / 3002 loss=2.66, ppl=6.32, wps=5725.8, ups=0.09, wpb=64843, bsz=128, num_updates=13832, lr=9.98973e-05, gnorm=1.909, loss_scale=16, train_wall=11, gb_free=2.8, wall=158977
2021-06-20 14:48:34 | INFO | train_inner | epoch 005: 1906 / 3002 loss=2.58, ppl=5.98, wps=5725.6, ups=0.09, wpb=64833, bsz=128, num_updates=13833, lr=9.98973e-05, gnorm=2.042, loss_scale=16, train_wall=11, gb_free=2.8, wall=158988
2021-06-20 14:48:45 | INFO | train_inner | epoch 005: 1907 / 3002 loss=2.395, ppl=5.26, wps=5773.4, ups=0.09, wpb=64815, bsz=128, num_updates=13834, lr=9.98973e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=158999
2021-06-20 14:48:56 | INFO | train_inner | epoch 005: 1908 / 3002 loss=2.577, ppl=5.97, wps=5745.3, ups=0.09, wpb=64746, bsz=128, num_updates=13835, lr=9.98973e-05, gnorm=2.073, loss_scale=16, train_wall=11, gb_free=2.8, wall=159011
2021-06-20 14:49:07 | INFO | train_inner | epoch 005: 1909 / 3002 loss=2.607, ppl=6.09, wps=5893.7, ups=0.09, wpb=64834, bsz=128, num_updates=13836, lr=9.98973e-05, gnorm=2.202, loss_scale=16, train_wall=11, gb_free=2.8, wall=159022
2021-06-20 14:49:18 | INFO | train_inner | epoch 005: 1910 / 3002 loss=2.484, ppl=5.59, wps=5969.5, ups=0.09, wpb=64842, bsz=128, num_updates=13837, lr=9.98973e-05, gnorm=1.955, loss_scale=16, train_wall=10, gb_free=2.8, wall=159032
2021-06-20 14:49:29 | INFO | train_inner | epoch 005: 1911 / 3002 loss=2.621, ppl=6.15, wps=5866.2, ups=0.09, wpb=64898, bsz=128, num_updates=13838, lr=9.98973e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=159043
2021-06-20 14:49:40 | INFO | train_inner | epoch 005: 1912 / 3002 loss=2.441, ppl=5.43, wps=5796.5, ups=0.09, wpb=64749, bsz=128, num_updates=13839, lr=9.98973e-05, gnorm=2.702, loss_scale=16, train_wall=11, gb_free=2.8, wall=159055
2021-06-20 14:49:52 | INFO | train_inner | epoch 005: 1913 / 3002 loss=2.375, ppl=5.19, wps=5742.6, ups=0.09, wpb=64834, bsz=128, num_updates=13840, lr=9.98973e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=159066
2021-06-20 14:50:03 | INFO | train_inner | epoch 005: 1914 / 3002 loss=2.501, ppl=5.66, wps=5896.7, ups=0.09, wpb=64852, bsz=128, num_updates=13841, lr=9.98973e-05, gnorm=1.993, loss_scale=16, train_wall=11, gb_free=2.8, wall=159077
2021-06-20 14:50:14 | INFO | train_inner | epoch 005: 1915 / 3002 loss=2.65, ppl=6.28, wps=5847.6, ups=0.09, wpb=64827, bsz=128, num_updates=13842, lr=9.98973e-05, gnorm=1.951, loss_scale=16, train_wall=11, gb_free=2.8, wall=159088
2021-06-20 14:50:25 | INFO | train_inner | epoch 005: 1916 / 3002 loss=2.459, ppl=5.5, wps=5752.7, ups=0.09, wpb=64815, bsz=128, num_updates=13843, lr=9.98972e-05, gnorm=2.133, loss_scale=16, train_wall=11, gb_free=2.8, wall=159099
2021-06-20 14:50:36 | INFO | train_inner | epoch 005: 1917 / 3002 loss=2.373, ppl=5.18, wps=5769.2, ups=0.09, wpb=64806, bsz=128, num_updates=13844, lr=9.98972e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=159111
2021-06-20 14:50:47 | INFO | train_inner | epoch 005: 1918 / 3002 loss=2.493, ppl=5.63, wps=5750, ups=0.09, wpb=64821, bsz=128, num_updates=13845, lr=9.98972e-05, gnorm=2.065, loss_scale=16, train_wall=11, gb_free=2.8, wall=159122
2021-06-20 14:50:59 | INFO | train_inner | epoch 005: 1919 / 3002 loss=2.498, ppl=5.65, wps=5846.9, ups=0.09, wpb=64926, bsz=128, num_updates=13846, lr=9.98972e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=159133
2021-06-20 14:51:10 | INFO | train_inner | epoch 005: 1920 / 3002 loss=2.448, ppl=5.45, wps=5763.4, ups=0.09, wpb=64802, bsz=128, num_updates=13847, lr=9.98972e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=159144
2021-06-20 14:51:21 | INFO | train_inner | epoch 005: 1921 / 3002 loss=2.698, ppl=6.49, wps=5876.9, ups=0.09, wpb=64968, bsz=128, num_updates=13848, lr=9.98972e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=159155
2021-06-20 14:51:32 | INFO | train_inner | epoch 005: 1922 / 3002 loss=2.465, ppl=5.52, wps=5959.6, ups=0.09, wpb=64828, bsz=128, num_updates=13849, lr=9.98972e-05, gnorm=1.932, loss_scale=16, train_wall=10, gb_free=2.8, wall=159166
2021-06-20 14:51:43 | INFO | train_inner | epoch 005: 1923 / 3002 loss=2.52, ppl=5.73, wps=5786, ups=0.09, wpb=64830, bsz=128, num_updates=13850, lr=9.98972e-05, gnorm=2.103, loss_scale=16, train_wall=11, gb_free=2.8, wall=159177
2021-06-20 14:51:54 | INFO | train_inner | epoch 005: 1924 / 3002 loss=2.685, ppl=6.43, wps=5764.4, ups=0.09, wpb=64798, bsz=128, num_updates=13851, lr=9.98972e-05, gnorm=1.996, loss_scale=16, train_wall=11, gb_free=2.8, wall=159189
2021-06-20 14:52:05 | INFO | train_inner | epoch 005: 1925 / 3002 loss=2.361, ppl=5.14, wps=5931, ups=0.09, wpb=64855, bsz=128, num_updates=13852, lr=9.98972e-05, gnorm=1.982, loss_scale=16, train_wall=10, gb_free=2.8, wall=159199
2021-06-20 14:52:16 | INFO | train_inner | epoch 005: 1926 / 3002 loss=2.602, ppl=6.07, wps=5996.8, ups=0.09, wpb=64861, bsz=128, num_updates=13853, lr=9.98972e-05, gnorm=2.071, loss_scale=16, train_wall=10, gb_free=2.8, wall=159210
2021-06-20 14:52:27 | INFO | train_inner | epoch 005: 1927 / 3002 loss=2.523, ppl=5.75, wps=5768.3, ups=0.09, wpb=64852, bsz=128, num_updates=13854, lr=9.98972e-05, gnorm=2.056, loss_scale=16, train_wall=11, gb_free=2.8, wall=159222
2021-06-20 14:52:38 | INFO | train_inner | epoch 005: 1928 / 3002 loss=2.603, ppl=6.07, wps=5877.1, ups=0.09, wpb=64803, bsz=128, num_updates=13855, lr=9.98972e-05, gnorm=1.951, loss_scale=16, train_wall=11, gb_free=2.8, wall=159233
2021-06-20 14:52:49 | INFO | train_inner | epoch 005: 1929 / 3002 loss=2.543, ppl=5.83, wps=5861.7, ups=0.09, wpb=64825, bsz=128, num_updates=13856, lr=9.98971e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=159244
2021-06-20 14:53:00 | INFO | train_inner | epoch 005: 1930 / 3002 loss=2.506, ppl=5.68, wps=5801.4, ups=0.09, wpb=64838, bsz=128, num_updates=13857, lr=9.98971e-05, gnorm=1.963, loss_scale=16, train_wall=11, gb_free=2.8, wall=159255
2021-06-20 14:53:12 | INFO | train_inner | epoch 005: 1931 / 3002 loss=2.498, ppl=5.65, wps=5772.1, ups=0.09, wpb=64729, bsz=128, num_updates=13858, lr=9.98971e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=159266
2021-06-20 14:53:22 | INFO | train_inner | epoch 005: 1932 / 3002 loss=2.627, ppl=6.18, wps=6036.8, ups=0.09, wpb=64856, bsz=128, num_updates=13859, lr=9.98971e-05, gnorm=1.972, loss_scale=16, train_wall=10, gb_free=2.8, wall=159277
2021-06-20 14:53:34 | INFO | train_inner | epoch 005: 1933 / 3002 loss=2.607, ppl=6.09, wps=5771.2, ups=0.09, wpb=64802, bsz=128, num_updates=13860, lr=9.98971e-05, gnorm=1.927, loss_scale=16, train_wall=11, gb_free=2.8, wall=159288
2021-06-20 14:53:45 | INFO | train_inner | epoch 005: 1934 / 3002 loss=2.556, ppl=5.88, wps=5680.8, ups=0.09, wpb=64774, bsz=128, num_updates=13861, lr=9.98971e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=159299
2021-06-20 14:53:56 | INFO | train_inner | epoch 005: 1935 / 3002 loss=2.44, ppl=5.43, wps=5789.7, ups=0.09, wpb=64810, bsz=128, num_updates=13862, lr=9.98971e-05, gnorm=2.343, loss_scale=16, train_wall=11, gb_free=2.8, wall=159311
2021-06-20 14:54:07 | INFO | train_inner | epoch 005: 1936 / 3002 loss=2.401, ppl=5.28, wps=5882.8, ups=0.09, wpb=64887, bsz=128, num_updates=13863, lr=9.98971e-05, gnorm=1.945, loss_scale=16, train_wall=11, gb_free=2.8, wall=159322
2021-06-20 14:54:19 | INFO | train_inner | epoch 005: 1937 / 3002 loss=2.538, ppl=5.81, wps=5753.8, ups=0.09, wpb=64784, bsz=128, num_updates=13864, lr=9.98971e-05, gnorm=1.981, loss_scale=16, train_wall=11, gb_free=2.8, wall=159333
2021-06-20 14:54:30 | INFO | train_inner | epoch 005: 1938 / 3002 loss=2.45, ppl=5.47, wps=5794, ups=0.09, wpb=64798, bsz=128, num_updates=13865, lr=9.98971e-05, gnorm=2.044, loss_scale=16, train_wall=11, gb_free=2.8, wall=159344
2021-06-20 14:54:41 | INFO | train_inner | epoch 005: 1939 / 3002 loss=2.487, ppl=5.6, wps=5840.7, ups=0.09, wpb=64731, bsz=128, num_updates=13866, lr=9.98971e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=159355
2021-06-20 14:54:52 | INFO | train_inner | epoch 005: 1940 / 3002 loss=2.49, ppl=5.62, wps=5923.1, ups=0.09, wpb=64864, bsz=128, num_updates=13867, lr=9.98971e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=159366
2021-06-20 14:55:03 | INFO | train_inner | epoch 005: 1941 / 3002 loss=2.379, ppl=5.2, wps=5768.3, ups=0.09, wpb=64798, bsz=128, num_updates=13868, lr=9.9897e-05, gnorm=1.93, loss_scale=16, train_wall=11, gb_free=2.8, wall=159377
2021-06-20 14:55:14 | INFO | train_inner | epoch 005: 1942 / 3002 loss=2.533, ppl=5.79, wps=5934.3, ups=0.09, wpb=64781, bsz=128, num_updates=13869, lr=9.9897e-05, gnorm=2.013, loss_scale=16, train_wall=10, gb_free=2.8, wall=159388
2021-06-20 14:55:25 | INFO | train_inner | epoch 005: 1943 / 3002 loss=2.608, ppl=6.1, wps=5711.5, ups=0.09, wpb=64758, bsz=128, num_updates=13870, lr=9.9897e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=159400
2021-06-20 14:55:36 | INFO | train_inner | epoch 005: 1944 / 3002 loss=2.501, ppl=5.66, wps=5817.2, ups=0.09, wpb=64882, bsz=128, num_updates=13871, lr=9.9897e-05, gnorm=2.069, loss_scale=16, train_wall=11, gb_free=2.8, wall=159411
2021-06-20 14:55:47 | INFO | train_inner | epoch 005: 1945 / 3002 loss=2.463, ppl=5.51, wps=5953.6, ups=0.09, wpb=64757, bsz=128, num_updates=13872, lr=9.9897e-05, gnorm=1.944, loss_scale=16, train_wall=10, gb_free=2.8, wall=159422
2021-06-20 14:55:58 | INFO | train_inner | epoch 005: 1946 / 3002 loss=2.451, ppl=5.47, wps=5793.4, ups=0.09, wpb=64839, bsz=128, num_updates=13873, lr=9.9897e-05, gnorm=1.868, loss_scale=16, train_wall=11, gb_free=2.8, wall=159433
2021-06-20 14:56:10 | INFO | train_inner | epoch 005: 1947 / 3002 loss=2.424, ppl=5.37, wps=5747.9, ups=0.09, wpb=64854, bsz=128, num_updates=13874, lr=9.9897e-05, gnorm=1.992, loss_scale=16, train_wall=11, gb_free=2.8, wall=159444
2021-06-20 14:56:21 | INFO | train_inner | epoch 005: 1948 / 3002 loss=2.522, ppl=5.74, wps=5928.8, ups=0.09, wpb=64816, bsz=128, num_updates=13875, lr=9.9897e-05, gnorm=1.955, loss_scale=16, train_wall=10, gb_free=2.8, wall=159455
2021-06-20 14:56:32 | INFO | train_inner | epoch 005: 1949 / 3002 loss=2.444, ppl=5.44, wps=5859.3, ups=0.09, wpb=64859, bsz=128, num_updates=13876, lr=9.9897e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=159466
2021-06-20 14:56:43 | INFO | train_inner | epoch 005: 1950 / 3002 loss=2.452, ppl=5.47, wps=5790.8, ups=0.09, wpb=64875, bsz=128, num_updates=13877, lr=9.9897e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=159477
2021-06-20 14:56:54 | INFO | train_inner | epoch 005: 1951 / 3002 loss=2.494, ppl=5.63, wps=5888.3, ups=0.09, wpb=64812, bsz=128, num_updates=13878, lr=9.9897e-05, gnorm=1.997, loss_scale=16, train_wall=11, gb_free=2.8, wall=159488
2021-06-20 14:57:05 | INFO | train_inner | epoch 005: 1952 / 3002 loss=2.398, ppl=5.27, wps=5766, ups=0.09, wpb=64881, bsz=128, num_updates=13879, lr=9.9897e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=159500
2021-06-20 14:57:16 | INFO | train_inner | epoch 005: 1953 / 3002 loss=2.419, ppl=5.35, wps=5939.2, ups=0.09, wpb=64780, bsz=128, num_updates=13880, lr=9.9897e-05, gnorm=1.886, loss_scale=16, train_wall=10, gb_free=2.8, wall=159510
2021-06-20 14:57:27 | INFO | train_inner | epoch 005: 1954 / 3002 loss=2.717, ppl=6.58, wps=5886.5, ups=0.09, wpb=64789, bsz=128, num_updates=13881, lr=9.98969e-05, gnorm=2.024, loss_scale=16, train_wall=11, gb_free=2.8, wall=159521
2021-06-20 14:57:38 | INFO | train_inner | epoch 005: 1955 / 3002 loss=2.473, ppl=5.55, wps=5887.9, ups=0.09, wpb=64884, bsz=128, num_updates=13882, lr=9.98969e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=159532
2021-06-20 14:57:49 | INFO | train_inner | epoch 005: 1956 / 3002 loss=2.601, ppl=6.07, wps=5886.1, ups=0.09, wpb=64897, bsz=128, num_updates=13883, lr=9.98969e-05, gnorm=2.085, loss_scale=16, train_wall=11, gb_free=2.8, wall=159544
2021-06-20 14:58:00 | INFO | train_inner | epoch 005: 1957 / 3002 loss=2.612, ppl=6.11, wps=5912.4, ups=0.09, wpb=64759, bsz=128, num_updates=13884, lr=9.98969e-05, gnorm=2.158, loss_scale=16, train_wall=11, gb_free=2.8, wall=159554
2021-06-20 14:58:11 | INFO | train_inner | epoch 005: 1958 / 3002 loss=2.531, ppl=5.78, wps=5873, ups=0.09, wpb=64826, bsz=128, num_updates=13885, lr=9.98969e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=159566
2021-06-20 14:58:22 | INFO | train_inner | epoch 005: 1959 / 3002 loss=2.638, ppl=6.22, wps=5852.4, ups=0.09, wpb=64882, bsz=128, num_updates=13886, lr=9.98969e-05, gnorm=2.049, loss_scale=16, train_wall=11, gb_free=2.8, wall=159577
2021-06-20 14:58:33 | INFO | train_inner | epoch 005: 1960 / 3002 loss=2.588, ppl=6.01, wps=5842.7, ups=0.09, wpb=64833, bsz=128, num_updates=13887, lr=9.98969e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=159588
2021-06-20 14:58:44 | INFO | train_inner | epoch 005: 1961 / 3002 loss=2.596, ppl=6.05, wps=5821.9, ups=0.09, wpb=64813, bsz=128, num_updates=13888, lr=9.98969e-05, gnorm=1.997, loss_scale=16, train_wall=11, gb_free=2.8, wall=159599
2021-06-20 14:58:55 | INFO | train_inner | epoch 005: 1962 / 3002 loss=2.491, ppl=5.62, wps=5991, ups=0.09, wpb=64854, bsz=128, num_updates=13889, lr=9.98969e-05, gnorm=1.911, loss_scale=16, train_wall=10, gb_free=2.8, wall=159610
2021-06-20 14:59:06 | INFO | train_inner | epoch 005: 1963 / 3002 loss=2.513, ppl=5.71, wps=5842.3, ups=0.09, wpb=64865, bsz=128, num_updates=13890, lr=9.98969e-05, gnorm=1.982, loss_scale=16, train_wall=11, gb_free=2.8, wall=159621
2021-06-20 14:59:18 | INFO | train_inner | epoch 005: 1964 / 3002 loss=2.433, ppl=5.4, wps=5787.4, ups=0.09, wpb=64901, bsz=128, num_updates=13891, lr=9.98969e-05, gnorm=1.862, loss_scale=16, train_wall=11, gb_free=2.8, wall=159632
2021-06-20 14:59:29 | INFO | train_inner | epoch 005: 1965 / 3002 loss=2.394, ppl=5.26, wps=5954.6, ups=0.09, wpb=64879, bsz=128, num_updates=13892, lr=9.98969e-05, gnorm=2.05, loss_scale=16, train_wall=10, gb_free=2.8, wall=159643
2021-06-20 14:59:40 | INFO | train_inner | epoch 005: 1966 / 3002 loss=2.492, ppl=5.63, wps=5806.1, ups=0.09, wpb=64858, bsz=128, num_updates=13893, lr=9.98968e-05, gnorm=2.011, loss_scale=16, train_wall=11, gb_free=2.8, wall=159654
2021-06-20 14:59:51 | INFO | train_inner | epoch 005: 1967 / 3002 loss=2.426, ppl=5.38, wps=5846.2, ups=0.09, wpb=64771, bsz=128, num_updates=13894, lr=9.98968e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=159665
2021-06-20 15:00:02 | INFO | train_inner | epoch 005: 1968 / 3002 loss=2.439, ppl=5.42, wps=5833.9, ups=0.09, wpb=64825, bsz=128, num_updates=13895, lr=9.98968e-05, gnorm=2.006, loss_scale=16, train_wall=11, gb_free=2.8, wall=159676
2021-06-20 15:00:13 | INFO | train_inner | epoch 005: 1969 / 3002 loss=2.47, ppl=5.54, wps=5763.9, ups=0.09, wpb=64788, bsz=128, num_updates=13896, lr=9.98968e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=159687
2021-06-20 15:00:24 | INFO | train_inner | epoch 005: 1970 / 3002 loss=2.548, ppl=5.85, wps=5761.1, ups=0.09, wpb=64868, bsz=128, num_updates=13897, lr=9.98968e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=159699
2021-06-20 15:00:36 | INFO | train_inner | epoch 005: 1971 / 3002 loss=2.556, ppl=5.88, wps=5782.7, ups=0.09, wpb=64813, bsz=128, num_updates=13898, lr=9.98968e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=159710
2021-06-20 15:00:47 | INFO | train_inner | epoch 005: 1972 / 3002 loss=2.485, ppl=5.6, wps=5769.1, ups=0.09, wpb=64797, bsz=128, num_updates=13899, lr=9.98968e-05, gnorm=1.866, loss_scale=16, train_wall=11, gb_free=2.8, wall=159721
2021-06-20 15:00:58 | INFO | train_inner | epoch 005: 1973 / 3002 loss=2.309, ppl=4.95, wps=5873.4, ups=0.09, wpb=64924, bsz=128, num_updates=13900, lr=9.98968e-05, gnorm=1.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=159732
2021-06-20 15:01:09 | INFO | train_inner | epoch 005: 1974 / 3002 loss=2.512, ppl=5.71, wps=5784.2, ups=0.09, wpb=64847, bsz=128, num_updates=13901, lr=9.98968e-05, gnorm=2.006, loss_scale=16, train_wall=11, gb_free=2.8, wall=159743
2021-06-20 15:01:20 | INFO | train_inner | epoch 005: 1975 / 3002 loss=2.494, ppl=5.63, wps=5790.4, ups=0.09, wpb=64823, bsz=128, num_updates=13902, lr=9.98968e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=159755
2021-06-20 15:01:32 | INFO | train_inner | epoch 005: 1976 / 3002 loss=2.465, ppl=5.52, wps=5771, ups=0.09, wpb=64886, bsz=128, num_updates=13903, lr=9.98968e-05, gnorm=1.913, loss_scale=16, train_wall=11, gb_free=2.8, wall=159766
2021-06-20 15:01:43 | INFO | train_inner | epoch 005: 1977 / 3002 loss=2.508, ppl=5.69, wps=5766.9, ups=0.09, wpb=64758, bsz=128, num_updates=13904, lr=9.98968e-05, gnorm=1.937, loss_scale=16, train_wall=11, gb_free=2.8, wall=159777
2021-06-20 15:01:54 | INFO | train_inner | epoch 005: 1978 / 3002 loss=2.499, ppl=5.65, wps=5894.9, ups=0.09, wpb=64936, bsz=128, num_updates=13905, lr=9.98968e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=159788
2021-06-20 15:02:05 | INFO | train_inner | epoch 005: 1979 / 3002 loss=2.365, ppl=5.15, wps=5854, ups=0.09, wpb=64860, bsz=128, num_updates=13906, lr=9.98967e-05, gnorm=1.887, loss_scale=16, train_wall=11, gb_free=2.8, wall=159799
2021-06-20 15:02:16 | INFO | train_inner | epoch 005: 1980 / 3002 loss=2.503, ppl=5.67, wps=5732.6, ups=0.09, wpb=64782, bsz=128, num_updates=13907, lr=9.98967e-05, gnorm=2.001, loss_scale=16, train_wall=11, gb_free=2.8, wall=159810
2021-06-20 15:02:27 | INFO | train_inner | epoch 005: 1981 / 3002 loss=2.489, ppl=5.61, wps=5870.2, ups=0.09, wpb=64842, bsz=128, num_updates=13908, lr=9.98967e-05, gnorm=2.038, loss_scale=16, train_wall=11, gb_free=2.8, wall=159822
2021-06-20 15:02:38 | INFO | train_inner | epoch 005: 1982 / 3002 loss=2.493, ppl=5.63, wps=5947.1, ups=0.09, wpb=64834, bsz=128, num_updates=13909, lr=9.98967e-05, gnorm=2.011, loss_scale=16, train_wall=10, gb_free=2.8, wall=159832
2021-06-20 15:02:49 | INFO | train_inner | epoch 005: 1983 / 3002 loss=2.525, ppl=5.76, wps=5884.4, ups=0.09, wpb=64859, bsz=128, num_updates=13910, lr=9.98967e-05, gnorm=2.038, loss_scale=16, train_wall=11, gb_free=2.8, wall=159843
2021-06-20 15:03:00 | INFO | train_inner | epoch 005: 1984 / 3002 loss=2.549, ppl=5.85, wps=5907, ups=0.09, wpb=64825, bsz=128, num_updates=13911, lr=9.98967e-05, gnorm=1.934, loss_scale=16, train_wall=11, gb_free=2.8, wall=159854
2021-06-20 15:03:11 | INFO | train_inner | epoch 005: 1985 / 3002 loss=2.471, ppl=5.54, wps=5791.5, ups=0.09, wpb=64795, bsz=128, num_updates=13912, lr=9.98967e-05, gnorm=2.008, loss_scale=16, train_wall=11, gb_free=2.8, wall=159866
2021-06-20 15:03:22 | INFO | train_inner | epoch 005: 1986 / 3002 loss=2.473, ppl=5.55, wps=5908.5, ups=0.09, wpb=64839, bsz=128, num_updates=13913, lr=9.98967e-05, gnorm=1.901, loss_scale=16, train_wall=11, gb_free=2.8, wall=159877
2021-06-20 15:03:33 | INFO | train_inner | epoch 005: 1987 / 3002 loss=2.566, ppl=5.92, wps=5840.9, ups=0.09, wpb=64784, bsz=128, num_updates=13914, lr=9.98967e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=159888
2021-06-20 15:03:45 | INFO | train_inner | epoch 005: 1988 / 3002 loss=2.563, ppl=5.91, wps=5753.2, ups=0.09, wpb=64792, bsz=128, num_updates=13915, lr=9.98967e-05, gnorm=1.929, loss_scale=16, train_wall=11, gb_free=2.8, wall=159899
2021-06-20 15:03:56 | INFO | train_inner | epoch 005: 1989 / 3002 loss=2.635, ppl=6.21, wps=5869.4, ups=0.09, wpb=64814, bsz=128, num_updates=13916, lr=9.98967e-05, gnorm=2.015, loss_scale=16, train_wall=11, gb_free=2.8, wall=159910
2021-06-20 15:04:07 | INFO | train_inner | epoch 005: 1990 / 3002 loss=2.374, ppl=5.18, wps=5904.4, ups=0.09, wpb=64919, bsz=128, num_updates=13917, lr=9.98967e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=159921
2021-06-20 15:04:18 | INFO | train_inner | epoch 005: 1991 / 3002 loss=2.688, ppl=6.44, wps=5925, ups=0.09, wpb=64818, bsz=128, num_updates=13918, lr=9.98966e-05, gnorm=2.003, loss_scale=16, train_wall=10, gb_free=2.8, wall=159932
2021-06-20 15:04:29 | INFO | train_inner | epoch 005: 1992 / 3002 loss=2.407, ppl=5.3, wps=5837.2, ups=0.09, wpb=64846, bsz=128, num_updates=13919, lr=9.98966e-05, gnorm=1.927, loss_scale=16, train_wall=11, gb_free=2.8, wall=159943
2021-06-20 15:04:40 | INFO | train_inner | epoch 005: 1993 / 3002 loss=2.573, ppl=5.95, wps=5763, ups=0.09, wpb=64861, bsz=128, num_updates=13920, lr=9.98966e-05, gnorm=1.995, loss_scale=16, train_wall=11, gb_free=2.8, wall=159954
2021-06-20 15:04:51 | INFO | train_inner | epoch 005: 1994 / 3002 loss=2.52, ppl=5.74, wps=5879.7, ups=0.09, wpb=64799, bsz=128, num_updates=13921, lr=9.98966e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=159965
2021-06-20 15:05:02 | INFO | train_inner | epoch 005: 1995 / 3002 loss=2.497, ppl=5.64, wps=5916.5, ups=0.09, wpb=64811, bsz=128, num_updates=13922, lr=9.98966e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=159976
2021-06-20 15:05:13 | INFO | train_inner | epoch 005: 1996 / 3002 loss=2.46, ppl=5.5, wps=5807.4, ups=0.09, wpb=64825, bsz=128, num_updates=13923, lr=9.98966e-05, gnorm=2.044, loss_scale=16, train_wall=11, gb_free=2.8, wall=159987
2021-06-20 15:05:24 | INFO | train_inner | epoch 005: 1997 / 3002 loss=2.602, ppl=6.07, wps=5904.5, ups=0.09, wpb=64828, bsz=128, num_updates=13924, lr=9.98966e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=159998
2021-06-20 15:05:35 | INFO | train_inner | epoch 005: 1998 / 3002 loss=2.41, ppl=5.31, wps=5799.5, ups=0.09, wpb=64848, bsz=128, num_updates=13925, lr=9.98966e-05, gnorm=1.935, loss_scale=16, train_wall=11, gb_free=2.8, wall=160010
2021-06-20 15:05:46 | INFO | train_inner | epoch 005: 1999 / 3002 loss=2.455, ppl=5.48, wps=5819.8, ups=0.09, wpb=64806, bsz=128, num_updates=13926, lr=9.98966e-05, gnorm=1.87, loss_scale=16, train_wall=11, gb_free=2.8, wall=160021
2021-06-20 15:05:58 | INFO | train_inner | epoch 005: 2000 / 3002 loss=2.602, ppl=6.07, wps=5712.5, ups=0.09, wpb=64751, bsz=128, num_updates=13927, lr=9.98966e-05, gnorm=2.074, loss_scale=16, train_wall=11, gb_free=2.8, wall=160032
2021-06-20 15:06:09 | INFO | train_inner | epoch 005: 2001 / 3002 loss=2.35, ppl=5.1, wps=5865.2, ups=0.09, wpb=64861, bsz=128, num_updates=13928, lr=9.98966e-05, gnorm=1.878, loss_scale=16, train_wall=11, gb_free=2.8, wall=160043
2021-06-20 15:06:20 | INFO | train_inner | epoch 005: 2002 / 3002 loss=2.49, ppl=5.62, wps=5886.2, ups=0.09, wpb=64797, bsz=128, num_updates=13929, lr=9.98966e-05, gnorm=1.88, loss_scale=16, train_wall=11, gb_free=2.8, wall=160054
2021-06-20 15:06:31 | INFO | train_inner | epoch 005: 2003 / 3002 loss=2.472, ppl=5.55, wps=5869.2, ups=0.09, wpb=64841, bsz=128, num_updates=13930, lr=9.98966e-05, gnorm=1.906, loss_scale=16, train_wall=11, gb_free=2.8, wall=160065
2021-06-20 15:06:42 | INFO | train_inner | epoch 005: 2004 / 3002 loss=2.563, ppl=5.91, wps=6042.7, ups=0.09, wpb=64874, bsz=128, num_updates=13931, lr=9.98965e-05, gnorm=1.872, loss_scale=16, train_wall=10, gb_free=2.8, wall=160076
2021-06-20 15:06:53 | INFO | train_inner | epoch 005: 2005 / 3002 loss=2.673, ppl=6.38, wps=5855.3, ups=0.09, wpb=64731, bsz=128, num_updates=13932, lr=9.98965e-05, gnorm=2.065, loss_scale=16, train_wall=11, gb_free=2.8, wall=160087
2021-06-20 15:07:04 | INFO | train_inner | epoch 005: 2006 / 3002 loss=2.564, ppl=5.91, wps=5895, ups=0.09, wpb=64859, bsz=128, num_updates=13933, lr=9.98965e-05, gnorm=1.823, loss_scale=16, train_wall=11, gb_free=2.8, wall=160098
2021-06-20 15:07:15 | INFO | train_inner | epoch 005: 2007 / 3002 loss=2.471, ppl=5.54, wps=5892.3, ups=0.09, wpb=64792, bsz=128, num_updates=13934, lr=9.98965e-05, gnorm=1.921, loss_scale=16, train_wall=11, gb_free=2.8, wall=160109
2021-06-20 15:07:26 | INFO | train_inner | epoch 005: 2008 / 3002 loss=2.508, ppl=5.69, wps=5931.1, ups=0.09, wpb=64841, bsz=128, num_updates=13935, lr=9.98965e-05, gnorm=1.928, loss_scale=16, train_wall=10, gb_free=2.8, wall=160120
2021-06-20 15:07:37 | INFO | train_inner | epoch 005: 2009 / 3002 loss=2.679, ppl=6.4, wps=5816.3, ups=0.09, wpb=64795, bsz=128, num_updates=13936, lr=9.98965e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=160131
2021-06-20 15:07:48 | INFO | train_inner | epoch 005: 2010 / 3002 loss=2.475, ppl=5.56, wps=5848.1, ups=0.09, wpb=64891, bsz=128, num_updates=13937, lr=9.98965e-05, gnorm=1.917, loss_scale=16, train_wall=11, gb_free=2.8, wall=160142
2021-06-20 15:07:59 | INFO | train_inner | epoch 005: 2011 / 3002 loss=2.56, ppl=5.9, wps=5900.4, ups=0.09, wpb=64830, bsz=128, num_updates=13938, lr=9.98965e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=160153
2021-06-20 15:08:10 | INFO | train_inner | epoch 005: 2012 / 3002 loss=2.441, ppl=5.43, wps=5827.5, ups=0.09, wpb=64877, bsz=128, num_updates=13939, lr=9.98965e-05, gnorm=1.946, loss_scale=16, train_wall=11, gb_free=2.8, wall=160164
2021-06-20 15:08:21 | INFO | train_inner | epoch 005: 2013 / 3002 loss=2.644, ppl=6.25, wps=5907.7, ups=0.09, wpb=64803, bsz=128, num_updates=13940, lr=9.98965e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=160175
2021-06-20 15:08:32 | INFO | train_inner | epoch 005: 2014 / 3002 loss=2.526, ppl=5.76, wps=5846.5, ups=0.09, wpb=64878, bsz=128, num_updates=13941, lr=9.98965e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=160186
2021-06-20 15:08:43 | INFO | train_inner | epoch 005: 2015 / 3002 loss=2.417, ppl=5.34, wps=5900, ups=0.09, wpb=64787, bsz=128, num_updates=13942, lr=9.98965e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=160197
2021-06-20 15:08:54 | INFO | train_inner | epoch 005: 2016 / 3002 loss=2.571, ppl=5.94, wps=5859, ups=0.09, wpb=64817, bsz=128, num_updates=13943, lr=9.98964e-05, gnorm=2.144, loss_scale=16, train_wall=11, gb_free=2.8, wall=160208
2021-06-20 15:09:05 | INFO | train_inner | epoch 005: 2017 / 3002 loss=2.629, ppl=6.19, wps=5868.7, ups=0.09, wpb=64856, bsz=128, num_updates=13944, lr=9.98964e-05, gnorm=2.17, loss_scale=16, train_wall=11, gb_free=2.8, wall=160219
2021-06-20 15:09:16 | INFO | train_inner | epoch 005: 2018 / 3002 loss=2.501, ppl=5.66, wps=5807.5, ups=0.09, wpb=64874, bsz=128, num_updates=13945, lr=9.98964e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=160231
2021-06-20 15:09:27 | INFO | train_inner | epoch 005: 2019 / 3002 loss=2.565, ppl=5.92, wps=5799.8, ups=0.09, wpb=64855, bsz=128, num_updates=13946, lr=9.98964e-05, gnorm=1.923, loss_scale=32, train_wall=11, gb_free=2.8, wall=160242
2021-06-20 15:09:38 | INFO | train_inner | epoch 005: 2020 / 3002 loss=2.542, ppl=5.82, wps=5930.6, ups=0.09, wpb=64829, bsz=128, num_updates=13947, lr=9.98964e-05, gnorm=2.036, loss_scale=32, train_wall=10, gb_free=2.8, wall=160253
2021-06-20 15:09:50 | INFO | train_inner | epoch 005: 2021 / 3002 loss=2.595, ppl=6.04, wps=5803, ups=0.09, wpb=64773, bsz=128, num_updates=13948, lr=9.98964e-05, gnorm=1.968, loss_scale=32, train_wall=11, gb_free=2.8, wall=160264
2021-06-20 15:10:00 | INFO | train_inner | epoch 005: 2022 / 3002 loss=2.505, ppl=5.68, wps=5948.9, ups=0.09, wpb=64820, bsz=128, num_updates=13949, lr=9.98964e-05, gnorm=1.926, loss_scale=32, train_wall=10, gb_free=2.8, wall=160275
2021-06-20 15:10:12 | INFO | train_inner | epoch 005: 2023 / 3002 loss=2.574, ppl=5.95, wps=5795.7, ups=0.09, wpb=64836, bsz=128, num_updates=13950, lr=9.98964e-05, gnorm=1.887, loss_scale=32, train_wall=11, gb_free=2.8, wall=160286
2021-06-20 15:10:23 | INFO | train_inner | epoch 005: 2024 / 3002 loss=2.625, ppl=6.17, wps=5810.3, ups=0.09, wpb=64679, bsz=128, num_updates=13951, lr=9.98964e-05, gnorm=1.949, loss_scale=32, train_wall=11, gb_free=2.8, wall=160297
2021-06-20 15:10:34 | INFO | train_inner | epoch 005: 2025 / 3002 loss=2.56, ppl=5.9, wps=5884.3, ups=0.09, wpb=64764, bsz=128, num_updates=13952, lr=9.98964e-05, gnorm=1.899, loss_scale=32, train_wall=11, gb_free=2.8, wall=160308
2021-06-20 15:10:45 | INFO | train_inner | epoch 005: 2026 / 3002 loss=2.533, ppl=5.79, wps=5829.6, ups=0.09, wpb=64818, bsz=128, num_updates=13953, lr=9.98964e-05, gnorm=1.931, loss_scale=32, train_wall=11, gb_free=2.8, wall=160319
2021-06-20 15:10:56 | INFO | train_inner | epoch 005: 2027 / 3002 loss=2.591, ppl=6.02, wps=5781.4, ups=0.09, wpb=64798, bsz=128, num_updates=13954, lr=9.98964e-05, gnorm=2.007, loss_scale=32, train_wall=11, gb_free=2.8, wall=160330
2021-06-20 15:11:07 | INFO | train_inner | epoch 005: 2028 / 3002 loss=2.502, ppl=5.66, wps=5797.1, ups=0.09, wpb=64745, bsz=128, num_updates=13955, lr=9.98964e-05, gnorm=1.913, loss_scale=32, train_wall=11, gb_free=2.8, wall=160342
2021-06-20 15:11:18 | INFO | train_inner | epoch 005: 2029 / 3002 loss=2.714, ppl=6.56, wps=5843.2, ups=0.09, wpb=64823, bsz=128, num_updates=13956, lr=9.98963e-05, gnorm=2.058, loss_scale=32, train_wall=11, gb_free=2.8, wall=160353
2021-06-20 15:11:29 | INFO | train_inner | epoch 005: 2030 / 3002 loss=2.518, ppl=5.73, wps=5868.1, ups=0.09, wpb=64817, bsz=128, num_updates=13957, lr=9.98963e-05, gnorm=1.945, loss_scale=32, train_wall=11, gb_free=2.8, wall=160364
2021-06-20 15:11:41 | INFO | train_inner | epoch 005: 2031 / 3002 loss=2.534, ppl=5.79, wps=5749.3, ups=0.09, wpb=64829, bsz=128, num_updates=13958, lr=9.98963e-05, gnorm=1.876, loss_scale=32, train_wall=11, gb_free=2.8, wall=160375
2021-06-20 15:11:52 | INFO | train_inner | epoch 005: 2032 / 3002 loss=2.52, ppl=5.74, wps=5838.6, ups=0.09, wpb=64798, bsz=128, num_updates=13959, lr=9.98963e-05, gnorm=2.002, loss_scale=32, train_wall=11, gb_free=2.8, wall=160386
2021-06-20 15:12:03 | INFO | train_inner | epoch 005: 2033 / 3002 loss=2.434, ppl=5.4, wps=5899, ups=0.09, wpb=64815, bsz=128, num_updates=13960, lr=9.98963e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=160397
2021-06-20 15:12:14 | INFO | train_inner | epoch 005: 2034 / 3002 loss=2.457, ppl=5.49, wps=5796.5, ups=0.09, wpb=64826, bsz=128, num_updates=13961, lr=9.98963e-05, gnorm=1.982, loss_scale=32, train_wall=11, gb_free=2.8, wall=160408
2021-06-20 15:12:25 | INFO | train_inner | epoch 005: 2035 / 3002 loss=2.392, ppl=5.25, wps=5787.8, ups=0.09, wpb=64805, bsz=128, num_updates=13962, lr=9.98963e-05, gnorm=2.023, loss_scale=32, train_wall=11, gb_free=2.8, wall=160419
2021-06-20 15:12:36 | INFO | train_inner | epoch 005: 2036 / 3002 loss=2.426, ppl=5.37, wps=5736, ups=0.09, wpb=64750, bsz=128, num_updates=13963, lr=9.98963e-05, gnorm=1.909, loss_scale=32, train_wall=11, gb_free=2.8, wall=160431
2021-06-20 15:12:48 | INFO | train_inner | epoch 005: 2037 / 3002 loss=2.477, ppl=5.57, wps=5813.3, ups=0.09, wpb=64888, bsz=128, num_updates=13964, lr=9.98963e-05, gnorm=2.017, loss_scale=32, train_wall=11, gb_free=2.8, wall=160442
2021-06-20 15:12:59 | INFO | train_inner | epoch 005: 2038 / 3002 loss=2.36, ppl=5.13, wps=5940.1, ups=0.09, wpb=64816, bsz=128, num_updates=13965, lr=9.98963e-05, gnorm=1.951, loss_scale=32, train_wall=10, gb_free=2.8, wall=160453
2021-06-20 15:13:10 | INFO | train_inner | epoch 005: 2039 / 3002 loss=2.541, ppl=5.82, wps=5787.9, ups=0.09, wpb=64859, bsz=128, num_updates=13966, lr=9.98963e-05, gnorm=1.915, loss_scale=32, train_wall=11, gb_free=2.8, wall=160464
2021-06-20 15:13:21 | INFO | train_inner | epoch 005: 2040 / 3002 loss=2.415, ppl=5.33, wps=5871.5, ups=0.09, wpb=64904, bsz=128, num_updates=13967, lr=9.98963e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=160475
2021-06-20 15:13:32 | INFO | train_inner | epoch 005: 2041 / 3002 loss=2.381, ppl=5.21, wps=5789.1, ups=0.09, wpb=64762, bsz=128, num_updates=13968, lr=9.98962e-05, gnorm=1.925, loss_scale=32, train_wall=11, gb_free=2.8, wall=160486
2021-06-20 15:13:43 | INFO | train_inner | epoch 005: 2042 / 3002 loss=2.556, ppl=5.88, wps=5790.9, ups=0.09, wpb=64869, bsz=128, num_updates=13969, lr=9.98962e-05, gnorm=1.987, loss_scale=32, train_wall=11, gb_free=2.8, wall=160497
2021-06-20 15:13:54 | INFO | train_inner | epoch 005: 2043 / 3002 loss=2.655, ppl=6.3, wps=5986.1, ups=0.09, wpb=64878, bsz=128, num_updates=13970, lr=9.98962e-05, gnorm=1.992, loss_scale=32, train_wall=10, gb_free=2.8, wall=160508
2021-06-20 15:14:05 | INFO | train_inner | epoch 005: 2044 / 3002 loss=2.393, ppl=5.25, wps=5811.8, ups=0.09, wpb=64923, bsz=128, num_updates=13971, lr=9.98962e-05, gnorm=2.006, loss_scale=32, train_wall=11, gb_free=2.8, wall=160520
2021-06-20 15:14:16 | INFO | train_inner | epoch 005: 2045 / 3002 loss=2.612, ppl=6.11, wps=5908, ups=0.09, wpb=64765, bsz=128, num_updates=13972, lr=9.98962e-05, gnorm=2.057, loss_scale=32, train_wall=11, gb_free=2.8, wall=160530
2021-06-20 15:14:27 | INFO | train_inner | epoch 005: 2046 / 3002 loss=2.588, ppl=6.01, wps=5910.9, ups=0.09, wpb=64844, bsz=128, num_updates=13973, lr=9.98962e-05, gnorm=1.977, loss_scale=32, train_wall=11, gb_free=2.8, wall=160541
2021-06-20 15:14:38 | INFO | train_inner | epoch 005: 2047 / 3002 loss=2.519, ppl=5.73, wps=5813.5, ups=0.09, wpb=64804, bsz=128, num_updates=13974, lr=9.98962e-05, gnorm=1.949, loss_scale=32, train_wall=11, gb_free=2.8, wall=160553
2021-06-20 15:14:49 | INFO | train_inner | epoch 005: 2048 / 3002 loss=2.427, ppl=5.38, wps=5767.6, ups=0.09, wpb=64854, bsz=128, num_updates=13975, lr=9.98962e-05, gnorm=2.071, loss_scale=32, train_wall=11, gb_free=2.8, wall=160564
2021-06-20 15:15:01 | INFO | train_inner | epoch 005: 2049 / 3002 loss=2.512, ppl=5.7, wps=5806.4, ups=0.09, wpb=64849, bsz=128, num_updates=13976, lr=9.98962e-05, gnorm=2.032, loss_scale=32, train_wall=11, gb_free=2.8, wall=160575
2021-06-20 15:15:12 | INFO | train_inner | epoch 005: 2050 / 3002 loss=2.498, ppl=5.65, wps=5801, ups=0.09, wpb=64735, bsz=128, num_updates=13977, lr=9.98962e-05, gnorm=1.926, loss_scale=32, train_wall=11, gb_free=2.8, wall=160586
2021-06-20 15:15:23 | INFO | train_inner | epoch 005: 2051 / 3002 loss=2.465, ppl=5.52, wps=5873.3, ups=0.09, wpb=64844, bsz=128, num_updates=13978, lr=9.98962e-05, gnorm=1.905, loss_scale=32, train_wall=11, gb_free=2.8, wall=160597
2021-06-20 15:15:34 | INFO | train_inner | epoch 005: 2052 / 3002 loss=2.311, ppl=4.96, wps=5757.5, ups=0.09, wpb=64831, bsz=128, num_updates=13979, lr=9.98962e-05, gnorm=2.058, loss_scale=32, train_wall=11, gb_free=2.8, wall=160608
2021-06-20 15:15:45 | INFO | train_inner | epoch 005: 2053 / 3002 loss=2.441, ppl=5.43, wps=5886.7, ups=0.09, wpb=64808, bsz=128, num_updates=13980, lr=9.98962e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=160619
2021-06-20 15:15:56 | INFO | train_inner | epoch 005: 2054 / 3002 loss=2.469, ppl=5.54, wps=5766, ups=0.09, wpb=64761, bsz=128, num_updates=13981, lr=9.98961e-05, gnorm=2.003, loss_scale=32, train_wall=11, gb_free=2.8, wall=160631
2021-06-20 15:16:08 | INFO | train_inner | epoch 005: 2055 / 3002 loss=2.586, ppl=6, wps=5759.1, ups=0.09, wpb=64886, bsz=128, num_updates=13982, lr=9.98961e-05, gnorm=2.059, loss_scale=32, train_wall=11, gb_free=2.8, wall=160642
2021-06-20 15:16:19 | INFO | train_inner | epoch 005: 2056 / 3002 loss=2.522, ppl=5.75, wps=5911.8, ups=0.09, wpb=64769, bsz=128, num_updates=13983, lr=9.98961e-05, gnorm=1.936, loss_scale=32, train_wall=10, gb_free=2.8, wall=160653
2021-06-20 15:16:30 | INFO | train_inner | epoch 005: 2057 / 3002 loss=2.599, ppl=6.06, wps=5810.9, ups=0.09, wpb=64819, bsz=128, num_updates=13984, lr=9.98961e-05, gnorm=2.114, loss_scale=32, train_wall=11, gb_free=2.8, wall=160664
2021-06-20 15:16:41 | INFO | train_inner | epoch 005: 2058 / 3002 loss=2.485, ppl=5.6, wps=5754.3, ups=0.09, wpb=64798, bsz=128, num_updates=13985, lr=9.98961e-05, gnorm=1.935, loss_scale=32, train_wall=11, gb_free=2.8, wall=160675
2021-06-20 15:16:52 | INFO | train_inner | epoch 005: 2059 / 3002 loss=2.566, ppl=5.92, wps=5773.9, ups=0.09, wpb=64797, bsz=128, num_updates=13986, lr=9.98961e-05, gnorm=1.914, loss_scale=32, train_wall=11, gb_free=2.8, wall=160687
2021-06-20 15:17:03 | INFO | train_inner | epoch 005: 2060 / 3002 loss=2.586, ppl=6.01, wps=5941.9, ups=0.09, wpb=64824, bsz=128, num_updates=13987, lr=9.98961e-05, gnorm=2.053, loss_scale=32, train_wall=10, gb_free=2.8, wall=160697
2021-06-20 15:17:14 | INFO | train_inner | epoch 005: 2061 / 3002 loss=2.584, ppl=6, wps=5943, ups=0.09, wpb=64790, bsz=128, num_updates=13988, lr=9.98961e-05, gnorm=1.977, loss_scale=32, train_wall=10, gb_free=2.8, wall=160708
2021-06-20 15:17:25 | INFO | train_inner | epoch 005: 2062 / 3002 loss=2.407, ppl=5.3, wps=5840.9, ups=0.09, wpb=64817, bsz=128, num_updates=13989, lr=9.98961e-05, gnorm=1.983, loss_scale=32, train_wall=11, gb_free=2.8, wall=160719
2021-06-20 15:17:36 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0
2021-06-20 15:17:47 | INFO | train_inner | epoch 005: 2064 / 3002 loss=2.509, ppl=5.69, wps=2968.3, ups=0.05, wpb=64873, bsz=128, num_updates=13990, lr=9.98961e-05, gnorm=2.002, loss_scale=16, train_wall=21, gb_free=2.8, wall=160741
2021-06-20 15:17:58 | INFO | train_inner | epoch 005: 2065 / 3002 loss=2.642, ppl=6.24, wps=5791.1, ups=0.09, wpb=64831, bsz=128, num_updates=13991, lr=9.98961e-05, gnorm=1.991, loss_scale=16, train_wall=11, gb_free=2.8, wall=160753
2021-06-20 15:18:09 | INFO | train_inner | epoch 005: 2066 / 3002 loss=2.496, ppl=5.64, wps=5860, ups=0.09, wpb=64878, bsz=128, num_updates=13992, lr=9.98961e-05, gnorm=2.072, loss_scale=16, train_wall=11, gb_free=2.8, wall=160764
2021-06-20 15:18:21 | INFO | train_inner | epoch 005: 2067 / 3002 loss=2.547, ppl=5.84, wps=5722.2, ups=0.09, wpb=64860, bsz=128, num_updates=13993, lr=9.9896e-05, gnorm=1.975, loss_scale=16, train_wall=11, gb_free=2.8, wall=160775
2021-06-20 15:18:32 | INFO | train_inner | epoch 005: 2068 / 3002 loss=2.439, ppl=5.42, wps=5816.7, ups=0.09, wpb=64821, bsz=128, num_updates=13994, lr=9.9896e-05, gnorm=1.945, loss_scale=16, train_wall=11, gb_free=2.8, wall=160786
2021-06-20 15:18:43 | INFO | train_inner | epoch 005: 2069 / 3002 loss=2.467, ppl=5.53, wps=5879.5, ups=0.09, wpb=64820, bsz=128, num_updates=13995, lr=9.9896e-05, gnorm=2.004, loss_scale=16, train_wall=11, gb_free=2.8, wall=160797
2021-06-20 15:18:54 | INFO | train_inner | epoch 005: 2070 / 3002 loss=2.391, ppl=5.24, wps=5804.1, ups=0.09, wpb=64833, bsz=128, num_updates=13996, lr=9.9896e-05, gnorm=1.884, loss_scale=16, train_wall=11, gb_free=2.8, wall=160808
2021-06-20 15:19:05 | INFO | train_inner | epoch 005: 2071 / 3002 loss=2.733, ppl=6.65, wps=5792.6, ups=0.09, wpb=64802, bsz=128, num_updates=13997, lr=9.9896e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=160819
2021-06-20 15:19:16 | INFO | train_inner | epoch 005: 2072 / 3002 loss=2.471, ppl=5.54, wps=5870.3, ups=0.09, wpb=64830, bsz=128, num_updates=13998, lr=9.9896e-05, gnorm=2.012, loss_scale=16, train_wall=11, gb_free=2.8, wall=160830
2021-06-20 15:19:27 | INFO | train_inner | epoch 005: 2073 / 3002 loss=2.405, ppl=5.3, wps=5793.5, ups=0.09, wpb=64877, bsz=128, num_updates=13999, lr=9.9896e-05, gnorm=1.927, loss_scale=16, train_wall=11, gb_free=2.8, wall=160842
2021-06-20 15:19:38 | INFO | train_inner | epoch 005: 2074 / 3002 loss=2.557, ppl=5.89, wps=5969, ups=0.09, wpb=64890, bsz=128, num_updates=14000, lr=9.9896e-05, gnorm=1.909, loss_scale=16, train_wall=10, gb_free=2.8, wall=160853
2021-06-20 15:19:49 | INFO | train_inner | epoch 005: 2075 / 3002 loss=2.401, ppl=5.28, wps=5847.9, ups=0.09, wpb=64902, bsz=128, num_updates=14001, lr=9.9896e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=160864
2021-06-20 15:20:01 | INFO | train_inner | epoch 005: 2076 / 3002 loss=2.58, ppl=5.98, wps=5777.8, ups=0.09, wpb=64811, bsz=128, num_updates=14002, lr=9.9896e-05, gnorm=1.987, loss_scale=16, train_wall=11, gb_free=2.8, wall=160875
2021-06-20 15:20:12 | INFO | train_inner | epoch 005: 2077 / 3002 loss=2.567, ppl=5.92, wps=5897.3, ups=0.09, wpb=64778, bsz=128, num_updates=14003, lr=9.9896e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=160886
2021-06-20 15:20:23 | INFO | train_inner | epoch 005: 2078 / 3002 loss=2.508, ppl=5.69, wps=5778.5, ups=0.09, wpb=64884, bsz=128, num_updates=14004, lr=9.9896e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=160897
2021-06-20 15:20:34 | INFO | train_inner | epoch 005: 2079 / 3002 loss=2.334, ppl=5.04, wps=5798.2, ups=0.09, wpb=64879, bsz=128, num_updates=14005, lr=9.9896e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=160908
2021-06-20 15:20:45 | INFO | train_inner | epoch 005: 2080 / 3002 loss=2.481, ppl=5.58, wps=5800, ups=0.09, wpb=64864, bsz=128, num_updates=14006, lr=9.98959e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=160919
2021-06-20 15:20:56 | INFO | train_inner | epoch 005: 2081 / 3002 loss=2.485, ppl=5.6, wps=5730.3, ups=0.09, wpb=64822, bsz=128, num_updates=14007, lr=9.98959e-05, gnorm=1.908, loss_scale=16, train_wall=11, gb_free=2.8, wall=160931
2021-06-20 15:21:07 | INFO | train_inner | epoch 005: 2082 / 3002 loss=2.573, ppl=5.95, wps=5903.7, ups=0.09, wpb=64812, bsz=128, num_updates=14008, lr=9.98959e-05, gnorm=2.103, loss_scale=16, train_wall=11, gb_free=2.8, wall=160942
2021-06-20 15:21:18 | INFO | train_inner | epoch 005: 2083 / 3002 loss=2.383, ppl=5.21, wps=5927, ups=0.09, wpb=64941, bsz=128, num_updates=14009, lr=9.98959e-05, gnorm=1.883, loss_scale=16, train_wall=11, gb_free=2.8, wall=160953
2021-06-20 15:21:30 | INFO | train_inner | epoch 005: 2084 / 3002 loss=2.342, ppl=5.07, wps=5707.8, ups=0.09, wpb=64889, bsz=128, num_updates=14010, lr=9.98959e-05, gnorm=1.865, loss_scale=16, train_wall=11, gb_free=2.8, wall=160964
2021-06-20 15:21:41 | INFO | train_inner | epoch 005: 2085 / 3002 loss=2.398, ppl=5.27, wps=5988.4, ups=0.09, wpb=64896, bsz=128, num_updates=14011, lr=9.98959e-05, gnorm=1.85, loss_scale=16, train_wall=10, gb_free=2.8, wall=160975
2021-06-20 15:21:52 | INFO | train_inner | epoch 005: 2086 / 3002 loss=2.494, ppl=5.63, wps=5866.7, ups=0.09, wpb=64799, bsz=128, num_updates=14012, lr=9.98959e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=160986
2021-06-20 15:22:03 | INFO | train_inner | epoch 005: 2087 / 3002 loss=2.492, ppl=5.63, wps=5724.8, ups=0.09, wpb=64941, bsz=128, num_updates=14013, lr=9.98959e-05, gnorm=1.988, loss_scale=16, train_wall=11, gb_free=2.8, wall=160997
2021-06-20 15:22:14 | INFO | train_inner | epoch 005: 2088 / 3002 loss=2.401, ppl=5.28, wps=5892.5, ups=0.09, wpb=64881, bsz=128, num_updates=14014, lr=9.98959e-05, gnorm=2.031, loss_scale=16, train_wall=11, gb_free=2.8, wall=161008
2021-06-20 15:22:25 | INFO | train_inner | epoch 005: 2089 / 3002 loss=2.469, ppl=5.54, wps=5887.3, ups=0.09, wpb=64786, bsz=128, num_updates=14015, lr=9.98959e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=161019
2021-06-20 15:22:36 | INFO | train_inner | epoch 005: 2090 / 3002 loss=2.53, ppl=5.78, wps=5804.7, ups=0.09, wpb=64849, bsz=128, num_updates=14016, lr=9.98959e-05, gnorm=2.055, loss_scale=16, train_wall=11, gb_free=2.8, wall=161031
2021-06-20 15:22:47 | INFO | train_inner | epoch 005: 2091 / 3002 loss=2.487, ppl=5.61, wps=5833.8, ups=0.09, wpb=64907, bsz=128, num_updates=14017, lr=9.98959e-05, gnorm=1.891, loss_scale=16, train_wall=11, gb_free=2.8, wall=161042
2021-06-20 15:22:58 | INFO | train_inner | epoch 005: 2092 / 3002 loss=2.504, ppl=5.67, wps=5928.3, ups=0.09, wpb=64896, bsz=128, num_updates=14018, lr=9.98958e-05, gnorm=1.85, loss_scale=16, train_wall=10, gb_free=2.8, wall=161053
2021-06-20 15:23:10 | INFO | train_inner | epoch 005: 2093 / 3002 loss=2.538, ppl=5.81, wps=5754.3, ups=0.09, wpb=64830, bsz=128, num_updates=14019, lr=9.98958e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=161064
2021-06-20 15:23:20 | INFO | train_inner | epoch 005: 2094 / 3002 loss=2.469, ppl=5.54, wps=5906.3, ups=0.09, wpb=64765, bsz=128, num_updates=14020, lr=9.98958e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=161075
2021-06-20 15:23:31 | INFO | train_inner | epoch 005: 2095 / 3002 loss=2.434, ppl=5.4, wps=5905.5, ups=0.09, wpb=64885, bsz=128, num_updates=14021, lr=9.98958e-05, gnorm=2.036, loss_scale=16, train_wall=11, gb_free=2.8, wall=161086
2021-06-20 15:23:43 | INFO | train_inner | epoch 005: 2096 / 3002 loss=2.479, ppl=5.57, wps=5843.1, ups=0.09, wpb=64786, bsz=128, num_updates=14022, lr=9.98958e-05, gnorm=2.049, loss_scale=16, train_wall=11, gb_free=2.8, wall=161097
2021-06-20 15:23:53 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 15:24:05 | INFO | train_inner | epoch 005: 2098 / 3002 loss=2.463, ppl=5.52, wps=2940.7, ups=0.05, wpb=64880, bsz=128, num_updates=14023, lr=9.98958e-05, gnorm=2.104, loss_scale=8, train_wall=21, gb_free=2.8, wall=161119
2021-06-20 15:24:16 | INFO | train_inner | epoch 005: 2099 / 3002 loss=2.513, ppl=5.71, wps=5794.1, ups=0.09, wpb=64744, bsz=128, num_updates=14024, lr=9.98958e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=161130
2021-06-20 15:24:27 | INFO | train_inner | epoch 005: 2100 / 3002 loss=2.47, ppl=5.54, wps=5978.9, ups=0.09, wpb=64805, bsz=128, num_updates=14025, lr=9.98958e-05, gnorm=1.875, loss_scale=8, train_wall=10, gb_free=2.8, wall=161141
2021-06-20 15:24:37 | INFO | train_inner | epoch 005: 2101 / 3002 loss=2.55, ppl=5.86, wps=5993.5, ups=0.09, wpb=64883, bsz=128, num_updates=14026, lr=9.98958e-05, gnorm=2.035, loss_scale=8, train_wall=10, gb_free=2.8, wall=161152
2021-06-20 15:24:49 | INFO | train_inner | epoch 005: 2102 / 3002 loss=2.526, ppl=5.76, wps=5847.5, ups=0.09, wpb=64814, bsz=128, num_updates=14027, lr=9.98958e-05, gnorm=2.091, loss_scale=8, train_wall=11, gb_free=2.8, wall=161163
2021-06-20 15:25:00 | INFO | train_inner | epoch 005: 2103 / 3002 loss=2.433, ppl=5.4, wps=5861.7, ups=0.09, wpb=64880, bsz=128, num_updates=14028, lr=9.98958e-05, gnorm=1.856, loss_scale=8, train_wall=11, gb_free=2.8, wall=161174
2021-06-20 15:25:11 | INFO | train_inner | epoch 005: 2104 / 3002 loss=2.467, ppl=5.53, wps=5816.1, ups=0.09, wpb=64840, bsz=128, num_updates=14029, lr=9.98958e-05, gnorm=1.881, loss_scale=8, train_wall=11, gb_free=2.8, wall=161185
2021-06-20 15:25:22 | INFO | train_inner | epoch 005: 2105 / 3002 loss=2.328, ppl=5.02, wps=5858.7, ups=0.09, wpb=64805, bsz=128, num_updates=14030, lr=9.98958e-05, gnorm=1.853, loss_scale=8, train_wall=11, gb_free=2.8, wall=161196
2021-06-20 15:25:33 | INFO | train_inner | epoch 005: 2106 / 3002 loss=2.476, ppl=5.56, wps=5807, ups=0.09, wpb=64877, bsz=128, num_updates=14031, lr=9.98957e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=161207
2021-06-20 15:25:44 | INFO | train_inner | epoch 005: 2107 / 3002 loss=2.487, ppl=5.61, wps=5807.6, ups=0.09, wpb=64857, bsz=128, num_updates=14032, lr=9.98957e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=161218
2021-06-20 15:25:55 | INFO | train_inner | epoch 005: 2108 / 3002 loss=2.48, ppl=5.58, wps=6029.8, ups=0.09, wpb=64893, bsz=128, num_updates=14033, lr=9.98957e-05, gnorm=1.974, loss_scale=8, train_wall=10, gb_free=2.8, wall=161229
2021-06-20 15:26:06 | INFO | train_inner | epoch 005: 2109 / 3002 loss=2.492, ppl=5.62, wps=5882.5, ups=0.09, wpb=64792, bsz=128, num_updates=14034, lr=9.98957e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=161240
2021-06-20 15:26:17 | INFO | train_inner | epoch 005: 2110 / 3002 loss=2.462, ppl=5.51, wps=5902.7, ups=0.09, wpb=64870, bsz=128, num_updates=14035, lr=9.98957e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=161251
2021-06-20 15:26:28 | INFO | train_inner | epoch 005: 2111 / 3002 loss=2.69, ppl=6.45, wps=5842, ups=0.09, wpb=64823, bsz=128, num_updates=14036, lr=9.98957e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=161262
2021-06-20 15:26:39 | INFO | train_inner | epoch 005: 2112 / 3002 loss=2.534, ppl=5.79, wps=5860.9, ups=0.09, wpb=64846, bsz=128, num_updates=14037, lr=9.98957e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=161273
2021-06-20 15:26:50 | INFO | train_inner | epoch 005: 2113 / 3002 loss=2.517, ppl=5.72, wps=5757, ups=0.09, wpb=64879, bsz=128, num_updates=14038, lr=9.98957e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=161285
2021-06-20 15:27:01 | INFO | train_inner | epoch 005: 2114 / 3002 loss=2.606, ppl=6.09, wps=5869.7, ups=0.09, wpb=64841, bsz=128, num_updates=14039, lr=9.98957e-05, gnorm=2.303, loss_scale=8, train_wall=11, gb_free=2.8, wall=161296
2021-06-20 15:27:12 | INFO | train_inner | epoch 005: 2115 / 3002 loss=2.473, ppl=5.55, wps=5982.9, ups=0.09, wpb=64881, bsz=128, num_updates=14040, lr=9.98957e-05, gnorm=1.984, loss_scale=8, train_wall=10, gb_free=2.8, wall=161307
2021-06-20 15:27:23 | INFO | train_inner | epoch 005: 2116 / 3002 loss=2.624, ppl=6.17, wps=5822.3, ups=0.09, wpb=64779, bsz=128, num_updates=14041, lr=9.98957e-05, gnorm=2.136, loss_scale=8, train_wall=11, gb_free=2.8, wall=161318
2021-06-20 15:27:35 | INFO | train_inner | epoch 005: 2117 / 3002 loss=2.418, ppl=5.34, wps=5757.4, ups=0.09, wpb=64829, bsz=128, num_updates=14042, lr=9.98957e-05, gnorm=1.877, loss_scale=8, train_wall=11, gb_free=2.8, wall=161329
2021-06-20 15:27:46 | INFO | train_inner | epoch 005: 2118 / 3002 loss=2.468, ppl=5.53, wps=5788.4, ups=0.09, wpb=64887, bsz=128, num_updates=14043, lr=9.98956e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=161340
2021-06-20 15:27:57 | INFO | train_inner | epoch 005: 2119 / 3002 loss=2.663, ppl=6.33, wps=5892.8, ups=0.09, wpb=64797, bsz=128, num_updates=14044, lr=9.98956e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=161351
2021-06-20 15:28:08 | INFO | train_inner | epoch 005: 2120 / 3002 loss=2.556, ppl=5.88, wps=5907.3, ups=0.09, wpb=64829, bsz=128, num_updates=14045, lr=9.98956e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=161362
2021-06-20 15:28:19 | INFO | train_inner | epoch 005: 2121 / 3002 loss=2.603, ppl=6.08, wps=5856, ups=0.09, wpb=64851, bsz=128, num_updates=14046, lr=9.98956e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=161373
2021-06-20 15:28:30 | INFO | train_inner | epoch 005: 2122 / 3002 loss=2.515, ppl=5.72, wps=5913, ups=0.09, wpb=64848, bsz=128, num_updates=14047, lr=9.98956e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=161384
2021-06-20 15:28:41 | INFO | train_inner | epoch 005: 2123 / 3002 loss=2.426, ppl=5.37, wps=5866.9, ups=0.09, wpb=64784, bsz=128, num_updates=14048, lr=9.98956e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=161395
2021-06-20 15:28:52 | INFO | train_inner | epoch 005: 2124 / 3002 loss=2.394, ppl=5.26, wps=5818.7, ups=0.09, wpb=64879, bsz=128, num_updates=14049, lr=9.98956e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=161406
2021-06-20 15:29:03 | INFO | train_inner | epoch 005: 2125 / 3002 loss=2.618, ppl=6.14, wps=5792.5, ups=0.09, wpb=64815, bsz=128, num_updates=14050, lr=9.98956e-05, gnorm=2.062, loss_scale=8, train_wall=11, gb_free=2.8, wall=161418
2021-06-20 15:29:14 | INFO | train_inner | epoch 005: 2126 / 3002 loss=2.482, ppl=5.59, wps=5891.8, ups=0.09, wpb=64820, bsz=128, num_updates=14051, lr=9.98956e-05, gnorm=2.295, loss_scale=8, train_wall=11, gb_free=2.8, wall=161429
2021-06-20 15:29:25 | INFO | train_inner | epoch 005: 2127 / 3002 loss=2.431, ppl=5.39, wps=5889.6, ups=0.09, wpb=64805, bsz=128, num_updates=14052, lr=9.98956e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=161440
2021-06-20 15:29:36 | INFO | train_inner | epoch 005: 2128 / 3002 loss=2.454, ppl=5.48, wps=5782.1, ups=0.09, wpb=64822, bsz=128, num_updates=14053, lr=9.98956e-05, gnorm=1.87, loss_scale=8, train_wall=11, gb_free=2.8, wall=161451
2021-06-20 15:29:47 | INFO | train_inner | epoch 005: 2129 / 3002 loss=2.473, ppl=5.55, wps=5942.2, ups=0.09, wpb=64860, bsz=128, num_updates=14054, lr=9.98956e-05, gnorm=1.941, loss_scale=8, train_wall=10, gb_free=2.8, wall=161462
2021-06-20 15:29:59 | INFO | train_inner | epoch 005: 2130 / 3002 loss=2.318, ppl=4.98, wps=5804.6, ups=0.09, wpb=64863, bsz=128, num_updates=14055, lr=9.98956e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=161473
2021-06-20 15:30:10 | INFO | train_inner | epoch 005: 2131 / 3002 loss=2.529, ppl=5.77, wps=5870.7, ups=0.09, wpb=64793, bsz=128, num_updates=14056, lr=9.98955e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=161484
2021-06-20 15:30:21 | INFO | train_inner | epoch 005: 2132 / 3002 loss=2.607, ppl=6.09, wps=5834.4, ups=0.09, wpb=64830, bsz=128, num_updates=14057, lr=9.98955e-05, gnorm=2.017, loss_scale=8, train_wall=11, gb_free=2.8, wall=161495
2021-06-20 15:30:32 | INFO | train_inner | epoch 005: 2133 / 3002 loss=2.462, ppl=5.51, wps=5888.4, ups=0.09, wpb=64842, bsz=128, num_updates=14058, lr=9.98955e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=161506
2021-06-20 15:30:43 | INFO | train_inner | epoch 005: 2134 / 3002 loss=2.461, ppl=5.51, wps=5847.4, ups=0.09, wpb=64848, bsz=128, num_updates=14059, lr=9.98955e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=161517
2021-06-20 15:30:54 | INFO | train_inner | epoch 005: 2135 / 3002 loss=2.457, ppl=5.49, wps=5824.8, ups=0.09, wpb=64818, bsz=128, num_updates=14060, lr=9.98955e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=161528
2021-06-20 15:31:05 | INFO | train_inner | epoch 005: 2136 / 3002 loss=2.493, ppl=5.63, wps=5734.4, ups=0.09, wpb=64795, bsz=128, num_updates=14061, lr=9.98955e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=161540
2021-06-20 15:31:16 | INFO | train_inner | epoch 005: 2137 / 3002 loss=2.447, ppl=5.45, wps=5952, ups=0.09, wpb=64852, bsz=128, num_updates=14062, lr=9.98955e-05, gnorm=1.926, loss_scale=8, train_wall=10, gb_free=2.8, wall=161550
2021-06-20 15:31:27 | INFO | train_inner | epoch 005: 2138 / 3002 loss=2.44, ppl=5.43, wps=5792.3, ups=0.09, wpb=64860, bsz=128, num_updates=14063, lr=9.98955e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=161562
2021-06-20 15:31:38 | INFO | train_inner | epoch 005: 2139 / 3002 loss=2.477, ppl=5.57, wps=5816, ups=0.09, wpb=64846, bsz=128, num_updates=14064, lr=9.98955e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=161573
2021-06-20 15:31:50 | INFO | train_inner | epoch 005: 2140 / 3002 loss=2.503, ppl=5.67, wps=5840.8, ups=0.09, wpb=64847, bsz=128, num_updates=14065, lr=9.98955e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=161584
2021-06-20 15:32:01 | INFO | train_inner | epoch 005: 2141 / 3002 loss=2.432, ppl=5.4, wps=5882.9, ups=0.09, wpb=64852, bsz=128, num_updates=14066, lr=9.98955e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=161595
2021-06-20 15:32:12 | INFO | train_inner | epoch 005: 2142 / 3002 loss=2.539, ppl=5.81, wps=5790, ups=0.09, wpb=64789, bsz=128, num_updates=14067, lr=9.98955e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=161606
2021-06-20 15:32:23 | INFO | train_inner | epoch 005: 2143 / 3002 loss=2.596, ppl=6.05, wps=5775, ups=0.09, wpb=64858, bsz=128, num_updates=14068, lr=9.98954e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=161617
2021-06-20 15:32:34 | INFO | train_inner | epoch 005: 2144 / 3002 loss=2.546, ppl=5.84, wps=5906.8, ups=0.09, wpb=64748, bsz=128, num_updates=14069, lr=9.98954e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=161628
2021-06-20 15:32:45 | INFO | train_inner | epoch 005: 2145 / 3002 loss=2.599, ppl=6.06, wps=5873.1, ups=0.09, wpb=64855, bsz=128, num_updates=14070, lr=9.98954e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=161639
2021-06-20 15:32:56 | INFO | train_inner | epoch 005: 2146 / 3002 loss=2.45, ppl=5.46, wps=5879.2, ups=0.09, wpb=64755, bsz=128, num_updates=14071, lr=9.98954e-05, gnorm=2.056, loss_scale=8, train_wall=11, gb_free=2.8, wall=161650
2021-06-20 15:33:07 | INFO | train_inner | epoch 005: 2147 / 3002 loss=2.5, ppl=5.66, wps=5905.6, ups=0.09, wpb=64853, bsz=128, num_updates=14072, lr=9.98954e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=161661
2021-06-20 15:33:18 | INFO | train_inner | epoch 005: 2148 / 3002 loss=2.56, ppl=5.9, wps=5915, ups=0.09, wpb=64805, bsz=128, num_updates=14073, lr=9.98954e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=161672
2021-06-20 15:33:29 | INFO | train_inner | epoch 005: 2149 / 3002 loss=2.522, ppl=5.74, wps=5711.3, ups=0.09, wpb=64840, bsz=128, num_updates=14074, lr=9.98954e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=161684
2021-06-20 15:33:40 | INFO | train_inner | epoch 005: 2150 / 3002 loss=2.556, ppl=5.88, wps=5802.7, ups=0.09, wpb=64790, bsz=128, num_updates=14075, lr=9.98954e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=161695
2021-06-20 15:33:51 | INFO | train_inner | epoch 005: 2151 / 3002 loss=2.489, ppl=5.61, wps=5921.7, ups=0.09, wpb=64813, bsz=128, num_updates=14076, lr=9.98954e-05, gnorm=1.997, loss_scale=8, train_wall=10, gb_free=2.8, wall=161706
2021-06-20 15:34:03 | INFO | train_inner | epoch 005: 2152 / 3002 loss=2.483, ppl=5.59, wps=5790.5, ups=0.09, wpb=64854, bsz=128, num_updates=14077, lr=9.98954e-05, gnorm=2.106, loss_scale=8, train_wall=11, gb_free=2.8, wall=161717
2021-06-20 15:34:14 | INFO | train_inner | epoch 005: 2153 / 3002 loss=2.421, ppl=5.36, wps=5754.3, ups=0.09, wpb=64940, bsz=128, num_updates=14078, lr=9.98954e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=161728
2021-06-20 15:34:25 | INFO | train_inner | epoch 005: 2154 / 3002 loss=2.527, ppl=5.76, wps=5894, ups=0.09, wpb=64873, bsz=128, num_updates=14079, lr=9.98954e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=161739
2021-06-20 15:34:36 | INFO | train_inner | epoch 005: 2155 / 3002 loss=2.53, ppl=5.77, wps=5771.9, ups=0.09, wpb=64857, bsz=128, num_updates=14080, lr=9.98954e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=161751
2021-06-20 15:34:47 | INFO | train_inner | epoch 005: 2156 / 3002 loss=2.735, ppl=6.66, wps=5861.4, ups=0.09, wpb=64711, bsz=128, num_updates=14081, lr=9.98953e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=161762
2021-06-20 15:34:58 | INFO | train_inner | epoch 005: 2157 / 3002 loss=2.567, ppl=5.92, wps=5875.1, ups=0.09, wpb=64836, bsz=128, num_updates=14082, lr=9.98953e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=161773
2021-06-20 15:35:09 | INFO | train_inner | epoch 005: 2158 / 3002 loss=2.491, ppl=5.62, wps=5944.1, ups=0.09, wpb=64791, bsz=128, num_updates=14083, lr=9.98953e-05, gnorm=1.965, loss_scale=8, train_wall=10, gb_free=2.8, wall=161783
2021-06-20 15:35:20 | INFO | train_inner | epoch 005: 2159 / 3002 loss=2.618, ppl=6.14, wps=5863.2, ups=0.09, wpb=64819, bsz=128, num_updates=14084, lr=9.98953e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=161795
2021-06-20 15:35:31 | INFO | train_inner | epoch 005: 2160 / 3002 loss=2.686, ppl=6.43, wps=5778.5, ups=0.09, wpb=64854, bsz=128, num_updates=14085, lr=9.98953e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=161806
2021-06-20 15:35:42 | INFO | train_inner | epoch 005: 2161 / 3002 loss=2.388, ppl=5.24, wps=5919.3, ups=0.09, wpb=64910, bsz=128, num_updates=14086, lr=9.98953e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=161817
2021-06-20 15:35:53 | INFO | train_inner | epoch 005: 2162 / 3002 loss=2.703, ppl=6.51, wps=5858.3, ups=0.09, wpb=64810, bsz=128, num_updates=14087, lr=9.98953e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=161828
2021-06-20 15:36:05 | INFO | train_inner | epoch 005: 2163 / 3002 loss=2.569, ppl=5.93, wps=5847.8, ups=0.09, wpb=64854, bsz=128, num_updates=14088, lr=9.98953e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=161839
2021-06-20 15:36:16 | INFO | train_inner | epoch 005: 2164 / 3002 loss=2.599, ppl=6.06, wps=5777.4, ups=0.09, wpb=64829, bsz=128, num_updates=14089, lr=9.98953e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=161850
2021-06-20 15:36:27 | INFO | train_inner | epoch 005: 2165 / 3002 loss=2.529, ppl=5.77, wps=5876.6, ups=0.09, wpb=64751, bsz=128, num_updates=14090, lr=9.98953e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=161861
2021-06-20 15:36:38 | INFO | train_inner | epoch 005: 2166 / 3002 loss=2.662, ppl=6.33, wps=5890.2, ups=0.09, wpb=64754, bsz=128, num_updates=14091, lr=9.98953e-05, gnorm=2.1, loss_scale=8, train_wall=11, gb_free=2.8, wall=161872
2021-06-20 15:36:49 | INFO | train_inner | epoch 005: 2167 / 3002 loss=2.446, ppl=5.45, wps=5783.2, ups=0.09, wpb=64803, bsz=128, num_updates=14092, lr=9.98953e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=161883
2021-06-20 15:37:00 | INFO | train_inner | epoch 005: 2168 / 3002 loss=2.559, ppl=5.89, wps=5656, ups=0.09, wpb=64810, bsz=128, num_updates=14093, lr=9.98952e-05, gnorm=2.211, loss_scale=8, train_wall=11, gb_free=2.8, wall=161895
2021-06-20 15:37:12 | INFO | train_inner | epoch 005: 2169 / 3002 loss=2.5, ppl=5.66, wps=5810.2, ups=0.09, wpb=64792, bsz=128, num_updates=14094, lr=9.98952e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=161906
2021-06-20 15:37:23 | INFO | train_inner | epoch 005: 2170 / 3002 loss=2.587, ppl=6.01, wps=5813.2, ups=0.09, wpb=64816, bsz=128, num_updates=14095, lr=9.98952e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=161917
2021-06-20 15:37:34 | INFO | train_inner | epoch 005: 2171 / 3002 loss=2.51, ppl=5.7, wps=5883.5, ups=0.09, wpb=64804, bsz=128, num_updates=14096, lr=9.98952e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=161928
2021-06-20 15:37:45 | INFO | train_inner | epoch 005: 2172 / 3002 loss=2.416, ppl=5.34, wps=5817, ups=0.09, wpb=64842, bsz=128, num_updates=14097, lr=9.98952e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=161939
2021-06-20 15:37:56 | INFO | train_inner | epoch 005: 2173 / 3002 loss=2.55, ppl=5.86, wps=5889.6, ups=0.09, wpb=64810, bsz=128, num_updates=14098, lr=9.98952e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=161950
2021-06-20 15:38:07 | INFO | train_inner | epoch 005: 2174 / 3002 loss=2.387, ppl=5.23, wps=5820.6, ups=0.09, wpb=64870, bsz=128, num_updates=14099, lr=9.98952e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=161961
2021-06-20 15:38:18 | INFO | train_inner | epoch 005: 2175 / 3002 loss=2.377, ppl=5.19, wps=5768.8, ups=0.09, wpb=64906, bsz=128, num_updates=14100, lr=9.98952e-05, gnorm=2.076, loss_scale=8, train_wall=11, gb_free=2.8, wall=161973
2021-06-20 15:38:29 | INFO | train_inner | epoch 005: 2176 / 3002 loss=2.487, ppl=5.6, wps=5906.3, ups=0.09, wpb=64890, bsz=128, num_updates=14101, lr=9.98952e-05, gnorm=2.009, loss_scale=8, train_wall=10, gb_free=2.8, wall=161984
2021-06-20 15:38:40 | INFO | train_inner | epoch 005: 2177 / 3002 loss=2.624, ppl=6.16, wps=5787.3, ups=0.09, wpb=64811, bsz=128, num_updates=14102, lr=9.98952e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=161995
2021-06-20 15:38:51 | INFO | train_inner | epoch 005: 2178 / 3002 loss=2.518, ppl=5.73, wps=5928.7, ups=0.09, wpb=64863, bsz=128, num_updates=14103, lr=9.98952e-05, gnorm=2.119, loss_scale=8, train_wall=10, gb_free=2.8, wall=162006
2021-06-20 15:39:03 | INFO | train_inner | epoch 005: 2179 / 3002 loss=2.488, ppl=5.61, wps=5856.4, ups=0.09, wpb=64854, bsz=128, num_updates=14104, lr=9.98952e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=162017
2021-06-20 15:39:14 | INFO | train_inner | epoch 005: 2180 / 3002 loss=2.427, ppl=5.38, wps=5838.1, ups=0.09, wpb=64862, bsz=128, num_updates=14105, lr=9.98952e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=162028
2021-06-20 15:39:25 | INFO | train_inner | epoch 005: 2181 / 3002 loss=2.335, ppl=5.05, wps=5949.7, ups=0.09, wpb=64884, bsz=128, num_updates=14106, lr=9.98951e-05, gnorm=2.004, loss_scale=8, train_wall=10, gb_free=2.8, wall=162039
2021-06-20 15:39:36 | INFO | train_inner | epoch 005: 2182 / 3002 loss=2.529, ppl=5.77, wps=5791.4, ups=0.09, wpb=64869, bsz=128, num_updates=14107, lr=9.98951e-05, gnorm=2.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=162050
2021-06-20 15:39:47 | INFO | train_inner | epoch 005: 2183 / 3002 loss=2.555, ppl=5.88, wps=5782.9, ups=0.09, wpb=64846, bsz=128, num_updates=14108, lr=9.98951e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=162061
2021-06-20 15:39:58 | INFO | train_inner | epoch 005: 2184 / 3002 loss=2.571, ppl=5.94, wps=5927.2, ups=0.09, wpb=64799, bsz=128, num_updates=14109, lr=9.98951e-05, gnorm=2.03, loss_scale=8, train_wall=10, gb_free=2.8, wall=162072
2021-06-20 15:40:09 | INFO | train_inner | epoch 005: 2185 / 3002 loss=2.519, ppl=5.73, wps=5834.7, ups=0.09, wpb=64866, bsz=128, num_updates=14110, lr=9.98951e-05, gnorm=2.012, loss_scale=8, train_wall=11, gb_free=2.8, wall=162083
2021-06-20 15:40:20 | INFO | train_inner | epoch 005: 2186 / 3002 loss=2.561, ppl=5.9, wps=5868.4, ups=0.09, wpb=64807, bsz=128, num_updates=14111, lr=9.98951e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=162094
2021-06-20 15:40:31 | INFO | train_inner | epoch 005: 2187 / 3002 loss=2.648, ppl=6.27, wps=5886.4, ups=0.09, wpb=64821, bsz=128, num_updates=14112, lr=9.98951e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=162105
2021-06-20 15:40:42 | INFO | train_inner | epoch 005: 2188 / 3002 loss=2.584, ppl=6, wps=5828.9, ups=0.09, wpb=64848, bsz=128, num_updates=14113, lr=9.98951e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=162117
2021-06-20 15:40:53 | INFO | train_inner | epoch 005: 2189 / 3002 loss=2.423, ppl=5.36, wps=5840.5, ups=0.09, wpb=64852, bsz=128, num_updates=14114, lr=9.98951e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=162128
2021-06-20 15:41:04 | INFO | train_inner | epoch 005: 2190 / 3002 loss=2.453, ppl=5.47, wps=5867.9, ups=0.09, wpb=64871, bsz=128, num_updates=14115, lr=9.98951e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=162139
2021-06-20 15:41:15 | INFO | train_inner | epoch 005: 2191 / 3002 loss=2.594, ppl=6.04, wps=5881.5, ups=0.09, wpb=64934, bsz=128, num_updates=14116, lr=9.98951e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=162150
2021-06-20 15:41:27 | INFO | train_inner | epoch 005: 2192 / 3002 loss=2.59, ppl=6.02, wps=5816.1, ups=0.09, wpb=64821, bsz=128, num_updates=14117, lr=9.98951e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=162161
2021-06-20 15:41:38 | INFO | train_inner | epoch 005: 2193 / 3002 loss=2.367, ppl=5.16, wps=5877, ups=0.09, wpb=64818, bsz=128, num_updates=14118, lr=9.9895e-05, gnorm=1.832, loss_scale=8, train_wall=11, gb_free=2.8, wall=162172
2021-06-20 15:41:49 | INFO | train_inner | epoch 005: 2194 / 3002 loss=2.42, ppl=5.35, wps=5814.9, ups=0.09, wpb=64795, bsz=128, num_updates=14119, lr=9.9895e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=162183
2021-06-20 15:42:00 | INFO | train_inner | epoch 005: 2195 / 3002 loss=2.34, ppl=5.06, wps=5766.5, ups=0.09, wpb=64820, bsz=128, num_updates=14120, lr=9.9895e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=162194
2021-06-20 15:42:11 | INFO | train_inner | epoch 005: 2196 / 3002 loss=2.355, ppl=5.12, wps=5920.8, ups=0.09, wpb=64805, bsz=128, num_updates=14121, lr=9.9895e-05, gnorm=1.929, loss_scale=8, train_wall=10, gb_free=2.8, wall=162205
2021-06-20 15:42:22 | INFO | train_inner | epoch 005: 2197 / 3002 loss=2.354, ppl=5.11, wps=5872.2, ups=0.09, wpb=64859, bsz=128, num_updates=14122, lr=9.9895e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=162216
2021-06-20 15:42:33 | INFO | train_inner | epoch 005: 2198 / 3002 loss=2.529, ppl=5.77, wps=5811.4, ups=0.09, wpb=64756, bsz=128, num_updates=14123, lr=9.9895e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=162227
2021-06-20 15:42:44 | INFO | train_inner | epoch 005: 2199 / 3002 loss=2.641, ppl=6.24, wps=5866.2, ups=0.09, wpb=64853, bsz=128, num_updates=14124, lr=9.9895e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=162238
2021-06-20 15:42:55 | INFO | train_inner | epoch 005: 2200 / 3002 loss=2.634, ppl=6.21, wps=5794.4, ups=0.09, wpb=64839, bsz=128, num_updates=14125, lr=9.9895e-05, gnorm=2.083, loss_scale=8, train_wall=11, gb_free=2.8, wall=162250
2021-06-20 15:43:06 | INFO | train_inner | epoch 005: 2201 / 3002 loss=2.478, ppl=5.57, wps=5912.4, ups=0.09, wpb=64811, bsz=128, num_updates=14126, lr=9.9895e-05, gnorm=1.926, loss_scale=8, train_wall=10, gb_free=2.8, wall=162261
2021-06-20 15:43:17 | INFO | train_inner | epoch 005: 2202 / 3002 loss=2.562, ppl=5.91, wps=5802, ups=0.09, wpb=64776, bsz=128, num_updates=14127, lr=9.9895e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=162272
2021-06-20 15:43:29 | INFO | train_inner | epoch 005: 2203 / 3002 loss=2.492, ppl=5.63, wps=5769.9, ups=0.09, wpb=64772, bsz=128, num_updates=14128, lr=9.9895e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=162283
2021-06-20 15:43:40 | INFO | train_inner | epoch 005: 2204 / 3002 loss=2.689, ppl=6.45, wps=5864.2, ups=0.09, wpb=64733, bsz=128, num_updates=14129, lr=9.9895e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=162294
2021-06-20 15:43:51 | INFO | train_inner | epoch 005: 2205 / 3002 loss=2.487, ppl=5.61, wps=5850.5, ups=0.09, wpb=64848, bsz=128, num_updates=14130, lr=9.9895e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=162305
2021-06-20 15:44:02 | INFO | train_inner | epoch 005: 2206 / 3002 loss=2.52, ppl=5.73, wps=5803.3, ups=0.09, wpb=64776, bsz=128, num_updates=14131, lr=9.98949e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=162316
2021-06-20 15:44:13 | INFO | train_inner | epoch 005: 2207 / 3002 loss=2.645, ppl=6.26, wps=5811, ups=0.09, wpb=64740, bsz=128, num_updates=14132, lr=9.98949e-05, gnorm=2.103, loss_scale=8, train_wall=11, gb_free=2.8, wall=162327
2021-06-20 15:44:24 | INFO | train_inner | epoch 005: 2208 / 3002 loss=2.517, ppl=5.72, wps=6067, ups=0.09, wpb=64839, bsz=128, num_updates=14133, lr=9.98949e-05, gnorm=2.022, loss_scale=8, train_wall=10, gb_free=2.8, wall=162338
2021-06-20 15:44:35 | INFO | train_inner | epoch 005: 2209 / 3002 loss=2.437, ppl=5.42, wps=5789.7, ups=0.09, wpb=64844, bsz=128, num_updates=14134, lr=9.98949e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=162349
2021-06-20 15:44:46 | INFO | train_inner | epoch 005: 2210 / 3002 loss=2.592, ppl=6.03, wps=5770.8, ups=0.09, wpb=64810, bsz=128, num_updates=14135, lr=9.98949e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=162361
2021-06-20 15:44:57 | INFO | train_inner | epoch 005: 2211 / 3002 loss=2.552, ppl=5.86, wps=5815.6, ups=0.09, wpb=64717, bsz=128, num_updates=14136, lr=9.98949e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=162372
2021-06-20 15:45:09 | INFO | train_inner | epoch 005: 2212 / 3002 loss=2.586, ppl=6.01, wps=5687.8, ups=0.09, wpb=64829, bsz=128, num_updates=14137, lr=9.98949e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=162383
2021-06-20 15:45:20 | INFO | train_inner | epoch 005: 2213 / 3002 loss=2.589, ppl=6.02, wps=5961.8, ups=0.09, wpb=64793, bsz=128, num_updates=14138, lr=9.98949e-05, gnorm=1.9, loss_scale=8, train_wall=10, gb_free=2.8, wall=162394
2021-06-20 15:45:30 | INFO | train_inner | epoch 005: 2214 / 3002 loss=2.431, ppl=5.39, wps=5962.6, ups=0.09, wpb=64832, bsz=128, num_updates=14139, lr=9.98949e-05, gnorm=1.877, loss_scale=8, train_wall=10, gb_free=2.8, wall=162405
2021-06-20 15:45:42 | INFO | train_inner | epoch 005: 2215 / 3002 loss=2.56, ppl=5.9, wps=5866.5, ups=0.09, wpb=64852, bsz=128, num_updates=14140, lr=9.98949e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=162416
2021-06-20 15:45:53 | INFO | train_inner | epoch 005: 2216 / 3002 loss=2.525, ppl=5.75, wps=5872.4, ups=0.09, wpb=64751, bsz=128, num_updates=14141, lr=9.98949e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=162427
2021-06-20 15:46:04 | INFO | train_inner | epoch 005: 2217 / 3002 loss=2.521, ppl=5.74, wps=5750, ups=0.09, wpb=64883, bsz=128, num_updates=14142, lr=9.98949e-05, gnorm=1.863, loss_scale=8, train_wall=11, gb_free=2.8, wall=162438
2021-06-20 15:46:15 | INFO | train_inner | epoch 005: 2218 / 3002 loss=2.459, ppl=5.5, wps=5796.8, ups=0.09, wpb=64904, bsz=128, num_updates=14143, lr=9.98948e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=162449
2021-06-20 15:46:26 | INFO | train_inner | epoch 005: 2219 / 3002 loss=2.545, ppl=5.84, wps=5749.4, ups=0.09, wpb=64694, bsz=128, num_updates=14144, lr=9.98948e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=162461
2021-06-20 15:46:37 | INFO | train_inner | epoch 005: 2220 / 3002 loss=2.627, ppl=6.18, wps=5890.6, ups=0.09, wpb=64823, bsz=128, num_updates=14145, lr=9.98948e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=162472
2021-06-20 15:46:48 | INFO | train_inner | epoch 005: 2221 / 3002 loss=2.542, ppl=5.82, wps=5871.7, ups=0.09, wpb=64896, bsz=128, num_updates=14146, lr=9.98948e-05, gnorm=2.108, loss_scale=8, train_wall=11, gb_free=2.8, wall=162483
2021-06-20 15:47:00 | INFO | train_inner | epoch 005: 2222 / 3002 loss=2.597, ppl=6.05, wps=5751.6, ups=0.09, wpb=64780, bsz=128, num_updates=14147, lr=9.98948e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=162494
2021-06-20 15:47:11 | INFO | train_inner | epoch 005: 2223 / 3002 loss=2.362, ppl=5.14, wps=5873.5, ups=0.09, wpb=64872, bsz=128, num_updates=14148, lr=9.98948e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=162505
2021-06-20 15:47:22 | INFO | train_inner | epoch 005: 2224 / 3002 loss=2.505, ppl=5.67, wps=5768.1, ups=0.09, wpb=64806, bsz=128, num_updates=14149, lr=9.98948e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=162516
2021-06-20 15:47:33 | INFO | train_inner | epoch 005: 2225 / 3002 loss=2.509, ppl=5.69, wps=5897, ups=0.09, wpb=64839, bsz=128, num_updates=14150, lr=9.98948e-05, gnorm=1.864, loss_scale=16, train_wall=11, gb_free=2.8, wall=162527
2021-06-20 15:47:44 | INFO | train_inner | epoch 005: 2226 / 3002 loss=2.575, ppl=5.96, wps=5880.8, ups=0.09, wpb=64877, bsz=128, num_updates=14151, lr=9.98948e-05, gnorm=2.051, loss_scale=16, train_wall=11, gb_free=2.8, wall=162538
2021-06-20 15:47:55 | INFO | train_inner | epoch 005: 2227 / 3002 loss=2.486, ppl=5.6, wps=5794.8, ups=0.09, wpb=64845, bsz=128, num_updates=14152, lr=9.98948e-05, gnorm=1.968, loss_scale=16, train_wall=11, gb_free=2.8, wall=162549
2021-06-20 15:48:06 | INFO | train_inner | epoch 005: 2228 / 3002 loss=2.683, ppl=6.42, wps=5838.7, ups=0.09, wpb=64757, bsz=128, num_updates=14153, lr=9.98948e-05, gnorm=1.905, loss_scale=16, train_wall=11, gb_free=2.8, wall=162561
2021-06-20 15:48:17 | INFO | train_inner | epoch 005: 2229 / 3002 loss=2.634, ppl=6.21, wps=5854.9, ups=0.09, wpb=64773, bsz=128, num_updates=14154, lr=9.98948e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=162572
2021-06-20 15:48:28 | INFO | train_inner | epoch 005: 2230 / 3002 loss=2.371, ppl=5.17, wps=5828.8, ups=0.09, wpb=64783, bsz=128, num_updates=14155, lr=9.98948e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=162583
2021-06-20 15:48:39 | INFO | train_inner | epoch 005: 2231 / 3002 loss=2.465, ppl=5.52, wps=5863.6, ups=0.09, wpb=64818, bsz=128, num_updates=14156, lr=9.98947e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=162594
2021-06-20 15:48:50 | INFO | train_inner | epoch 005: 2232 / 3002 loss=2.456, ppl=5.49, wps=5872.2, ups=0.09, wpb=64826, bsz=128, num_updates=14157, lr=9.98947e-05, gnorm=1.996, loss_scale=16, train_wall=11, gb_free=2.8, wall=162605
2021-06-20 15:49:01 | INFO | train_inner | epoch 005: 2233 / 3002 loss=2.388, ppl=5.24, wps=5926.1, ups=0.09, wpb=64907, bsz=128, num_updates=14158, lr=9.98947e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=162616
2021-06-20 15:49:13 | INFO | train_inner | epoch 005: 2234 / 3002 loss=2.508, ppl=5.69, wps=5734, ups=0.09, wpb=64757, bsz=128, num_updates=14159, lr=9.98947e-05, gnorm=1.917, loss_scale=16, train_wall=11, gb_free=2.8, wall=162627
2021-06-20 15:49:24 | INFO | train_inner | epoch 005: 2235 / 3002 loss=2.597, ppl=6.05, wps=5779.1, ups=0.09, wpb=64844, bsz=128, num_updates=14160, lr=9.98947e-05, gnorm=1.964, loss_scale=16, train_wall=11, gb_free=2.8, wall=162638
2021-06-20 15:49:35 | INFO | train_inner | epoch 005: 2236 / 3002 loss=2.444, ppl=5.44, wps=5745.9, ups=0.09, wpb=64895, bsz=128, num_updates=14161, lr=9.98947e-05, gnorm=1.953, loss_scale=16, train_wall=11, gb_free=2.8, wall=162650
2021-06-20 15:49:46 | INFO | train_inner | epoch 005: 2237 / 3002 loss=2.533, ppl=5.79, wps=5977.6, ups=0.09, wpb=64889, bsz=128, num_updates=14162, lr=9.98947e-05, gnorm=1.961, loss_scale=16, train_wall=10, gb_free=2.8, wall=162660
2021-06-20 15:49:57 | INFO | train_inner | epoch 005: 2238 / 3002 loss=2.465, ppl=5.52, wps=5833.1, ups=0.09, wpb=64758, bsz=128, num_updates=14163, lr=9.98947e-05, gnorm=2.073, loss_scale=16, train_wall=11, gb_free=2.8, wall=162672
2021-06-20 15:50:08 | INFO | train_inner | epoch 005: 2239 / 3002 loss=2.449, ppl=5.46, wps=5870.1, ups=0.09, wpb=64776, bsz=128, num_updates=14164, lr=9.98947e-05, gnorm=1.956, loss_scale=16, train_wall=11, gb_free=2.8, wall=162683
2021-06-20 15:50:19 | INFO | train_inner | epoch 005: 2240 / 3002 loss=2.41, ppl=5.32, wps=5932.6, ups=0.09, wpb=64905, bsz=128, num_updates=14165, lr=9.98947e-05, gnorm=2.166, loss_scale=16, train_wall=10, gb_free=2.8, wall=162694
2021-06-20 15:50:30 | INFO | train_inner | epoch 005: 2241 / 3002 loss=2.483, ppl=5.59, wps=5844.1, ups=0.09, wpb=64808, bsz=128, num_updates=14166, lr=9.98947e-05, gnorm=1.859, loss_scale=16, train_wall=11, gb_free=2.8, wall=162705
2021-06-20 15:50:41 | INFO | train_inner | epoch 005: 2242 / 3002 loss=2.567, ppl=5.93, wps=5873.9, ups=0.09, wpb=64842, bsz=128, num_updates=14167, lr=9.98947e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=162716
2021-06-20 15:50:52 | INFO | train_inner | epoch 005: 2243 / 3002 loss=2.427, ppl=5.38, wps=6047.3, ups=0.09, wpb=64873, bsz=128, num_updates=14168, lr=9.98946e-05, gnorm=1.898, loss_scale=16, train_wall=10, gb_free=2.8, wall=162726
2021-06-20 15:51:03 | INFO | train_inner | epoch 005: 2244 / 3002 loss=2.641, ppl=6.24, wps=5940.2, ups=0.09, wpb=64903, bsz=128, num_updates=14169, lr=9.98946e-05, gnorm=1.876, loss_scale=16, train_wall=10, gb_free=2.8, wall=162737
2021-06-20 15:51:14 | INFO | train_inner | epoch 005: 2245 / 3002 loss=2.496, ppl=5.64, wps=5876, ups=0.09, wpb=64880, bsz=128, num_updates=14170, lr=9.98946e-05, gnorm=1.927, loss_scale=16, train_wall=11, gb_free=2.8, wall=162748
2021-06-20 15:51:25 | INFO | train_inner | epoch 005: 2246 / 3002 loss=2.513, ppl=5.71, wps=5868.2, ups=0.09, wpb=64841, bsz=128, num_updates=14171, lr=9.98946e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=162759
2021-06-20 15:51:36 | INFO | train_inner | epoch 005: 2247 / 3002 loss=2.551, ppl=5.86, wps=5835.9, ups=0.09, wpb=64794, bsz=128, num_updates=14172, lr=9.98946e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=162770
2021-06-20 15:51:47 | INFO | train_inner | epoch 005: 2248 / 3002 loss=2.728, ppl=6.62, wps=5939.6, ups=0.09, wpb=64801, bsz=128, num_updates=14173, lr=9.98946e-05, gnorm=2.031, loss_scale=16, train_wall=10, gb_free=2.8, wall=162781
2021-06-20 15:51:58 | INFO | train_inner | epoch 005: 2249 / 3002 loss=2.415, ppl=5.33, wps=5887.5, ups=0.09, wpb=64955, bsz=128, num_updates=14174, lr=9.98946e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=162792
2021-06-20 15:52:09 | INFO | train_inner | epoch 005: 2250 / 3002 loss=2.505, ppl=5.67, wps=5867.4, ups=0.09, wpb=64774, bsz=128, num_updates=14175, lr=9.98946e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=162803
2021-06-20 15:52:20 | INFO | train_inner | epoch 005: 2251 / 3002 loss=2.507, ppl=5.68, wps=5862.3, ups=0.09, wpb=64899, bsz=128, num_updates=14176, lr=9.98946e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=162815
2021-06-20 15:52:31 | INFO | train_inner | epoch 005: 2252 / 3002 loss=2.561, ppl=5.9, wps=5842.2, ups=0.09, wpb=64786, bsz=128, num_updates=14177, lr=9.98946e-05, gnorm=1.914, loss_scale=16, train_wall=11, gb_free=2.8, wall=162826
2021-06-20 15:52:42 | INFO | train_inner | epoch 005: 2253 / 3002 loss=2.412, ppl=5.32, wps=5859.5, ups=0.09, wpb=64948, bsz=128, num_updates=14178, lr=9.98946e-05, gnorm=1.898, loss_scale=16, train_wall=11, gb_free=2.8, wall=162837
2021-06-20 15:52:53 | INFO | train_inner | epoch 005: 2254 / 3002 loss=2.649, ppl=6.27, wps=5847.4, ups=0.09, wpb=64798, bsz=128, num_updates=14179, lr=9.98946e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=162848
2021-06-20 15:53:05 | INFO | train_inner | epoch 005: 2255 / 3002 loss=2.47, ppl=5.54, wps=5777, ups=0.09, wpb=64798, bsz=128, num_updates=14180, lr=9.98946e-05, gnorm=2.023, loss_scale=16, train_wall=11, gb_free=2.8, wall=162859
2021-06-20 15:53:16 | INFO | train_inner | epoch 005: 2256 / 3002 loss=2.587, ppl=6.01, wps=5846.3, ups=0.09, wpb=64753, bsz=128, num_updates=14181, lr=9.98945e-05, gnorm=1.994, loss_scale=16, train_wall=11, gb_free=2.8, wall=162870
2021-06-20 15:53:27 | INFO | train_inner | epoch 005: 2257 / 3002 loss=2.538, ppl=5.81, wps=5756.4, ups=0.09, wpb=64775, bsz=128, num_updates=14182, lr=9.98945e-05, gnorm=2.001, loss_scale=16, train_wall=11, gb_free=2.8, wall=162881
2021-06-20 15:53:38 | INFO | train_inner | epoch 005: 2258 / 3002 loss=2.592, ppl=6.03, wps=5905.7, ups=0.09, wpb=64847, bsz=128, num_updates=14183, lr=9.98945e-05, gnorm=2.047, loss_scale=16, train_wall=11, gb_free=2.8, wall=162892
2021-06-20 15:53:49 | INFO | train_inner | epoch 005: 2259 / 3002 loss=2.676, ppl=6.39, wps=5820.4, ups=0.09, wpb=64792, bsz=128, num_updates=14184, lr=9.98945e-05, gnorm=1.979, loss_scale=16, train_wall=11, gb_free=2.8, wall=162903
2021-06-20 15:54:00 | INFO | train_inner | epoch 005: 2260 / 3002 loss=2.416, ppl=5.34, wps=5960.5, ups=0.09, wpb=64811, bsz=128, num_updates=14185, lr=9.98945e-05, gnorm=1.932, loss_scale=16, train_wall=10, gb_free=2.8, wall=162914
2021-06-20 15:54:11 | INFO | train_inner | epoch 005: 2261 / 3002 loss=2.462, ppl=5.51, wps=5827.9, ups=0.09, wpb=64820, bsz=128, num_updates=14186, lr=9.98945e-05, gnorm=2.045, loss_scale=16, train_wall=11, gb_free=2.8, wall=162925
2021-06-20 15:54:22 | INFO | train_inner | epoch 005: 2262 / 3002 loss=2.427, ppl=5.38, wps=5901, ups=0.09, wpb=64824, bsz=128, num_updates=14187, lr=9.98945e-05, gnorm=1.838, loss_scale=16, train_wall=11, gb_free=2.8, wall=162936
2021-06-20 15:54:33 | INFO | train_inner | epoch 005: 2263 / 3002 loss=2.501, ppl=5.66, wps=5939.4, ups=0.09, wpb=64773, bsz=128, num_updates=14188, lr=9.98945e-05, gnorm=1.848, loss_scale=16, train_wall=10, gb_free=2.8, wall=162947
2021-06-20 15:54:44 | INFO | train_inner | epoch 005: 2264 / 3002 loss=2.588, ppl=6.01, wps=5846.9, ups=0.09, wpb=64838, bsz=128, num_updates=14189, lr=9.98945e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=162958
2021-06-20 15:54:55 | INFO | train_inner | epoch 005: 2265 / 3002 loss=2.539, ppl=5.81, wps=5798, ups=0.09, wpb=64833, bsz=128, num_updates=14190, lr=9.98945e-05, gnorm=2.429, loss_scale=16, train_wall=11, gb_free=2.8, wall=162970
2021-06-20 15:55:06 | INFO | train_inner | epoch 005: 2266 / 3002 loss=2.535, ppl=5.8, wps=5843.9, ups=0.09, wpb=64699, bsz=128, num_updates=14191, lr=9.98945e-05, gnorm=2.064, loss_scale=16, train_wall=11, gb_free=2.8, wall=162981
2021-06-20 15:55:18 | INFO | train_inner | epoch 005: 2267 / 3002 loss=2.541, ppl=5.82, wps=5802.9, ups=0.09, wpb=64800, bsz=128, num_updates=14192, lr=9.98945e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=162992
2021-06-20 15:55:28 | INFO | train_inner | epoch 005: 2268 / 3002 loss=2.68, ppl=6.41, wps=5927.8, ups=0.09, wpb=64849, bsz=128, num_updates=14193, lr=9.98944e-05, gnorm=2.203, loss_scale=16, train_wall=10, gb_free=2.8, wall=163003
2021-06-20 15:55:39 | INFO | train_inner | epoch 005: 2269 / 3002 loss=2.54, ppl=5.81, wps=6010.4, ups=0.09, wpb=64841, bsz=128, num_updates=14194, lr=9.98944e-05, gnorm=2.725, loss_scale=16, train_wall=10, gb_free=2.8, wall=163014
2021-06-20 15:55:50 | INFO | train_inner | epoch 005: 2270 / 3002 loss=2.573, ppl=5.95, wps=5910.1, ups=0.09, wpb=64858, bsz=128, num_updates=14195, lr=9.98944e-05, gnorm=1.939, loss_scale=16, train_wall=11, gb_free=2.8, wall=163025
2021-06-20 15:56:01 | INFO | train_inner | epoch 005: 2271 / 3002 loss=2.613, ppl=6.12, wps=5875.8, ups=0.09, wpb=64917, bsz=128, num_updates=14196, lr=9.98944e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=163036
2021-06-20 15:56:12 | INFO | train_inner | epoch 005: 2272 / 3002 loss=2.558, ppl=5.89, wps=5822.5, ups=0.09, wpb=64883, bsz=128, num_updates=14197, lr=9.98944e-05, gnorm=2.122, loss_scale=16, train_wall=11, gb_free=2.8, wall=163047
2021-06-20 15:56:24 | INFO | train_inner | epoch 005: 2273 / 3002 loss=2.751, ppl=6.73, wps=5675.4, ups=0.09, wpb=64751, bsz=128, num_updates=14198, lr=9.98944e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=163058
2021-06-20 15:56:35 | INFO | train_inner | epoch 005: 2274 / 3002 loss=2.479, ppl=5.58, wps=5835.1, ups=0.09, wpb=64819, bsz=128, num_updates=14199, lr=9.98944e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=163069
2021-06-20 15:56:46 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 15:56:57 | INFO | train_inner | epoch 005: 2276 / 3002 loss=2.603, ppl=6.07, wps=2889.1, ups=0.04, wpb=64810, bsz=128, num_updates=14200, lr=9.98944e-05, gnorm=1.887, loss_scale=8, train_wall=22, gb_free=2.8, wall=163092
2021-06-20 15:57:08 | INFO | train_inner | epoch 005: 2277 / 3002 loss=2.447, ppl=5.45, wps=5898.8, ups=0.09, wpb=64840, bsz=128, num_updates=14201, lr=9.98944e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=163103
2021-06-20 15:57:20 | INFO | train_inner | epoch 005: 2278 / 3002 loss=2.439, ppl=5.42, wps=5788.2, ups=0.09, wpb=64878, bsz=128, num_updates=14202, lr=9.98944e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=163114
2021-06-20 15:57:31 | INFO | train_inner | epoch 005: 2279 / 3002 loss=2.564, ppl=5.91, wps=5836.6, ups=0.09, wpb=64800, bsz=128, num_updates=14203, lr=9.98944e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=163125
2021-06-20 15:57:42 | INFO | train_inner | epoch 005: 2280 / 3002 loss=2.588, ppl=6.01, wps=5880.3, ups=0.09, wpb=64858, bsz=128, num_updates=14204, lr=9.98944e-05, gnorm=2.125, loss_scale=8, train_wall=11, gb_free=2.8, wall=163136
2021-06-20 15:57:53 | INFO | train_inner | epoch 005: 2281 / 3002 loss=2.527, ppl=5.76, wps=5762, ups=0.09, wpb=64818, bsz=128, num_updates=14205, lr=9.98944e-05, gnorm=1.889, loss_scale=8, train_wall=11, gb_free=2.8, wall=163147
2021-06-20 15:58:04 | INFO | train_inner | epoch 005: 2282 / 3002 loss=2.513, ppl=5.71, wps=5824, ups=0.09, wpb=64853, bsz=128, num_updates=14206, lr=9.98943e-05, gnorm=2.083, loss_scale=8, train_wall=11, gb_free=2.8, wall=163158
2021-06-20 15:58:15 | INFO | train_inner | epoch 005: 2283 / 3002 loss=2.536, ppl=5.8, wps=5821.5, ups=0.09, wpb=64810, bsz=128, num_updates=14207, lr=9.98943e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=163170
2021-06-20 15:58:26 | INFO | train_inner | epoch 005: 2284 / 3002 loss=2.509, ppl=5.69, wps=5937, ups=0.09, wpb=64804, bsz=128, num_updates=14208, lr=9.98943e-05, gnorm=1.873, loss_scale=8, train_wall=10, gb_free=2.8, wall=163180
2021-06-20 15:58:37 | INFO | train_inner | epoch 005: 2285 / 3002 loss=2.589, ppl=6.02, wps=5841.7, ups=0.09, wpb=64799, bsz=128, num_updates=14209, lr=9.98943e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=163192
2021-06-20 15:58:48 | INFO | train_inner | epoch 005: 2286 / 3002 loss=2.526, ppl=5.76, wps=5827.6, ups=0.09, wpb=64789, bsz=128, num_updates=14210, lr=9.98943e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=163203
2021-06-20 15:58:59 | INFO | train_inner | epoch 005: 2287 / 3002 loss=2.424, ppl=5.37, wps=5874.2, ups=0.09, wpb=64875, bsz=128, num_updates=14211, lr=9.98943e-05, gnorm=1.877, loss_scale=8, train_wall=11, gb_free=2.8, wall=163214
2021-06-20 15:59:11 | INFO | train_inner | epoch 005: 2288 / 3002 loss=2.509, ppl=5.69, wps=5673.5, ups=0.09, wpb=64827, bsz=128, num_updates=14212, lr=9.98943e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=163225
2021-06-20 15:59:22 | INFO | train_inner | epoch 005: 2289 / 3002 loss=2.483, ppl=5.59, wps=5863.9, ups=0.09, wpb=64827, bsz=128, num_updates=14213, lr=9.98943e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=163236
2021-06-20 15:59:33 | INFO | train_inner | epoch 005: 2290 / 3002 loss=2.568, ppl=5.93, wps=5951.1, ups=0.09, wpb=64839, bsz=128, num_updates=14214, lr=9.98943e-05, gnorm=1.993, loss_scale=8, train_wall=10, gb_free=2.8, wall=163247
2021-06-20 15:59:44 | INFO | train_inner | epoch 005: 2291 / 3002 loss=2.596, ppl=6.05, wps=5798.8, ups=0.09, wpb=64822, bsz=128, num_updates=14215, lr=9.98943e-05, gnorm=2.075, loss_scale=8, train_wall=11, gb_free=2.8, wall=163258
2021-06-20 15:59:55 | INFO | train_inner | epoch 005: 2292 / 3002 loss=2.466, ppl=5.53, wps=5769.7, ups=0.09, wpb=64775, bsz=128, num_updates=14216, lr=9.98943e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=163270
2021-06-20 16:00:06 | INFO | train_inner | epoch 005: 2293 / 3002 loss=2.471, ppl=5.54, wps=5923.2, ups=0.09, wpb=64713, bsz=128, num_updates=14217, lr=9.98943e-05, gnorm=2.129, loss_scale=8, train_wall=10, gb_free=2.8, wall=163280
2021-06-20 16:00:17 | INFO | train_inner | epoch 005: 2294 / 3002 loss=2.471, ppl=5.54, wps=5831.4, ups=0.09, wpb=64911, bsz=128, num_updates=14218, lr=9.98942e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=163292
2021-06-20 16:00:28 | INFO | train_inner | epoch 005: 2295 / 3002 loss=2.785, ppl=6.89, wps=5891.2, ups=0.09, wpb=64714, bsz=128, num_updates=14219, lr=9.98942e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=163303
2021-06-20 16:00:39 | INFO | train_inner | epoch 005: 2296 / 3002 loss=2.417, ppl=5.34, wps=5904.8, ups=0.09, wpb=64909, bsz=128, num_updates=14220, lr=9.98942e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=163314
2021-06-20 16:00:50 | INFO | train_inner | epoch 005: 2297 / 3002 loss=2.462, ppl=5.51, wps=5867.3, ups=0.09, wpb=64733, bsz=128, num_updates=14221, lr=9.98942e-05, gnorm=1.895, loss_scale=8, train_wall=11, gb_free=2.8, wall=163325
2021-06-20 16:01:01 | INFO | train_inner | epoch 005: 2298 / 3002 loss=2.462, ppl=5.51, wps=5769.3, ups=0.09, wpb=64828, bsz=128, num_updates=14222, lr=9.98942e-05, gnorm=2.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=163336
2021-06-20 16:01:12 | INFO | train_inner | epoch 005: 2299 / 3002 loss=2.389, ppl=5.24, wps=5879.9, ups=0.09, wpb=64801, bsz=128, num_updates=14223, lr=9.98942e-05, gnorm=4.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=163347
2021-06-20 16:01:24 | INFO | train_inner | epoch 005: 2300 / 3002 loss=2.38, ppl=5.2, wps=5767.3, ups=0.09, wpb=64925, bsz=128, num_updates=14224, lr=9.98942e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=163358
2021-06-20 16:01:35 | INFO | train_inner | epoch 005: 2301 / 3002 loss=2.499, ppl=5.65, wps=5972.2, ups=0.09, wpb=64847, bsz=128, num_updates=14225, lr=9.98942e-05, gnorm=1.99, loss_scale=8, train_wall=10, gb_free=2.8, wall=163369
2021-06-20 16:01:46 | INFO | train_inner | epoch 005: 2302 / 3002 loss=2.516, ppl=5.72, wps=5799.8, ups=0.09, wpb=64817, bsz=128, num_updates=14226, lr=9.98942e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=163380
2021-06-20 16:01:57 | INFO | train_inner | epoch 005: 2303 / 3002 loss=2.606, ppl=6.09, wps=5763, ups=0.09, wpb=64774, bsz=128, num_updates=14227, lr=9.98942e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=163391
2021-06-20 16:02:08 | INFO | train_inner | epoch 005: 2304 / 3002 loss=2.457, ppl=5.49, wps=5855.3, ups=0.09, wpb=64764, bsz=128, num_updates=14228, lr=9.98942e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=163402
2021-06-20 16:02:19 | INFO | train_inner | epoch 005: 2305 / 3002 loss=2.44, ppl=5.43, wps=5825.6, ups=0.09, wpb=64836, bsz=128, num_updates=14229, lr=9.98942e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=163414
2021-06-20 16:02:30 | INFO | train_inner | epoch 005: 2306 / 3002 loss=2.52, ppl=5.74, wps=5883.1, ups=0.09, wpb=64898, bsz=128, num_updates=14230, lr=9.98942e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=163425
2021-06-20 16:02:41 | INFO | train_inner | epoch 005: 2307 / 3002 loss=2.555, ppl=5.88, wps=5806.9, ups=0.09, wpb=64814, bsz=128, num_updates=14231, lr=9.98941e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=163436
2021-06-20 16:02:52 | INFO | train_inner | epoch 005: 2308 / 3002 loss=2.544, ppl=5.83, wps=5849.1, ups=0.09, wpb=64760, bsz=128, num_updates=14232, lr=9.98941e-05, gnorm=1.874, loss_scale=8, train_wall=11, gb_free=2.8, wall=163447
2021-06-20 16:03:04 | INFO | train_inner | epoch 005: 2309 / 3002 loss=2.422, ppl=5.36, wps=5777.2, ups=0.09, wpb=64888, bsz=128, num_updates=14233, lr=9.98941e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=163458
2021-06-20 16:03:15 | INFO | train_inner | epoch 005: 2310 / 3002 loss=2.465, ppl=5.52, wps=5793.1, ups=0.09, wpb=64762, bsz=128, num_updates=14234, lr=9.98941e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=163469
2021-06-20 16:03:26 | INFO | train_inner | epoch 005: 2311 / 3002 loss=2.596, ppl=6.05, wps=5818.3, ups=0.09, wpb=64817, bsz=128, num_updates=14235, lr=9.98941e-05, gnorm=1.994, loss_scale=8, train_wall=11, gb_free=2.8, wall=163480
2021-06-20 16:03:37 | INFO | train_inner | epoch 005: 2312 / 3002 loss=2.562, ppl=5.9, wps=5751.9, ups=0.09, wpb=64796, bsz=128, num_updates=14236, lr=9.98941e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=163492
2021-06-20 16:03:48 | INFO | train_inner | epoch 005: 2313 / 3002 loss=2.501, ppl=5.66, wps=5825, ups=0.09, wpb=64810, bsz=128, num_updates=14237, lr=9.98941e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=163503
2021-06-20 16:04:00 | INFO | train_inner | epoch 005: 2314 / 3002 loss=2.514, ppl=5.71, wps=5854.4, ups=0.09, wpb=64887, bsz=128, num_updates=14238, lr=9.98941e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=163514
2021-06-20 16:04:11 | INFO | train_inner | epoch 005: 2315 / 3002 loss=2.475, ppl=5.56, wps=5833.7, ups=0.09, wpb=64753, bsz=128, num_updates=14239, lr=9.98941e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=163525
2021-06-20 16:04:22 | INFO | train_inner | epoch 005: 2316 / 3002 loss=2.366, ppl=5.15, wps=5944, ups=0.09, wpb=64839, bsz=128, num_updates=14240, lr=9.98941e-05, gnorm=1.935, loss_scale=8, train_wall=10, gb_free=2.8, wall=163536
2021-06-20 16:04:33 | INFO | train_inner | epoch 005: 2317 / 3002 loss=2.404, ppl=5.29, wps=5810.5, ups=0.09, wpb=64834, bsz=128, num_updates=14241, lr=9.98941e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=163547
2021-06-20 16:04:44 | INFO | train_inner | epoch 005: 2318 / 3002 loss=2.428, ppl=5.38, wps=5794.7, ups=0.09, wpb=64777, bsz=128, num_updates=14242, lr=9.98941e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=163558
2021-06-20 16:04:55 | INFO | train_inner | epoch 005: 2319 / 3002 loss=2.482, ppl=5.59, wps=5800.6, ups=0.09, wpb=64875, bsz=128, num_updates=14243, lr=9.9894e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=163569
2021-06-20 16:05:06 | INFO | train_inner | epoch 005: 2320 / 3002 loss=2.554, ppl=5.87, wps=5862.4, ups=0.09, wpb=64788, bsz=128, num_updates=14244, lr=9.9894e-05, gnorm=2.133, loss_scale=8, train_wall=11, gb_free=2.8, wall=163580
2021-06-20 16:05:17 | INFO | train_inner | epoch 005: 2321 / 3002 loss=2.471, ppl=5.54, wps=5868.5, ups=0.09, wpb=64859, bsz=128, num_updates=14245, lr=9.9894e-05, gnorm=2.011, loss_scale=8, train_wall=11, gb_free=2.8, wall=163591
2021-06-20 16:05:28 | INFO | train_inner | epoch 005: 2322 / 3002 loss=2.414, ppl=5.33, wps=5847.5, ups=0.09, wpb=64829, bsz=128, num_updates=14246, lr=9.9894e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=163603
2021-06-20 16:05:40 | INFO | train_inner | epoch 005: 2323 / 3002 loss=2.642, ppl=6.24, wps=5738.4, ups=0.09, wpb=64848, bsz=128, num_updates=14247, lr=9.9894e-05, gnorm=2.078, loss_scale=8, train_wall=11, gb_free=2.8, wall=163614
2021-06-20 16:05:50 | INFO | train_inner | epoch 005: 2324 / 3002 loss=2.604, ppl=6.08, wps=5937, ups=0.09, wpb=64792, bsz=128, num_updates=14248, lr=9.9894e-05, gnorm=2.2, loss_scale=8, train_wall=10, gb_free=2.8, wall=163625
2021-06-20 16:06:01 | INFO | train_inner | epoch 005: 2325 / 3002 loss=2.558, ppl=5.89, wps=5966.8, ups=0.09, wpb=64856, bsz=128, num_updates=14249, lr=9.9894e-05, gnorm=1.977, loss_scale=8, train_wall=10, gb_free=2.8, wall=163636
2021-06-20 16:06:12 | INFO | train_inner | epoch 005: 2326 / 3002 loss=2.444, ppl=5.44, wps=5844.6, ups=0.09, wpb=64853, bsz=128, num_updates=14250, lr=9.9894e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=163647
2021-06-20 16:06:24 | INFO | train_inner | epoch 005: 2327 / 3002 loss=2.514, ppl=5.71, wps=5783.1, ups=0.09, wpb=64822, bsz=128, num_updates=14251, lr=9.9894e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=163658
2021-06-20 16:06:35 | INFO | train_inner | epoch 005: 2328 / 3002 loss=2.555, ppl=5.88, wps=5768.2, ups=0.09, wpb=64883, bsz=128, num_updates=14252, lr=9.9894e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=163669
2021-06-20 16:06:46 | INFO | train_inner | epoch 005: 2329 / 3002 loss=2.56, ppl=5.9, wps=5658.7, ups=0.09, wpb=64796, bsz=128, num_updates=14253, lr=9.9894e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=163681
2021-06-20 16:06:57 | INFO | train_inner | epoch 005: 2330 / 3002 loss=2.369, ppl=5.17, wps=5914.5, ups=0.09, wpb=64812, bsz=128, num_updates=14254, lr=9.9894e-05, gnorm=1.954, loss_scale=8, train_wall=10, gb_free=2.8, wall=163692
2021-06-20 16:07:08 | INFO | train_inner | epoch 005: 2331 / 3002 loss=2.543, ppl=5.83, wps=5904.1, ups=0.09, wpb=64951, bsz=128, num_updates=14255, lr=9.9894e-05, gnorm=1.995, loss_scale=8, train_wall=10, gb_free=2.8, wall=163703
2021-06-20 16:07:19 | INFO | train_inner | epoch 005: 2332 / 3002 loss=2.632, ppl=6.2, wps=5829.7, ups=0.09, wpb=64861, bsz=128, num_updates=14256, lr=9.98939e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=163714
2021-06-20 16:07:30 | INFO | train_inner | epoch 005: 2333 / 3002 loss=2.401, ppl=5.28, wps=5915.2, ups=0.09, wpb=64792, bsz=128, num_updates=14257, lr=9.98939e-05, gnorm=1.993, loss_scale=8, train_wall=10, gb_free=2.8, wall=163725
2021-06-20 16:07:42 | INFO | train_inner | epoch 005: 2334 / 3002 loss=2.33, ppl=5.03, wps=5663.3, ups=0.09, wpb=64892, bsz=128, num_updates=14258, lr=9.98939e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=163736
2021-06-20 16:07:53 | INFO | train_inner | epoch 005: 2335 / 3002 loss=2.466, ppl=5.52, wps=5866.9, ups=0.09, wpb=64796, bsz=128, num_updates=14259, lr=9.98939e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=163747
2021-06-20 16:08:04 | INFO | train_inner | epoch 005: 2336 / 3002 loss=2.481, ppl=5.58, wps=5910.7, ups=0.09, wpb=64816, bsz=128, num_updates=14260, lr=9.98939e-05, gnorm=2.063, loss_scale=8, train_wall=11, gb_free=2.8, wall=163758
2021-06-20 16:08:15 | INFO | train_inner | epoch 005: 2337 / 3002 loss=2.471, ppl=5.54, wps=5934.5, ups=0.09, wpb=64830, bsz=128, num_updates=14261, lr=9.98939e-05, gnorm=2.048, loss_scale=8, train_wall=10, gb_free=2.8, wall=163769
2021-06-20 16:08:26 | INFO | train_inner | epoch 005: 2338 / 3002 loss=2.544, ppl=5.83, wps=5797.7, ups=0.09, wpb=64805, bsz=128, num_updates=14262, lr=9.98939e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=163780
2021-06-20 16:08:37 | INFO | train_inner | epoch 005: 2339 / 3002 loss=2.795, ppl=6.94, wps=5891.2, ups=0.09, wpb=64895, bsz=128, num_updates=14263, lr=9.98939e-05, gnorm=2.163, loss_scale=8, train_wall=11, gb_free=2.8, wall=163791
2021-06-20 16:08:48 | INFO | train_inner | epoch 005: 2340 / 3002 loss=2.349, ppl=5.1, wps=5837.9, ups=0.09, wpb=64774, bsz=128, num_updates=14264, lr=9.98939e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=163802
2021-06-20 16:08:59 | INFO | train_inner | epoch 005: 2341 / 3002 loss=2.403, ppl=5.29, wps=5777.4, ups=0.09, wpb=64901, bsz=128, num_updates=14265, lr=9.98939e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=163814
2021-06-20 16:09:10 | INFO | train_inner | epoch 005: 2342 / 3002 loss=2.629, ppl=6.19, wps=5846.5, ups=0.09, wpb=64834, bsz=128, num_updates=14266, lr=9.98939e-05, gnorm=2.064, loss_scale=8, train_wall=11, gb_free=2.8, wall=163825
2021-06-20 16:09:22 | INFO | train_inner | epoch 005: 2343 / 3002 loss=2.535, ppl=5.8, wps=5791.3, ups=0.09, wpb=64794, bsz=128, num_updates=14267, lr=9.98939e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=163836
2021-06-20 16:09:33 | INFO | train_inner | epoch 005: 2344 / 3002 loss=2.397, ppl=5.27, wps=5818.3, ups=0.09, wpb=64897, bsz=128, num_updates=14268, lr=9.98938e-05, gnorm=2.044, loss_scale=8, train_wall=11, gb_free=2.8, wall=163847
2021-06-20 16:09:44 | INFO | train_inner | epoch 005: 2345 / 3002 loss=2.575, ppl=5.96, wps=5792.3, ups=0.09, wpb=64867, bsz=128, num_updates=14269, lr=9.98938e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=163858
2021-06-20 16:09:55 | INFO | train_inner | epoch 005: 2346 / 3002 loss=2.472, ppl=5.55, wps=5862.6, ups=0.09, wpb=64887, bsz=128, num_updates=14270, lr=9.98938e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=163869
2021-06-20 16:10:06 | INFO | train_inner | epoch 005: 2347 / 3002 loss=2.458, ppl=5.49, wps=5760.8, ups=0.09, wpb=64789, bsz=128, num_updates=14271, lr=9.98938e-05, gnorm=1.855, loss_scale=8, train_wall=11, gb_free=2.8, wall=163881
2021-06-20 16:10:17 | INFO | train_inner | epoch 005: 2348 / 3002 loss=2.447, ppl=5.45, wps=5821.3, ups=0.09, wpb=64782, bsz=128, num_updates=14272, lr=9.98938e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=163892
2021-06-20 16:10:28 | INFO | train_inner | epoch 005: 2349 / 3002 loss=2.578, ppl=5.97, wps=5824, ups=0.09, wpb=64838, bsz=128, num_updates=14273, lr=9.98938e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=163903
2021-06-20 16:10:40 | INFO | train_inner | epoch 005: 2350 / 3002 loss=2.472, ppl=5.55, wps=5863, ups=0.09, wpb=64795, bsz=128, num_updates=14274, lr=9.98938e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=163914
2021-06-20 16:10:51 | INFO | train_inner | epoch 005: 2351 / 3002 loss=2.535, ppl=5.79, wps=5879.4, ups=0.09, wpb=64750, bsz=128, num_updates=14275, lr=9.98938e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=163925
2021-06-20 16:11:02 | INFO | train_inner | epoch 005: 2352 / 3002 loss=2.48, ppl=5.58, wps=5913.7, ups=0.09, wpb=64833, bsz=128, num_updates=14276, lr=9.98938e-05, gnorm=2.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=163936
2021-06-20 16:11:13 | INFO | train_inner | epoch 005: 2353 / 3002 loss=2.392, ppl=5.25, wps=5878.5, ups=0.09, wpb=64917, bsz=128, num_updates=14277, lr=9.98938e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=163947
2021-06-20 16:11:24 | INFO | train_inner | epoch 005: 2354 / 3002 loss=2.425, ppl=5.37, wps=5801.2, ups=0.09, wpb=64781, bsz=128, num_updates=14278, lr=9.98938e-05, gnorm=1.885, loss_scale=8, train_wall=11, gb_free=2.8, wall=163958
2021-06-20 16:11:35 | INFO | train_inner | epoch 005: 2355 / 3002 loss=2.516, ppl=5.72, wps=5843.4, ups=0.09, wpb=64733, bsz=128, num_updates=14279, lr=9.98938e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=163969
2021-06-20 16:11:46 | INFO | train_inner | epoch 005: 2356 / 3002 loss=2.517, ppl=5.72, wps=5854.9, ups=0.09, wpb=64846, bsz=128, num_updates=14280, lr=9.98938e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=163980
2021-06-20 16:11:57 | INFO | train_inner | epoch 005: 2357 / 3002 loss=2.649, ppl=6.27, wps=5847.8, ups=0.09, wpb=64876, bsz=128, num_updates=14281, lr=9.98937e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=163991
2021-06-20 16:12:08 | INFO | train_inner | epoch 005: 2358 / 3002 loss=2.496, ppl=5.64, wps=5893.3, ups=0.09, wpb=64874, bsz=128, num_updates=14282, lr=9.98937e-05, gnorm=1.864, loss_scale=8, train_wall=11, gb_free=2.8, wall=164002
2021-06-20 16:12:19 | INFO | train_inner | epoch 005: 2359 / 3002 loss=2.49, ppl=5.62, wps=5885.5, ups=0.09, wpb=64860, bsz=128, num_updates=14283, lr=9.98937e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=164013
2021-06-20 16:12:30 | INFO | train_inner | epoch 005: 2360 / 3002 loss=2.524, ppl=5.75, wps=5888.9, ups=0.09, wpb=64823, bsz=128, num_updates=14284, lr=9.98937e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=164024
2021-06-20 16:12:41 | INFO | train_inner | epoch 005: 2361 / 3002 loss=2.615, ppl=6.13, wps=5816, ups=0.09, wpb=64876, bsz=128, num_updates=14285, lr=9.98937e-05, gnorm=2.209, loss_scale=8, train_wall=11, gb_free=2.8, wall=164036
2021-06-20 16:12:52 | INFO | train_inner | epoch 005: 2362 / 3002 loss=2.56, ppl=5.9, wps=5800.7, ups=0.09, wpb=64806, bsz=128, num_updates=14286, lr=9.98937e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=164047
2021-06-20 16:13:04 | INFO | train_inner | epoch 005: 2363 / 3002 loss=2.661, ppl=6.33, wps=5785.5, ups=0.09, wpb=64806, bsz=128, num_updates=14287, lr=9.98937e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=164058
2021-06-20 16:13:15 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-20 16:13:26 | INFO | train_inner | epoch 005: 2365 / 3002 loss=2.65, ppl=6.27, wps=2944, ups=0.05, wpb=64822, bsz=128, num_updates=14288, lr=9.98937e-05, gnorm=1.929, loss_scale=4, train_wall=21, gb_free=2.8, wall=164080
2021-06-20 16:13:37 | INFO | train_inner | epoch 005: 2366 / 3002 loss=2.546, ppl=5.84, wps=5740.2, ups=0.09, wpb=64783, bsz=128, num_updates=14289, lr=9.98937e-05, gnorm=1.938, loss_scale=4, train_wall=11, gb_free=2.8, wall=164091
2021-06-20 16:13:48 | INFO | train_inner | epoch 005: 2367 / 3002 loss=2.49, ppl=5.62, wps=5904.6, ups=0.09, wpb=64965, bsz=128, num_updates=14290, lr=9.98937e-05, gnorm=1.951, loss_scale=4, train_wall=11, gb_free=2.8, wall=164102
2021-06-20 16:13:59 | INFO | train_inner | epoch 005: 2368 / 3002 loss=2.469, ppl=5.54, wps=5932.4, ups=0.09, wpb=64791, bsz=128, num_updates=14291, lr=9.98937e-05, gnorm=2.023, loss_scale=4, train_wall=10, gb_free=2.8, wall=164113
2021-06-20 16:14:10 | INFO | train_inner | epoch 005: 2369 / 3002 loss=2.437, ppl=5.42, wps=5934.9, ups=0.09, wpb=64861, bsz=128, num_updates=14292, lr=9.98937e-05, gnorm=1.918, loss_scale=4, train_wall=10, gb_free=2.8, wall=164124
2021-06-20 16:14:21 | INFO | train_inner | epoch 005: 2370 / 3002 loss=2.576, ppl=5.96, wps=5832.5, ups=0.09, wpb=64957, bsz=128, num_updates=14293, lr=9.98936e-05, gnorm=2.039, loss_scale=4, train_wall=11, gb_free=2.8, wall=164135
2021-06-20 16:14:32 | INFO | train_inner | epoch 005: 2371 / 3002 loss=2.488, ppl=5.61, wps=5899, ups=0.09, wpb=64791, bsz=128, num_updates=14294, lr=9.98936e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=164146
2021-06-20 16:14:43 | INFO | train_inner | epoch 005: 2372 / 3002 loss=2.621, ppl=6.15, wps=5840.7, ups=0.09, wpb=64846, bsz=128, num_updates=14295, lr=9.98936e-05, gnorm=1.944, loss_scale=4, train_wall=11, gb_free=2.8, wall=164157
2021-06-20 16:14:54 | INFO | train_inner | epoch 005: 2373 / 3002 loss=2.613, ppl=6.12, wps=5839.5, ups=0.09, wpb=64848, bsz=128, num_updates=14296, lr=9.98936e-05, gnorm=1.936, loss_scale=4, train_wall=11, gb_free=2.8, wall=164168
2021-06-20 16:15:05 | INFO | train_inner | epoch 005: 2374 / 3002 loss=2.728, ppl=6.63, wps=5930.1, ups=0.09, wpb=64783, bsz=128, num_updates=14297, lr=9.98936e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=164179
2021-06-20 16:15:16 | INFO | train_inner | epoch 005: 2375 / 3002 loss=2.415, ppl=5.33, wps=5891.9, ups=0.09, wpb=64874, bsz=128, num_updates=14298, lr=9.98936e-05, gnorm=1.922, loss_scale=4, train_wall=11, gb_free=2.8, wall=164190
2021-06-20 16:15:27 | INFO | train_inner | epoch 005: 2376 / 3002 loss=2.425, ppl=5.37, wps=5830.4, ups=0.09, wpb=64905, bsz=128, num_updates=14299, lr=9.98936e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=164201
2021-06-20 16:15:38 | INFO | train_inner | epoch 005: 2377 / 3002 loss=2.443, ppl=5.44, wps=5925.8, ups=0.09, wpb=64879, bsz=128, num_updates=14300, lr=9.98936e-05, gnorm=1.972, loss_scale=4, train_wall=11, gb_free=2.8, wall=164212
2021-06-20 16:15:49 | INFO | train_inner | epoch 005: 2378 / 3002 loss=2.602, ppl=6.07, wps=5838.3, ups=0.09, wpb=64850, bsz=128, num_updates=14301, lr=9.98936e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=164223
2021-06-20 16:16:00 | INFO | train_inner | epoch 005: 2379 / 3002 loss=2.546, ppl=5.84, wps=5777.1, ups=0.09, wpb=64832, bsz=128, num_updates=14302, lr=9.98936e-05, gnorm=2.049, loss_scale=4, train_wall=11, gb_free=2.8, wall=164235
2021-06-20 16:16:12 | INFO | train_inner | epoch 005: 2380 / 3002 loss=2.402, ppl=5.28, wps=5752.6, ups=0.09, wpb=64742, bsz=128, num_updates=14303, lr=9.98936e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=164246
2021-06-20 16:16:23 | INFO | train_inner | epoch 005: 2381 / 3002 loss=2.39, ppl=5.24, wps=5785.1, ups=0.09, wpb=64790, bsz=128, num_updates=14304, lr=9.98936e-05, gnorm=1.877, loss_scale=4, train_wall=11, gb_free=2.8, wall=164257
2021-06-20 16:16:34 | INFO | train_inner | epoch 005: 2382 / 3002 loss=2.507, ppl=5.68, wps=5944, ups=0.09, wpb=64784, bsz=128, num_updates=14305, lr=9.98936e-05, gnorm=1.901, loss_scale=4, train_wall=10, gb_free=2.8, wall=164268
2021-06-20 16:16:45 | INFO | train_inner | epoch 005: 2383 / 3002 loss=2.352, ppl=5.1, wps=5999.5, ups=0.09, wpb=64838, bsz=128, num_updates=14306, lr=9.98935e-05, gnorm=1.96, loss_scale=4, train_wall=10, gb_free=2.8, wall=164279
2021-06-20 16:16:56 | INFO | train_inner | epoch 005: 2384 / 3002 loss=2.27, ppl=4.82, wps=5808.2, ups=0.09, wpb=64851, bsz=128, num_updates=14307, lr=9.98935e-05, gnorm=1.885, loss_scale=4, train_wall=11, gb_free=2.8, wall=164290
2021-06-20 16:17:07 | INFO | train_inner | epoch 005: 2385 / 3002 loss=2.427, ppl=5.38, wps=5826.7, ups=0.09, wpb=64849, bsz=128, num_updates=14308, lr=9.98935e-05, gnorm=2.174, loss_scale=4, train_wall=11, gb_free=2.8, wall=164301
2021-06-20 16:17:18 | INFO | train_inner | epoch 005: 2386 / 3002 loss=2.612, ppl=6.11, wps=5827.9, ups=0.09, wpb=64803, bsz=128, num_updates=14309, lr=9.98935e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=164312
2021-06-20 16:17:29 | INFO | train_inner | epoch 005: 2387 / 3002 loss=2.636, ppl=6.22, wps=5751.9, ups=0.09, wpb=64886, bsz=128, num_updates=14310, lr=9.98935e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=164324
2021-06-20 16:17:40 | INFO | train_inner | epoch 005: 2388 / 3002 loss=2.502, ppl=5.67, wps=5819.3, ups=0.09, wpb=64813, bsz=128, num_updates=14311, lr=9.98935e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=164335
2021-06-20 16:17:51 | INFO | train_inner | epoch 005: 2389 / 3002 loss=2.584, ppl=6, wps=5923, ups=0.09, wpb=64800, bsz=128, num_updates=14312, lr=9.98935e-05, gnorm=1.924, loss_scale=4, train_wall=10, gb_free=2.8, wall=164346
2021-06-20 16:18:02 | INFO | train_inner | epoch 005: 2390 / 3002 loss=2.462, ppl=5.51, wps=5822.6, ups=0.09, wpb=64885, bsz=128, num_updates=14313, lr=9.98935e-05, gnorm=1.953, loss_scale=4, train_wall=11, gb_free=2.8, wall=164357
2021-06-20 16:18:14 | INFO | train_inner | epoch 005: 2391 / 3002 loss=2.393, ppl=5.25, wps=5797.2, ups=0.09, wpb=64855, bsz=128, num_updates=14314, lr=9.98935e-05, gnorm=1.84, loss_scale=4, train_wall=11, gb_free=2.8, wall=164368
2021-06-20 16:18:25 | INFO | train_inner | epoch 005: 2392 / 3002 loss=2.509, ppl=5.69, wps=5939, ups=0.09, wpb=64842, bsz=128, num_updates=14315, lr=9.98935e-05, gnorm=1.906, loss_scale=4, train_wall=10, gb_free=2.8, wall=164379
2021-06-20 16:18:36 | INFO | train_inner | epoch 005: 2393 / 3002 loss=2.472, ppl=5.55, wps=5768.6, ups=0.09, wpb=64785, bsz=128, num_updates=14316, lr=9.98935e-05, gnorm=1.957, loss_scale=4, train_wall=11, gb_free=2.8, wall=164390
2021-06-20 16:18:47 | INFO | train_inner | epoch 005: 2394 / 3002 loss=2.367, ppl=5.16, wps=5825.5, ups=0.09, wpb=64832, bsz=128, num_updates=14317, lr=9.98935e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=164401
2021-06-20 16:18:58 | INFO | train_inner | epoch 005: 2395 / 3002 loss=2.471, ppl=5.55, wps=5818.8, ups=0.09, wpb=64825, bsz=128, num_updates=14318, lr=9.98934e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=164412
2021-06-20 16:19:09 | INFO | train_inner | epoch 005: 2396 / 3002 loss=2.52, ppl=5.74, wps=5893.4, ups=0.09, wpb=64883, bsz=128, num_updates=14319, lr=9.98934e-05, gnorm=1.906, loss_scale=4, train_wall=11, gb_free=2.8, wall=164423
2021-06-20 16:19:20 | INFO | train_inner | epoch 005: 2397 / 3002 loss=2.518, ppl=5.73, wps=5810, ups=0.09, wpb=64799, bsz=128, num_updates=14320, lr=9.98934e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=164435
2021-06-20 16:19:32 | INFO | train_inner | epoch 005: 2398 / 3002 loss=2.381, ppl=5.21, wps=5744.7, ups=0.09, wpb=64783, bsz=128, num_updates=14321, lr=9.98934e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=164446
2021-06-20 16:19:43 | INFO | train_inner | epoch 005: 2399 / 3002 loss=2.505, ppl=5.67, wps=5784.8, ups=0.09, wpb=64783, bsz=128, num_updates=14322, lr=9.98934e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=164457
2021-06-20 16:19:54 | INFO | train_inner | epoch 005: 2400 / 3002 loss=2.612, ppl=6.11, wps=5914.4, ups=0.09, wpb=64936, bsz=128, num_updates=14323, lr=9.98934e-05, gnorm=1.966, loss_scale=4, train_wall=11, gb_free=2.8, wall=164468
2021-06-20 16:20:05 | INFO | train_inner | epoch 005: 2401 / 3002 loss=2.525, ppl=5.75, wps=5916.6, ups=0.09, wpb=64805, bsz=128, num_updates=14324, lr=9.98934e-05, gnorm=3.706, loss_scale=4, train_wall=10, gb_free=2.8, wall=164479
2021-06-20 16:20:16 | INFO | train_inner | epoch 005: 2402 / 3002 loss=2.425, ppl=5.37, wps=5861.8, ups=0.09, wpb=64729, bsz=128, num_updates=14325, lr=9.98934e-05, gnorm=2.001, loss_scale=4, train_wall=11, gb_free=2.8, wall=164490
2021-06-20 16:20:27 | INFO | train_inner | epoch 005: 2403 / 3002 loss=2.538, ppl=5.81, wps=5886.8, ups=0.09, wpb=64791, bsz=128, num_updates=14326, lr=9.98934e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=164501
2021-06-20 16:20:38 | INFO | train_inner | epoch 005: 2404 / 3002 loss=2.525, ppl=5.75, wps=5787.8, ups=0.09, wpb=64829, bsz=128, num_updates=14327, lr=9.98934e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=164512
2021-06-20 16:20:49 | INFO | train_inner | epoch 005: 2405 / 3002 loss=2.525, ppl=5.76, wps=5793, ups=0.09, wpb=64858, bsz=128, num_updates=14328, lr=9.98934e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=164523
2021-06-20 16:21:00 | INFO | train_inner | epoch 005: 2406 / 3002 loss=2.417, ppl=5.34, wps=5863.8, ups=0.09, wpb=64796, bsz=128, num_updates=14329, lr=9.98934e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=164534
2021-06-20 16:21:11 | INFO | train_inner | epoch 005: 2407 / 3002 loss=2.482, ppl=5.59, wps=5869.1, ups=0.09, wpb=64855, bsz=128, num_updates=14330, lr=9.98934e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=164546
2021-06-20 16:21:22 | INFO | train_inner | epoch 005: 2408 / 3002 loss=2.601, ppl=6.07, wps=5898.1, ups=0.09, wpb=64796, bsz=128, num_updates=14331, lr=9.98933e-05, gnorm=1.895, loss_scale=4, train_wall=11, gb_free=2.8, wall=164557
2021-06-20 16:21:33 | INFO | train_inner | epoch 005: 2409 / 3002 loss=2.352, ppl=5.1, wps=5993.5, ups=0.09, wpb=64818, bsz=128, num_updates=14332, lr=9.98933e-05, gnorm=1.97, loss_scale=4, train_wall=10, gb_free=2.8, wall=164567
2021-06-20 16:21:44 | INFO | train_inner | epoch 005: 2410 / 3002 loss=2.534, ppl=5.79, wps=5895.4, ups=0.09, wpb=64907, bsz=128, num_updates=14333, lr=9.98933e-05, gnorm=1.915, loss_scale=4, train_wall=11, gb_free=2.8, wall=164578
2021-06-20 16:21:55 | INFO | train_inner | epoch 005: 2411 / 3002 loss=2.545, ppl=5.84, wps=5969.5, ups=0.09, wpb=64789, bsz=128, num_updates=14334, lr=9.98933e-05, gnorm=1.971, loss_scale=4, train_wall=10, gb_free=2.8, wall=164589
2021-06-20 16:22:06 | INFO | train_inner | epoch 005: 2412 / 3002 loss=2.56, ppl=5.9, wps=6000.7, ups=0.09, wpb=64816, bsz=128, num_updates=14335, lr=9.98933e-05, gnorm=1.954, loss_scale=4, train_wall=10, gb_free=2.8, wall=164600
2021-06-20 16:22:17 | INFO | train_inner | epoch 005: 2413 / 3002 loss=2.583, ppl=5.99, wps=5949, ups=0.09, wpb=64856, bsz=128, num_updates=14336, lr=9.98933e-05, gnorm=1.9, loss_scale=4, train_wall=10, gb_free=2.8, wall=164611
2021-06-20 16:22:28 | INFO | train_inner | epoch 005: 2414 / 3002 loss=2.684, ppl=6.43, wps=5796.1, ups=0.09, wpb=64798, bsz=128, num_updates=14337, lr=9.98933e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=164622
2021-06-20 16:22:39 | INFO | train_inner | epoch 005: 2415 / 3002 loss=2.6, ppl=6.06, wps=5888.9, ups=0.09, wpb=64877, bsz=128, num_updates=14338, lr=9.98933e-05, gnorm=1.975, loss_scale=4, train_wall=11, gb_free=2.8, wall=164633
2021-06-20 16:22:50 | INFO | train_inner | epoch 005: 2416 / 3002 loss=2.55, ppl=5.86, wps=5771.2, ups=0.09, wpb=64772, bsz=128, num_updates=14339, lr=9.98933e-05, gnorm=2.083, loss_scale=4, train_wall=11, gb_free=2.8, wall=164644
2021-06-20 16:23:01 | INFO | train_inner | epoch 005: 2417 / 3002 loss=2.576, ppl=5.96, wps=5686.6, ups=0.09, wpb=64768, bsz=128, num_updates=14340, lr=9.98933e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=164656
2021-06-20 16:23:13 | INFO | train_inner | epoch 005: 2418 / 3002 loss=2.576, ppl=5.96, wps=5815.6, ups=0.09, wpb=64896, bsz=128, num_updates=14341, lr=9.98933e-05, gnorm=1.919, loss_scale=4, train_wall=11, gb_free=2.8, wall=164667
2021-06-20 16:23:24 | INFO | train_inner | epoch 005: 2419 / 3002 loss=2.549, ppl=5.85, wps=5896.4, ups=0.09, wpb=64834, bsz=128, num_updates=14342, lr=9.98933e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=164678
2021-06-20 16:23:34 | INFO | train_inner | epoch 005: 2420 / 3002 loss=2.334, ppl=5.04, wps=5941.3, ups=0.09, wpb=64824, bsz=128, num_updates=14343, lr=9.98932e-05, gnorm=1.885, loss_scale=4, train_wall=10, gb_free=2.8, wall=164689
2021-06-20 16:23:46 | INFO | train_inner | epoch 005: 2421 / 3002 loss=2.348, ppl=5.09, wps=5833.5, ups=0.09, wpb=64879, bsz=128, num_updates=14344, lr=9.98932e-05, gnorm=1.935, loss_scale=4, train_wall=11, gb_free=2.8, wall=164700
2021-06-20 16:23:57 | INFO | train_inner | epoch 005: 2422 / 3002 loss=2.657, ppl=6.31, wps=5849.6, ups=0.09, wpb=64810, bsz=128, num_updates=14345, lr=9.98932e-05, gnorm=2.122, loss_scale=4, train_wall=11, gb_free=2.8, wall=164711
2021-06-20 16:24:08 | INFO | train_inner | epoch 005: 2423 / 3002 loss=2.571, ppl=5.94, wps=5927.6, ups=0.09, wpb=64880, bsz=128, num_updates=14346, lr=9.98932e-05, gnorm=2.033, loss_scale=4, train_wall=10, gb_free=2.8, wall=164722
2021-06-20 16:24:19 | INFO | train_inner | epoch 005: 2424 / 3002 loss=2.434, ppl=5.4, wps=5802.6, ups=0.09, wpb=64820, bsz=128, num_updates=14347, lr=9.98932e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=164733
2021-06-20 16:24:30 | INFO | train_inner | epoch 005: 2425 / 3002 loss=2.394, ppl=5.25, wps=5835.5, ups=0.09, wpb=64922, bsz=128, num_updates=14348, lr=9.98932e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=164744
2021-06-20 16:24:41 | INFO | train_inner | epoch 005: 2426 / 3002 loss=2.507, ppl=5.69, wps=5727.7, ups=0.09, wpb=64802, bsz=128, num_updates=14349, lr=9.98932e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=164756
2021-06-20 16:24:52 | INFO | train_inner | epoch 005: 2427 / 3002 loss=2.5, ppl=5.66, wps=5813.9, ups=0.09, wpb=64812, bsz=128, num_updates=14350, lr=9.98932e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=164767
2021-06-20 16:25:04 | INFO | train_inner | epoch 005: 2428 / 3002 loss=2.488, ppl=5.61, wps=5714.8, ups=0.09, wpb=64814, bsz=128, num_updates=14351, lr=9.98932e-05, gnorm=2.007, loss_scale=4, train_wall=11, gb_free=2.8, wall=164778
2021-06-20 16:25:15 | INFO | train_inner | epoch 005: 2429 / 3002 loss=2.361, ppl=5.14, wps=5845.2, ups=0.09, wpb=64827, bsz=128, num_updates=14352, lr=9.98932e-05, gnorm=1.944, loss_scale=4, train_wall=11, gb_free=2.8, wall=164789
2021-06-20 16:25:26 | INFO | train_inner | epoch 005: 2430 / 3002 loss=2.445, ppl=5.44, wps=5820.7, ups=0.09, wpb=64747, bsz=128, num_updates=14353, lr=9.98932e-05, gnorm=1.936, loss_scale=4, train_wall=11, gb_free=2.8, wall=164800
2021-06-20 16:25:37 | INFO | train_inner | epoch 005: 2431 / 3002 loss=2.567, ppl=5.93, wps=5910, ups=0.09, wpb=64958, bsz=128, num_updates=14354, lr=9.98932e-05, gnorm=2.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=164811
2021-06-20 16:25:48 | INFO | train_inner | epoch 005: 2432 / 3002 loss=2.5, ppl=5.66, wps=5800.1, ups=0.09, wpb=64833, bsz=128, num_updates=14355, lr=9.98932e-05, gnorm=2.111, loss_scale=4, train_wall=11, gb_free=2.8, wall=164822
2021-06-20 16:25:59 | INFO | train_inner | epoch 005: 2433 / 3002 loss=2.429, ppl=5.39, wps=5851.2, ups=0.09, wpb=64838, bsz=128, num_updates=14356, lr=9.98931e-05, gnorm=1.909, loss_scale=4, train_wall=11, gb_free=2.8, wall=164833
2021-06-20 16:26:10 | INFO | train_inner | epoch 005: 2434 / 3002 loss=2.477, ppl=5.57, wps=5947.4, ups=0.09, wpb=64758, bsz=128, num_updates=14357, lr=9.98931e-05, gnorm=1.938, loss_scale=4, train_wall=10, gb_free=2.8, wall=164844
2021-06-20 16:26:21 | INFO | train_inner | epoch 005: 2435 / 3002 loss=2.577, ppl=5.97, wps=5807.4, ups=0.09, wpb=64712, bsz=128, num_updates=14358, lr=9.98931e-05, gnorm=2.073, loss_scale=4, train_wall=11, gb_free=2.8, wall=164856
2021-06-20 16:26:32 | INFO | train_inner | epoch 005: 2436 / 3002 loss=2.546, ppl=5.84, wps=5889, ups=0.09, wpb=64764, bsz=128, num_updates=14359, lr=9.98931e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=164867
2021-06-20 16:26:43 | INFO | train_inner | epoch 005: 2437 / 3002 loss=2.398, ppl=5.27, wps=5847.6, ups=0.09, wpb=64833, bsz=128, num_updates=14360, lr=9.98931e-05, gnorm=2.009, loss_scale=4, train_wall=11, gb_free=2.8, wall=164878
2021-06-20 16:26:54 | INFO | train_inner | epoch 005: 2438 / 3002 loss=2.371, ppl=5.17, wps=5996, ups=0.09, wpb=64951, bsz=128, num_updates=14361, lr=9.98931e-05, gnorm=2.05, loss_scale=4, train_wall=10, gb_free=2.8, wall=164888
2021-06-20 16:27:05 | INFO | train_inner | epoch 005: 2439 / 3002 loss=2.484, ppl=5.59, wps=5703.4, ups=0.09, wpb=64784, bsz=128, num_updates=14362, lr=9.98931e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=164900
2021-06-20 16:27:16 | INFO | train_inner | epoch 005: 2440 / 3002 loss=2.256, ppl=4.78, wps=5915, ups=0.09, wpb=64802, bsz=128, num_updates=14363, lr=9.98931e-05, gnorm=2.01, loss_scale=4, train_wall=11, gb_free=2.8, wall=164911
2021-06-20 16:27:28 | INFO | train_inner | epoch 005: 2441 / 3002 loss=2.546, ppl=5.84, wps=5798, ups=0.09, wpb=64812, bsz=128, num_updates=14364, lr=9.98931e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=164922
2021-06-20 16:27:39 | INFO | train_inner | epoch 005: 2442 / 3002 loss=2.561, ppl=5.9, wps=5833.2, ups=0.09, wpb=64865, bsz=128, num_updates=14365, lr=9.98931e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=164933
2021-06-20 16:27:50 | INFO | train_inner | epoch 005: 2443 / 3002 loss=2.675, ppl=6.39, wps=5814.1, ups=0.09, wpb=64812, bsz=128, num_updates=14366, lr=9.98931e-05, gnorm=2.026, loss_scale=4, train_wall=11, gb_free=2.8, wall=164944
2021-06-20 16:28:01 | INFO | train_inner | epoch 005: 2444 / 3002 loss=2.562, ppl=5.91, wps=5948.8, ups=0.09, wpb=64757, bsz=128, num_updates=14367, lr=9.98931e-05, gnorm=2.039, loss_scale=4, train_wall=10, gb_free=2.8, wall=164955
2021-06-20 16:28:12 | INFO | train_inner | epoch 005: 2445 / 3002 loss=2.397, ppl=5.27, wps=5777, ups=0.09, wpb=64767, bsz=128, num_updates=14368, lr=9.9893e-05, gnorm=2.083, loss_scale=4, train_wall=11, gb_free=2.8, wall=164966
2021-06-20 16:28:23 | INFO | train_inner | epoch 005: 2446 / 3002 loss=2.58, ppl=5.98, wps=5829.4, ups=0.09, wpb=64909, bsz=128, num_updates=14369, lr=9.9893e-05, gnorm=2.53, loss_scale=4, train_wall=11, gb_free=2.8, wall=164977
2021-06-20 16:28:34 | INFO | train_inner | epoch 005: 2447 / 3002 loss=2.416, ppl=5.34, wps=5865.3, ups=0.09, wpb=64925, bsz=128, num_updates=14370, lr=9.9893e-05, gnorm=1.983, loss_scale=4, train_wall=11, gb_free=2.8, wall=164988
2021-06-20 16:28:45 | INFO | train_inner | epoch 005: 2448 / 3002 loss=2.416, ppl=5.34, wps=5749.8, ups=0.09, wpb=64737, bsz=128, num_updates=14371, lr=9.9893e-05, gnorm=2.007, loss_scale=4, train_wall=11, gb_free=2.8, wall=165000
2021-06-20 16:28:57 | INFO | train_inner | epoch 005: 2449 / 3002 loss=2.447, ppl=5.45, wps=5787.9, ups=0.09, wpb=64797, bsz=128, num_updates=14372, lr=9.9893e-05, gnorm=2.024, loss_scale=4, train_wall=11, gb_free=2.8, wall=165011
2021-06-20 16:29:08 | INFO | train_inner | epoch 005: 2450 / 3002 loss=2.607, ppl=6.09, wps=5763.4, ups=0.09, wpb=64830, bsz=128, num_updates=14373, lr=9.9893e-05, gnorm=34.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=165022
2021-06-20 16:29:19 | INFO | train_inner | epoch 005: 2451 / 3002 loss=2.635, ppl=6.21, wps=5878, ups=0.09, wpb=64834, bsz=128, num_updates=14374, lr=9.9893e-05, gnorm=2.022, loss_scale=4, train_wall=11, gb_free=2.8, wall=165033
2021-06-20 16:29:30 | INFO | train_inner | epoch 005: 2452 / 3002 loss=2.515, ppl=5.72, wps=5811.8, ups=0.09, wpb=64825, bsz=128, num_updates=14375, lr=9.9893e-05, gnorm=2, loss_scale=4, train_wall=11, gb_free=2.8, wall=165044
2021-06-20 16:29:41 | INFO | train_inner | epoch 005: 2453 / 3002 loss=2.645, ppl=6.26, wps=5985.6, ups=0.09, wpb=64843, bsz=128, num_updates=14376, lr=9.9893e-05, gnorm=2.061, loss_scale=4, train_wall=10, gb_free=2.8, wall=165055
2021-06-20 16:29:52 | INFO | train_inner | epoch 005: 2454 / 3002 loss=2.51, ppl=5.7, wps=5840.8, ups=0.09, wpb=64874, bsz=128, num_updates=14377, lr=9.9893e-05, gnorm=2.015, loss_scale=4, train_wall=11, gb_free=2.8, wall=165066
2021-06-20 16:30:03 | INFO | train_inner | epoch 005: 2455 / 3002 loss=2.471, ppl=5.55, wps=5865.6, ups=0.09, wpb=64838, bsz=128, num_updates=14378, lr=9.9893e-05, gnorm=2.057, loss_scale=4, train_wall=11, gb_free=2.8, wall=165077
2021-06-20 16:30:14 | INFO | train_inner | epoch 005: 2456 / 3002 loss=2.55, ppl=5.86, wps=5803.3, ups=0.09, wpb=64769, bsz=128, num_updates=14379, lr=9.9893e-05, gnorm=2.156, loss_scale=4, train_wall=11, gb_free=2.8, wall=165089
2021-06-20 16:30:25 | INFO | train_inner | epoch 005: 2457 / 3002 loss=2.476, ppl=5.56, wps=5831.5, ups=0.09, wpb=64826, bsz=128, num_updates=14380, lr=9.9893e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=165100
2021-06-20 16:30:36 | INFO | train_inner | epoch 005: 2458 / 3002 loss=2.639, ppl=6.23, wps=5847.7, ups=0.09, wpb=64824, bsz=128, num_updates=14381, lr=9.98929e-05, gnorm=2.09, loss_scale=4, train_wall=11, gb_free=2.8, wall=165111
2021-06-20 16:30:48 | INFO | train_inner | epoch 005: 2459 / 3002 loss=2.508, ppl=5.69, wps=5848.6, ups=0.09, wpb=64923, bsz=128, num_updates=14382, lr=9.98929e-05, gnorm=2.057, loss_scale=4, train_wall=11, gb_free=2.8, wall=165122
2021-06-20 16:30:59 | INFO | train_inner | epoch 005: 2460 / 3002 loss=2.444, ppl=5.44, wps=5854.5, ups=0.09, wpb=64894, bsz=128, num_updates=14383, lr=9.98929e-05, gnorm=2.116, loss_scale=4, train_wall=11, gb_free=2.8, wall=165133
2021-06-20 16:31:10 | INFO | train_inner | epoch 005: 2461 / 3002 loss=2.485, ppl=5.6, wps=5886.5, ups=0.09, wpb=64767, bsz=128, num_updates=14384, lr=9.98929e-05, gnorm=2.106, loss_scale=4, train_wall=11, gb_free=2.8, wall=165144
2021-06-20 16:31:21 | INFO | train_inner | epoch 005: 2462 / 3002 loss=2.504, ppl=5.67, wps=5824.5, ups=0.09, wpb=64636, bsz=128, num_updates=14385, lr=9.98929e-05, gnorm=2.066, loss_scale=4, train_wall=11, gb_free=2.8, wall=165155
2021-06-20 16:31:32 | INFO | train_inner | epoch 005: 2463 / 3002 loss=2.59, ppl=6.02, wps=5932.8, ups=0.09, wpb=64968, bsz=128, num_updates=14386, lr=9.98929e-05, gnorm=2.079, loss_scale=4, train_wall=11, gb_free=2.8, wall=165166
2021-06-20 16:31:43 | INFO | train_inner | epoch 005: 2464 / 3002 loss=2.474, ppl=5.56, wps=5861.9, ups=0.09, wpb=64789, bsz=128, num_updates=14387, lr=9.98929e-05, gnorm=2.23, loss_scale=4, train_wall=11, gb_free=2.8, wall=165177
2021-06-20 16:31:54 | INFO | train_inner | epoch 005: 2465 / 3002 loss=2.578, ppl=5.97, wps=5911.2, ups=0.09, wpb=64778, bsz=128, num_updates=14388, lr=9.98929e-05, gnorm=2.091, loss_scale=4, train_wall=11, gb_free=2.8, wall=165188
2021-06-20 16:32:05 | INFO | train_inner | epoch 005: 2466 / 3002 loss=2.441, ppl=5.43, wps=5783.7, ups=0.09, wpb=64848, bsz=128, num_updates=14389, lr=9.98929e-05, gnorm=2.04, loss_scale=4, train_wall=11, gb_free=2.8, wall=165199
2021-06-20 16:32:16 | INFO | train_inner | epoch 005: 2467 / 3002 loss=2.628, ppl=6.18, wps=5876.1, ups=0.09, wpb=64811, bsz=128, num_updates=14390, lr=9.98929e-05, gnorm=1.99, loss_scale=4, train_wall=11, gb_free=2.8, wall=165210
2021-06-20 16:32:27 | INFO | train_inner | epoch 005: 2468 / 3002 loss=2.589, ppl=6.02, wps=5784.2, ups=0.09, wpb=64892, bsz=128, num_updates=14391, lr=9.98929e-05, gnorm=2.026, loss_scale=4, train_wall=11, gb_free=2.8, wall=165221
2021-06-20 16:32:38 | INFO | train_inner | epoch 005: 2469 / 3002 loss=2.578, ppl=5.97, wps=5834.9, ups=0.09, wpb=64825, bsz=128, num_updates=14392, lr=9.98929e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=165233
2021-06-20 16:32:49 | INFO | train_inner | epoch 005: 2470 / 3002 loss=2.457, ppl=5.49, wps=5767.5, ups=0.09, wpb=64825, bsz=128, num_updates=14393, lr=9.98928e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=165244
2021-06-20 16:33:00 | INFO | train_inner | epoch 005: 2471 / 3002 loss=2.658, ppl=6.31, wps=5903, ups=0.09, wpb=64874, bsz=128, num_updates=14394, lr=9.98928e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=165255
2021-06-20 16:33:11 | INFO | train_inner | epoch 005: 2472 / 3002 loss=2.41, ppl=5.31, wps=5953.3, ups=0.09, wpb=64827, bsz=128, num_updates=14395, lr=9.98928e-05, gnorm=1.964, loss_scale=4, train_wall=10, gb_free=2.8, wall=165266
2021-06-20 16:33:22 | INFO | train_inner | epoch 005: 2473 / 3002 loss=2.532, ppl=5.78, wps=5863.4, ups=0.09, wpb=64870, bsz=128, num_updates=14396, lr=9.98928e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=165277
2021-06-20 16:33:34 | INFO | train_inner | epoch 005: 2474 / 3002 loss=2.54, ppl=5.82, wps=5846.5, ups=0.09, wpb=64821, bsz=128, num_updates=14397, lr=9.98928e-05, gnorm=2.062, loss_scale=4, train_wall=11, gb_free=2.8, wall=165288
2021-06-20 16:33:45 | INFO | train_inner | epoch 005: 2475 / 3002 loss=2.48, ppl=5.58, wps=5827.4, ups=0.09, wpb=64850, bsz=128, num_updates=14398, lr=9.98928e-05, gnorm=1.916, loss_scale=4, train_wall=11, gb_free=2.8, wall=165299
2021-06-20 16:33:56 | INFO | train_inner | epoch 005: 2476 / 3002 loss=2.563, ppl=5.91, wps=5835.7, ups=0.09, wpb=64879, bsz=128, num_updates=14399, lr=9.98928e-05, gnorm=1.892, loss_scale=4, train_wall=11, gb_free=2.8, wall=165310
2021-06-20 16:34:07 | INFO | train_inner | epoch 005: 2477 / 3002 loss=2.609, ppl=6.1, wps=5967.3, ups=0.09, wpb=64873, bsz=128, num_updates=14400, lr=9.98928e-05, gnorm=1.973, loss_scale=4, train_wall=10, gb_free=2.8, wall=165321
2021-06-20 16:34:18 | INFO | train_inner | epoch 005: 2478 / 3002 loss=2.536, ppl=5.8, wps=5860, ups=0.09, wpb=64771, bsz=128, num_updates=14401, lr=9.98928e-05, gnorm=2.079, loss_scale=4, train_wall=11, gb_free=2.8, wall=165332
2021-06-20 16:34:29 | INFO | train_inner | epoch 005: 2479 / 3002 loss=2.437, ppl=5.42, wps=5891.7, ups=0.09, wpb=64871, bsz=128, num_updates=14402, lr=9.98928e-05, gnorm=2.03, loss_scale=4, train_wall=11, gb_free=2.8, wall=165343
2021-06-20 16:34:40 | INFO | train_inner | epoch 005: 2480 / 3002 loss=2.493, ppl=5.63, wps=5969.2, ups=0.09, wpb=64786, bsz=128, num_updates=14403, lr=9.98928e-05, gnorm=1.969, loss_scale=4, train_wall=10, gb_free=2.8, wall=165354
2021-06-20 16:34:51 | INFO | train_inner | epoch 005: 2481 / 3002 loss=2.506, ppl=5.68, wps=5840.7, ups=0.09, wpb=64848, bsz=128, num_updates=14404, lr=9.98928e-05, gnorm=1.881, loss_scale=4, train_wall=11, gb_free=2.8, wall=165365
2021-06-20 16:35:02 | INFO | train_inner | epoch 005: 2482 / 3002 loss=2.328, ppl=5.02, wps=5928.5, ups=0.09, wpb=64841, bsz=128, num_updates=14405, lr=9.98928e-05, gnorm=1.888, loss_scale=4, train_wall=11, gb_free=2.8, wall=165376
2021-06-20 16:35:13 | INFO | train_inner | epoch 005: 2483 / 3002 loss=2.519, ppl=5.73, wps=5890.5, ups=0.09, wpb=64889, bsz=128, num_updates=14406, lr=9.98927e-05, gnorm=1.932, loss_scale=4, train_wall=11, gb_free=2.8, wall=165387
2021-06-20 16:35:24 | INFO | train_inner | epoch 005: 2484 / 3002 loss=2.517, ppl=5.72, wps=5788.1, ups=0.09, wpb=64797, bsz=128, num_updates=14407, lr=9.98927e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=165398
2021-06-20 16:35:35 | INFO | train_inner | epoch 005: 2485 / 3002 loss=2.656, ppl=6.3, wps=5832.8, ups=0.09, wpb=64803, bsz=128, num_updates=14408, lr=9.98927e-05, gnorm=1.948, loss_scale=4, train_wall=11, gb_free=2.8, wall=165409
2021-06-20 16:35:46 | INFO | train_inner | epoch 005: 2486 / 3002 loss=2.544, ppl=5.83, wps=5846.1, ups=0.09, wpb=64735, bsz=128, num_updates=14409, lr=9.98927e-05, gnorm=2.248, loss_scale=4, train_wall=11, gb_free=2.8, wall=165420
2021-06-20 16:35:57 | INFO | train_inner | epoch 005: 2487 / 3002 loss=2.439, ppl=5.42, wps=5910.1, ups=0.09, wpb=64781, bsz=128, num_updates=14410, lr=9.98927e-05, gnorm=1.95, loss_scale=4, train_wall=11, gb_free=2.8, wall=165431
2021-06-20 16:36:08 | INFO | train_inner | epoch 005: 2488 / 3002 loss=2.597, ppl=6.05, wps=5727.9, ups=0.09, wpb=64848, bsz=128, num_updates=14411, lr=9.98927e-05, gnorm=1.936, loss_scale=4, train_wall=11, gb_free=2.8, wall=165443
2021-06-20 16:36:19 | INFO | train_inner | epoch 005: 2489 / 3002 loss=2.52, ppl=5.73, wps=5866.8, ups=0.09, wpb=64774, bsz=128, num_updates=14412, lr=9.98927e-05, gnorm=2.035, loss_scale=4, train_wall=11, gb_free=2.8, wall=165454
2021-06-20 16:36:30 | INFO | train_inner | epoch 005: 2490 / 3002 loss=2.605, ppl=6.08, wps=5821.2, ups=0.09, wpb=64811, bsz=128, num_updates=14413, lr=9.98927e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=165465
2021-06-20 16:36:42 | INFO | train_inner | epoch 005: 2491 / 3002 loss=2.406, ppl=5.3, wps=5820.6, ups=0.09, wpb=64776, bsz=128, num_updates=14414, lr=9.98927e-05, gnorm=2.33, loss_scale=4, train_wall=11, gb_free=2.8, wall=165476
2021-06-20 16:36:53 | INFO | train_inner | epoch 005: 2492 / 3002 loss=2.471, ppl=5.54, wps=5777, ups=0.09, wpb=64809, bsz=128, num_updates=14415, lr=9.98927e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=165487
2021-06-20 16:37:04 | INFO | train_inner | epoch 005: 2493 / 3002 loss=2.561, ppl=5.9, wps=5716.7, ups=0.09, wpb=64824, bsz=128, num_updates=14416, lr=9.98927e-05, gnorm=2.057, loss_scale=8, train_wall=11, gb_free=2.8, wall=165498
2021-06-20 16:37:15 | INFO | train_inner | epoch 005: 2494 / 3002 loss=2.342, ppl=5.07, wps=5796.8, ups=0.09, wpb=64850, bsz=128, num_updates=14417, lr=9.98927e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=165510
2021-06-20 16:37:27 | INFO | train_inner | epoch 005: 2495 / 3002 loss=2.659, ppl=6.32, wps=5781.2, ups=0.09, wpb=64800, bsz=128, num_updates=14418, lr=9.98926e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=165521
2021-06-20 16:37:38 | INFO | train_inner | epoch 005: 2496 / 3002 loss=2.523, ppl=5.75, wps=5849, ups=0.09, wpb=64874, bsz=128, num_updates=14419, lr=9.98926e-05, gnorm=1.977, loss_scale=8, train_wall=11, gb_free=2.8, wall=165532
2021-06-20 16:37:49 | INFO | train_inner | epoch 005: 2497 / 3002 loss=2.669, ppl=6.36, wps=5772.1, ups=0.09, wpb=64855, bsz=128, num_updates=14420, lr=9.98926e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=165543
2021-06-20 16:38:00 | INFO | train_inner | epoch 005: 2498 / 3002 loss=2.526, ppl=5.76, wps=5953, ups=0.09, wpb=64796, bsz=128, num_updates=14421, lr=9.98926e-05, gnorm=1.914, loss_scale=8, train_wall=10, gb_free=2.8, wall=165554
2021-06-20 16:38:11 | INFO | train_inner | epoch 005: 2499 / 3002 loss=2.454, ppl=5.48, wps=5837.7, ups=0.09, wpb=64857, bsz=128, num_updates=14422, lr=9.98926e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=165565
2021-06-20 16:38:22 | INFO | train_inner | epoch 005: 2500 / 3002 loss=2.406, ppl=5.3, wps=5773.3, ups=0.09, wpb=64887, bsz=128, num_updates=14423, lr=9.98926e-05, gnorm=1.872, loss_scale=8, train_wall=11, gb_free=2.8, wall=165576
2021-06-20 16:38:33 | INFO | train_inner | epoch 005: 2501 / 3002 loss=2.557, ppl=5.89, wps=5897.5, ups=0.09, wpb=64812, bsz=128, num_updates=14424, lr=9.98926e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=165587
2021-06-20 16:38:44 | INFO | train_inner | epoch 005: 2502 / 3002 loss=2.536, ppl=5.8, wps=5798.6, ups=0.09, wpb=64889, bsz=128, num_updates=14425, lr=9.98926e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=165599
2021-06-20 16:38:55 | INFO | train_inner | epoch 005: 2503 / 3002 loss=2.475, ppl=5.56, wps=5898.1, ups=0.09, wpb=64879, bsz=128, num_updates=14426, lr=9.98926e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=165610
2021-06-20 16:39:06 | INFO | train_inner | epoch 005: 2504 / 3002 loss=2.422, ppl=5.36, wps=5796.6, ups=0.09, wpb=64852, bsz=128, num_updates=14427, lr=9.98926e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=165621
2021-06-20 16:39:18 | INFO | train_inner | epoch 005: 2505 / 3002 loss=2.388, ppl=5.23, wps=5766.1, ups=0.09, wpb=64770, bsz=128, num_updates=14428, lr=9.98926e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=165632
2021-06-20 16:39:29 | INFO | train_inner | epoch 005: 2506 / 3002 loss=2.543, ppl=5.83, wps=5770.1, ups=0.09, wpb=64743, bsz=128, num_updates=14429, lr=9.98926e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=165643
2021-06-20 16:39:40 | INFO | train_inner | epoch 005: 2507 / 3002 loss=2.543, ppl=5.83, wps=5807.8, ups=0.09, wpb=64754, bsz=128, num_updates=14430, lr=9.98926e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=165654
2021-06-20 16:39:51 | INFO | train_inner | epoch 005: 2508 / 3002 loss=2.494, ppl=5.63, wps=5803.4, ups=0.09, wpb=64825, bsz=128, num_updates=14431, lr=9.98925e-05, gnorm=1.87, loss_scale=8, train_wall=11, gb_free=2.8, wall=165666
2021-06-20 16:40:02 | INFO | train_inner | epoch 005: 2509 / 3002 loss=2.502, ppl=5.67, wps=6003.9, ups=0.09, wpb=64892, bsz=128, num_updates=14432, lr=9.98925e-05, gnorm=1.887, loss_scale=8, train_wall=10, gb_free=2.8, wall=165676
2021-06-20 16:40:13 | INFO | train_inner | epoch 005: 2510 / 3002 loss=2.46, ppl=5.5, wps=5832.5, ups=0.09, wpb=64891, bsz=128, num_updates=14433, lr=9.98925e-05, gnorm=1.994, loss_scale=8, train_wall=11, gb_free=2.8, wall=165687
2021-06-20 16:40:24 | INFO | train_inner | epoch 005: 2511 / 3002 loss=2.548, ppl=5.85, wps=5839.6, ups=0.09, wpb=64882, bsz=128, num_updates=14434, lr=9.98925e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=165699
2021-06-20 16:40:35 | INFO | train_inner | epoch 005: 2512 / 3002 loss=2.641, ppl=6.24, wps=5925.8, ups=0.09, wpb=64835, bsz=128, num_updates=14435, lr=9.98925e-05, gnorm=1.97, loss_scale=8, train_wall=10, gb_free=2.8, wall=165710
2021-06-20 16:40:46 | INFO | train_inner | epoch 005: 2513 / 3002 loss=2.473, ppl=5.55, wps=5873.4, ups=0.09, wpb=64859, bsz=128, num_updates=14436, lr=9.98925e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=165721
2021-06-20 16:40:57 | INFO | train_inner | epoch 005: 2514 / 3002 loss=2.432, ppl=5.4, wps=5794.3, ups=0.09, wpb=64805, bsz=128, num_updates=14437, lr=9.98925e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=165732
2021-06-20 16:41:09 | INFO | train_inner | epoch 005: 2515 / 3002 loss=2.437, ppl=5.41, wps=5826.1, ups=0.09, wpb=64894, bsz=128, num_updates=14438, lr=9.98925e-05, gnorm=1.934, loss_scale=8, train_wall=11, gb_free=2.8, wall=165743
2021-06-20 16:41:20 | INFO | train_inner | epoch 005: 2516 / 3002 loss=2.553, ppl=5.87, wps=5845.6, ups=0.09, wpb=64832, bsz=128, num_updates=14439, lr=9.98925e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=165754
2021-06-20 16:41:31 | INFO | train_inner | epoch 005: 2517 / 3002 loss=2.36, ppl=5.13, wps=5922.9, ups=0.09, wpb=64868, bsz=128, num_updates=14440, lr=9.98925e-05, gnorm=2.017, loss_scale=8, train_wall=10, gb_free=2.8, wall=165765
2021-06-20 16:41:42 | INFO | train_inner | epoch 005: 2518 / 3002 loss=2.557, ppl=5.88, wps=5806.9, ups=0.09, wpb=64841, bsz=128, num_updates=14441, lr=9.98925e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=165776
2021-06-20 16:41:53 | INFO | train_inner | epoch 005: 2519 / 3002 loss=2.417, ppl=5.34, wps=5842.1, ups=0.09, wpb=64891, bsz=128, num_updates=14442, lr=9.98925e-05, gnorm=1.886, loss_scale=8, train_wall=11, gb_free=2.8, wall=165787
2021-06-20 16:42:04 | INFO | train_inner | epoch 005: 2520 / 3002 loss=2.691, ppl=6.46, wps=5909.5, ups=0.09, wpb=64850, bsz=128, num_updates=14443, lr=9.98924e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=165798
2021-06-20 16:42:15 | INFO | train_inner | epoch 005: 2521 / 3002 loss=2.662, ppl=6.33, wps=5809.9, ups=0.09, wpb=64805, bsz=128, num_updates=14444, lr=9.98924e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=165809
2021-06-20 16:42:26 | INFO | train_inner | epoch 005: 2522 / 3002 loss=2.472, ppl=5.55, wps=5942.6, ups=0.09, wpb=64834, bsz=128, num_updates=14445, lr=9.98924e-05, gnorm=1.941, loss_scale=8, train_wall=10, gb_free=2.8, wall=165820
2021-06-20 16:42:37 | INFO | train_inner | epoch 005: 2523 / 3002 loss=2.5, ppl=5.66, wps=5822.7, ups=0.09, wpb=64844, bsz=128, num_updates=14446, lr=9.98924e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=165831
2021-06-20 16:42:48 | INFO | train_inner | epoch 005: 2524 / 3002 loss=2.817, ppl=7.05, wps=5815.6, ups=0.09, wpb=64754, bsz=128, num_updates=14447, lr=9.98924e-05, gnorm=2.273, loss_scale=8, train_wall=11, gb_free=2.8, wall=165843
2021-06-20 16:42:59 | INFO | train_inner | epoch 005: 2525 / 3002 loss=2.514, ppl=5.71, wps=5828.2, ups=0.09, wpb=64842, bsz=128, num_updates=14448, lr=9.98924e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=165854
2021-06-20 16:43:11 | INFO | train_inner | epoch 005: 2526 / 3002 loss=2.511, ppl=5.7, wps=5780.4, ups=0.09, wpb=64854, bsz=128, num_updates=14449, lr=9.98924e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=165865
2021-06-20 16:43:22 | INFO | train_inner | epoch 005: 2527 / 3002 loss=2.36, ppl=5.13, wps=5873.3, ups=0.09, wpb=64793, bsz=128, num_updates=14450, lr=9.98924e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=165876
2021-06-20 16:43:32 | INFO | train_inner | epoch 005: 2528 / 3002 loss=2.497, ppl=5.65, wps=5955.6, ups=0.09, wpb=64770, bsz=128, num_updates=14451, lr=9.98924e-05, gnorm=2.021, loss_scale=8, train_wall=10, gb_free=2.8, wall=165887
2021-06-20 16:43:44 | INFO | train_inner | epoch 005: 2529 / 3002 loss=2.503, ppl=5.67, wps=5815.8, ups=0.09, wpb=64892, bsz=128, num_updates=14452, lr=9.98924e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=165898
2021-06-20 16:43:55 | INFO | train_inner | epoch 005: 2530 / 3002 loss=2.384, ppl=5.22, wps=5780.3, ups=0.09, wpb=64836, bsz=128, num_updates=14453, lr=9.98924e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=165909
2021-06-20 16:44:06 | INFO | train_inner | epoch 005: 2531 / 3002 loss=2.497, ppl=5.64, wps=5914, ups=0.09, wpb=64799, bsz=128, num_updates=14454, lr=9.98924e-05, gnorm=1.965, loss_scale=8, train_wall=10, gb_free=2.8, wall=165920
2021-06-20 16:44:17 | INFO | train_inner | epoch 005: 2532 / 3002 loss=2.546, ppl=5.84, wps=5785.3, ups=0.09, wpb=64875, bsz=128, num_updates=14455, lr=9.98924e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=165931
2021-06-20 16:44:28 | INFO | train_inner | epoch 005: 2533 / 3002 loss=2.425, ppl=5.37, wps=5737.9, ups=0.09, wpb=64820, bsz=128, num_updates=14456, lr=9.98923e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=165943
2021-06-20 16:44:39 | INFO | train_inner | epoch 005: 2534 / 3002 loss=2.508, ppl=5.69, wps=5873.9, ups=0.09, wpb=64911, bsz=128, num_updates=14457, lr=9.98923e-05, gnorm=2.414, loss_scale=8, train_wall=11, gb_free=2.8, wall=165954
2021-06-20 16:44:51 | INFO | train_inner | epoch 005: 2535 / 3002 loss=2.678, ppl=6.4, wps=5764.2, ups=0.09, wpb=64785, bsz=128, num_updates=14458, lr=9.98923e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=165965
2021-06-20 16:45:02 | INFO | train_inner | epoch 005: 2536 / 3002 loss=2.425, ppl=5.37, wps=5838.7, ups=0.09, wpb=64809, bsz=128, num_updates=14459, lr=9.98923e-05, gnorm=1.895, loss_scale=8, train_wall=11, gb_free=2.8, wall=165976
2021-06-20 16:45:13 | INFO | train_inner | epoch 005: 2537 / 3002 loss=2.385, ppl=5.22, wps=5785.5, ups=0.09, wpb=64816, bsz=128, num_updates=14460, lr=9.98923e-05, gnorm=1.889, loss_scale=8, train_wall=11, gb_free=2.8, wall=165987
2021-06-20 16:45:24 | INFO | train_inner | epoch 005: 2538 / 3002 loss=2.498, ppl=5.65, wps=5896.5, ups=0.09, wpb=64844, bsz=128, num_updates=14461, lr=9.98923e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=165998
2021-06-20 16:45:35 | INFO | train_inner | epoch 005: 2539 / 3002 loss=2.478, ppl=5.57, wps=5778.4, ups=0.09, wpb=64825, bsz=128, num_updates=14462, lr=9.98923e-05, gnorm=1.885, loss_scale=8, train_wall=11, gb_free=2.8, wall=166009
2021-06-20 16:45:46 | INFO | train_inner | epoch 005: 2540 / 3002 loss=2.633, ppl=6.2, wps=5786, ups=0.09, wpb=64868, bsz=128, num_updates=14463, lr=9.98923e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=166021
2021-06-20 16:45:57 | INFO | train_inner | epoch 005: 2541 / 3002 loss=2.492, ppl=5.63, wps=5886.7, ups=0.09, wpb=64781, bsz=128, num_updates=14464, lr=9.98923e-05, gnorm=1.802, loss_scale=8, train_wall=11, gb_free=2.8, wall=166032
2021-06-20 16:46:09 | INFO | train_inner | epoch 005: 2542 / 3002 loss=2.416, ppl=5.34, wps=5744.7, ups=0.09, wpb=64805, bsz=128, num_updates=14465, lr=9.98923e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=166043
2021-06-20 16:46:20 | INFO | train_inner | epoch 005: 2543 / 3002 loss=2.556, ppl=5.88, wps=5876.7, ups=0.09, wpb=64870, bsz=128, num_updates=14466, lr=9.98923e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=166054
2021-06-20 16:46:31 | INFO | train_inner | epoch 005: 2544 / 3002 loss=2.483, ppl=5.59, wps=5874.4, ups=0.09, wpb=64875, bsz=128, num_updates=14467, lr=9.98923e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=166065
2021-06-20 16:46:42 | INFO | train_inner | epoch 005: 2545 / 3002 loss=2.484, ppl=5.6, wps=5906, ups=0.09, wpb=64890, bsz=128, num_updates=14468, lr=9.98922e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=166076
2021-06-20 16:46:53 | INFO | train_inner | epoch 005: 2546 / 3002 loss=2.38, ppl=5.21, wps=5732.4, ups=0.09, wpb=64891, bsz=128, num_updates=14469, lr=9.98922e-05, gnorm=2.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=166087
2021-06-20 16:47:04 | INFO | train_inner | epoch 005: 2547 / 3002 loss=2.393, ppl=5.25, wps=5862.6, ups=0.09, wpb=64870, bsz=128, num_updates=14470, lr=9.98922e-05, gnorm=2, loss_scale=8, train_wall=11, gb_free=2.8, wall=166098
2021-06-20 16:47:15 | INFO | train_inner | epoch 005: 2548 / 3002 loss=2.456, ppl=5.49, wps=5804.8, ups=0.09, wpb=64741, bsz=128, num_updates=14471, lr=9.98922e-05, gnorm=3.501, loss_scale=8, train_wall=11, gb_free=2.8, wall=166110
2021-06-20 16:47:27 | INFO | train_inner | epoch 005: 2549 / 3002 loss=2.321, ppl=5, wps=5744.9, ups=0.09, wpb=64890, bsz=128, num_updates=14472, lr=9.98922e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=166121
2021-06-20 16:47:38 | INFO | train_inner | epoch 005: 2550 / 3002 loss=2.512, ppl=5.7, wps=5730.7, ups=0.09, wpb=64855, bsz=128, num_updates=14473, lr=9.98922e-05, gnorm=1.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=166132
2021-06-20 16:47:49 | INFO | train_inner | epoch 005: 2551 / 3002 loss=2.452, ppl=5.47, wps=5863.2, ups=0.09, wpb=64798, bsz=128, num_updates=14474, lr=9.98922e-05, gnorm=2.272, loss_scale=8, train_wall=11, gb_free=2.8, wall=166143
2021-06-20 16:48:00 | INFO | train_inner | epoch 005: 2552 / 3002 loss=2.469, ppl=5.54, wps=5866.6, ups=0.09, wpb=64814, bsz=128, num_updates=14475, lr=9.98922e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=166154
2021-06-20 16:48:11 | INFO | train_inner | epoch 005: 2553 / 3002 loss=2.451, ppl=5.47, wps=5842.7, ups=0.09, wpb=64796, bsz=128, num_updates=14476, lr=9.98922e-05, gnorm=1.899, loss_scale=8, train_wall=11, gb_free=2.8, wall=166165
2021-06-20 16:48:22 | INFO | train_inner | epoch 005: 2554 / 3002 loss=2.446, ppl=5.45, wps=5841.7, ups=0.09, wpb=64894, bsz=128, num_updates=14477, lr=9.98922e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=166176
2021-06-20 16:48:33 | INFO | train_inner | epoch 005: 2555 / 3002 loss=2.454, ppl=5.48, wps=5720.1, ups=0.09, wpb=64775, bsz=128, num_updates=14478, lr=9.98922e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=166188
2021-06-20 16:48:44 | INFO | train_inner | epoch 005: 2556 / 3002 loss=2.454, ppl=5.48, wps=5936.3, ups=0.09, wpb=64836, bsz=128, num_updates=14479, lr=9.98922e-05, gnorm=1.925, loss_scale=8, train_wall=10, gb_free=2.8, wall=166199
2021-06-20 16:48:56 | INFO | train_inner | epoch 005: 2557 / 3002 loss=2.461, ppl=5.5, wps=5757.7, ups=0.09, wpb=64889, bsz=128, num_updates=14480, lr=9.98922e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=166210
2021-06-20 16:49:07 | INFO | train_inner | epoch 005: 2558 / 3002 loss=2.555, ppl=5.88, wps=5787.8, ups=0.09, wpb=64817, bsz=128, num_updates=14481, lr=9.98921e-05, gnorm=2.052, loss_scale=8, train_wall=11, gb_free=2.8, wall=166221
2021-06-20 16:49:18 | INFO | train_inner | epoch 005: 2559 / 3002 loss=2.506, ppl=5.68, wps=5705.7, ups=0.09, wpb=64819, bsz=128, num_updates=14482, lr=9.98921e-05, gnorm=2.015, loss_scale=8, train_wall=11, gb_free=2.8, wall=166233
2021-06-20 16:49:29 | INFO | train_inner | epoch 005: 2560 / 3002 loss=2.577, ppl=5.97, wps=5825.9, ups=0.09, wpb=64801, bsz=128, num_updates=14483, lr=9.98921e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=166244
2021-06-20 16:49:40 | INFO | train_inner | epoch 005: 2561 / 3002 loss=2.527, ppl=5.76, wps=5879.8, ups=0.09, wpb=64856, bsz=128, num_updates=14484, lr=9.98921e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=166255
2021-06-20 16:49:51 | INFO | train_inner | epoch 005: 2562 / 3002 loss=2.476, ppl=5.56, wps=5884.8, ups=0.09, wpb=64776, bsz=128, num_updates=14485, lr=9.98921e-05, gnorm=1.884, loss_scale=8, train_wall=11, gb_free=2.8, wall=166266
2021-06-20 16:50:03 | INFO | train_inner | epoch 005: 2563 / 3002 loss=2.404, ppl=5.29, wps=5770.5, ups=0.09, wpb=64847, bsz=128, num_updates=14486, lr=9.98921e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=166277
2021-06-20 16:50:14 | INFO | train_inner | epoch 005: 2564 / 3002 loss=2.674, ppl=6.38, wps=5832.8, ups=0.09, wpb=64851, bsz=128, num_updates=14487, lr=9.98921e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=166288
2021-06-20 16:50:25 | INFO | train_inner | epoch 005: 2565 / 3002 loss=2.441, ppl=5.43, wps=5885.6, ups=0.09, wpb=64797, bsz=128, num_updates=14488, lr=9.98921e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=166299
2021-06-20 16:50:36 | INFO | train_inner | epoch 005: 2566 / 3002 loss=2.438, ppl=5.42, wps=5874.2, ups=0.09, wpb=64788, bsz=128, num_updates=14489, lr=9.98921e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=166310
2021-06-20 16:50:47 | INFO | train_inner | epoch 005: 2567 / 3002 loss=2.401, ppl=5.28, wps=5784.3, ups=0.09, wpb=64851, bsz=128, num_updates=14490, lr=9.98921e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=166321
2021-06-20 16:50:58 | INFO | train_inner | epoch 005: 2568 / 3002 loss=2.611, ppl=6.11, wps=5927.1, ups=0.09, wpb=64862, bsz=128, num_updates=14491, lr=9.98921e-05, gnorm=2.002, loss_scale=8, train_wall=10, gb_free=2.8, wall=166332
2021-06-20 16:51:09 | INFO | train_inner | epoch 005: 2569 / 3002 loss=2.308, ppl=4.95, wps=5913.5, ups=0.09, wpb=64853, bsz=128, num_updates=14492, lr=9.98921e-05, gnorm=1.987, loss_scale=8, train_wall=10, gb_free=2.8, wall=166343
2021-06-20 16:51:20 | INFO | train_inner | epoch 005: 2570 / 3002 loss=2.517, ppl=5.73, wps=5977.6, ups=0.09, wpb=64787, bsz=128, num_updates=14493, lr=9.9892e-05, gnorm=2.063, loss_scale=8, train_wall=10, gb_free=2.8, wall=166354
2021-06-20 16:51:31 | INFO | train_inner | epoch 005: 2571 / 3002 loss=2.591, ppl=6.02, wps=5947.2, ups=0.09, wpb=64856, bsz=128, num_updates=14494, lr=9.9892e-05, gnorm=1.996, loss_scale=8, train_wall=10, gb_free=2.8, wall=166365
2021-06-20 16:51:42 | INFO | train_inner | epoch 005: 2572 / 3002 loss=2.509, ppl=5.69, wps=5918.4, ups=0.09, wpb=64818, bsz=128, num_updates=14495, lr=9.9892e-05, gnorm=1.922, loss_scale=8, train_wall=10, gb_free=2.8, wall=166376
2021-06-20 16:51:53 | INFO | train_inner | epoch 005: 2573 / 3002 loss=2.369, ppl=5.17, wps=5807.4, ups=0.09, wpb=64832, bsz=128, num_updates=14496, lr=9.9892e-05, gnorm=1.867, loss_scale=8, train_wall=11, gb_free=2.8, wall=166387
2021-06-20 16:52:04 | INFO | train_inner | epoch 005: 2574 / 3002 loss=2.485, ppl=5.6, wps=5833.1, ups=0.09, wpb=64852, bsz=128, num_updates=14497, lr=9.9892e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=166398
2021-06-20 16:52:15 | INFO | train_inner | epoch 005: 2575 / 3002 loss=2.469, ppl=5.54, wps=5811.1, ups=0.09, wpb=64864, bsz=128, num_updates=14498, lr=9.9892e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=166409
2021-06-20 16:52:26 | INFO | train_inner | epoch 005: 2576 / 3002 loss=2.515, ppl=5.72, wps=5893.1, ups=0.09, wpb=64778, bsz=128, num_updates=14499, lr=9.9892e-05, gnorm=1.878, loss_scale=8, train_wall=11, gb_free=2.8, wall=166420
2021-06-20 16:52:37 | INFO | train_inner | epoch 005: 2577 / 3002 loss=2.495, ppl=5.64, wps=5916.4, ups=0.09, wpb=64801, bsz=128, num_updates=14500, lr=9.9892e-05, gnorm=1.959, loss_scale=8, train_wall=10, gb_free=2.8, wall=166431
2021-06-20 16:52:48 | INFO | train_inner | epoch 005: 2578 / 3002 loss=2.44, ppl=5.43, wps=5677.7, ups=0.09, wpb=64839, bsz=128, num_updates=14501, lr=9.9892e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=166443
2021-06-20 16:53:00 | INFO | train_inner | epoch 005: 2579 / 3002 loss=2.586, ppl=6, wps=5746.2, ups=0.09, wpb=64805, bsz=128, num_updates=14502, lr=9.9892e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=166454
2021-06-20 16:53:11 | INFO | train_inner | epoch 005: 2580 / 3002 loss=2.553, ppl=5.87, wps=5986.2, ups=0.09, wpb=64914, bsz=128, num_updates=14503, lr=9.9892e-05, gnorm=2.079, loss_scale=8, train_wall=10, gb_free=2.8, wall=166465
2021-06-20 16:53:21 | INFO | train_inner | epoch 005: 2581 / 3002 loss=2.481, ppl=5.58, wps=5957.3, ups=0.09, wpb=64870, bsz=128, num_updates=14504, lr=9.9892e-05, gnorm=1.97, loss_scale=8, train_wall=10, gb_free=2.8, wall=166476
2021-06-20 16:53:33 | INFO | train_inner | epoch 005: 2582 / 3002 loss=2.378, ppl=5.2, wps=5834.9, ups=0.09, wpb=64816, bsz=128, num_updates=14505, lr=9.9892e-05, gnorm=1.843, loss_scale=8, train_wall=11, gb_free=2.8, wall=166487
2021-06-20 16:53:43 | INFO | train_inner | epoch 005: 2583 / 3002 loss=2.434, ppl=5.4, wps=6020.2, ups=0.09, wpb=64860, bsz=128, num_updates=14506, lr=9.98919e-05, gnorm=1.948, loss_scale=8, train_wall=10, gb_free=2.8, wall=166498
2021-06-20 16:53:54 | INFO | train_inner | epoch 005: 2584 / 3002 loss=2.472, ppl=5.55, wps=6028.7, ups=0.09, wpb=64792, bsz=128, num_updates=14507, lr=9.98919e-05, gnorm=1.94, loss_scale=8, train_wall=10, gb_free=2.8, wall=166508
2021-06-20 16:54:05 | INFO | train_inner | epoch 005: 2585 / 3002 loss=2.596, ppl=6.05, wps=5878.8, ups=0.09, wpb=64890, bsz=128, num_updates=14508, lr=9.98919e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=166519
2021-06-20 16:54:16 | INFO | train_inner | epoch 005: 2586 / 3002 loss=2.61, ppl=6.1, wps=5787.5, ups=0.09, wpb=64810, bsz=128, num_updates=14509, lr=9.98919e-05, gnorm=2.055, loss_scale=8, train_wall=11, gb_free=2.8, wall=166531
2021-06-20 16:54:27 | INFO | train_inner | epoch 005: 2587 / 3002 loss=2.415, ppl=5.33, wps=5808.1, ups=0.09, wpb=64854, bsz=128, num_updates=14510, lr=9.98919e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=166542
2021-06-20 16:54:39 | INFO | train_inner | epoch 005: 2588 / 3002 loss=2.571, ppl=5.94, wps=5838.7, ups=0.09, wpb=64830, bsz=128, num_updates=14511, lr=9.98919e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=166553
2021-06-20 16:54:50 | INFO | train_inner | epoch 005: 2589 / 3002 loss=2.374, ppl=5.18, wps=5718.1, ups=0.09, wpb=64889, bsz=128, num_updates=14512, lr=9.98919e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=166564
2021-06-20 16:55:01 | INFO | train_inner | epoch 005: 2590 / 3002 loss=2.375, ppl=5.19, wps=5832.4, ups=0.09, wpb=64799, bsz=128, num_updates=14513, lr=9.98919e-05, gnorm=1.882, loss_scale=8, train_wall=11, gb_free=2.8, wall=166575
2021-06-20 16:55:12 | INFO | train_inner | epoch 005: 2591 / 3002 loss=2.507, ppl=5.69, wps=5911.3, ups=0.09, wpb=64824, bsz=128, num_updates=14514, lr=9.98919e-05, gnorm=1.851, loss_scale=8, train_wall=11, gb_free=2.8, wall=166586
2021-06-20 16:55:23 | INFO | train_inner | epoch 005: 2592 / 3002 loss=2.593, ppl=6.03, wps=5871.4, ups=0.09, wpb=64806, bsz=128, num_updates=14515, lr=9.98919e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=166597
2021-06-20 16:55:34 | INFO | train_inner | epoch 005: 2593 / 3002 loss=2.526, ppl=5.76, wps=5847.6, ups=0.09, wpb=64825, bsz=128, num_updates=14516, lr=9.98919e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=166608
2021-06-20 16:55:45 | INFO | train_inner | epoch 005: 2594 / 3002 loss=2.485, ppl=5.6, wps=5970.4, ups=0.09, wpb=64856, bsz=128, num_updates=14517, lr=9.98919e-05, gnorm=1.89, loss_scale=8, train_wall=10, gb_free=2.8, wall=166619
2021-06-20 16:55:56 | INFO | train_inner | epoch 005: 2595 / 3002 loss=2.502, ppl=5.67, wps=5850.6, ups=0.09, wpb=64879, bsz=128, num_updates=14518, lr=9.98918e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=166630
2021-06-20 16:56:07 | INFO | train_inner | epoch 005: 2596 / 3002 loss=2.472, ppl=5.55, wps=5860, ups=0.09, wpb=64853, bsz=128, num_updates=14519, lr=9.98918e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=166641
2021-06-20 16:56:18 | INFO | train_inner | epoch 005: 2597 / 3002 loss=2.639, ppl=6.23, wps=5843.9, ups=0.09, wpb=64774, bsz=128, num_updates=14520, lr=9.98918e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=166653
2021-06-20 16:56:30 | INFO | train_inner | epoch 005: 2598 / 3002 loss=2.288, ppl=4.89, wps=5690.7, ups=0.09, wpb=64841, bsz=128, num_updates=14521, lr=9.98918e-05, gnorm=1.877, loss_scale=8, train_wall=11, gb_free=2.8, wall=166664
2021-06-20 16:56:41 | INFO | train_inner | epoch 005: 2599 / 3002 loss=2.42, ppl=5.35, wps=5825.4, ups=0.09, wpb=64823, bsz=128, num_updates=14522, lr=9.98918e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=166675
2021-06-20 16:56:52 | INFO | train_inner | epoch 005: 2600 / 3002 loss=2.529, ppl=5.77, wps=5743.4, ups=0.09, wpb=64733, bsz=128, num_updates=14523, lr=9.98918e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=166686
2021-06-20 16:57:03 | INFO | train_inner | epoch 005: 2601 / 3002 loss=2.475, ppl=5.56, wps=5725, ups=0.09, wpb=64869, bsz=128, num_updates=14524, lr=9.98918e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=166698
2021-06-20 16:57:14 | INFO | train_inner | epoch 005: 2602 / 3002 loss=2.649, ppl=6.27, wps=5846, ups=0.09, wpb=64825, bsz=128, num_updates=14525, lr=9.98918e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=166709
2021-06-20 16:57:26 | INFO | train_inner | epoch 005: 2603 / 3002 loss=2.633, ppl=6.2, wps=5795.2, ups=0.09, wpb=64793, bsz=128, num_updates=14526, lr=9.98918e-05, gnorm=2.062, loss_scale=8, train_wall=11, gb_free=2.8, wall=166720
2021-06-20 16:57:37 | INFO | train_inner | epoch 005: 2604 / 3002 loss=2.601, ppl=6.07, wps=5888.2, ups=0.09, wpb=64767, bsz=128, num_updates=14527, lr=9.98918e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=166731
2021-06-20 16:57:48 | INFO | train_inner | epoch 005: 2605 / 3002 loss=2.384, ppl=5.22, wps=5915.8, ups=0.09, wpb=64885, bsz=128, num_updates=14528, lr=9.98918e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=166742
2021-06-20 16:57:59 | INFO | train_inner | epoch 005: 2606 / 3002 loss=2.37, ppl=5.17, wps=5916.7, ups=0.09, wpb=64850, bsz=128, num_updates=14529, lr=9.98918e-05, gnorm=1.966, loss_scale=8, train_wall=10, gb_free=2.8, wall=166753
2021-06-20 16:58:10 | INFO | train_inner | epoch 005: 2607 / 3002 loss=2.576, ppl=5.96, wps=5866.2, ups=0.09, wpb=64823, bsz=128, num_updates=14530, lr=9.98918e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=166764
2021-06-20 16:58:21 | INFO | train_inner | epoch 005: 2608 / 3002 loss=2.716, ppl=6.57, wps=5845.1, ups=0.09, wpb=64793, bsz=128, num_updates=14531, lr=9.98917e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=166775
2021-06-20 16:58:32 | INFO | train_inner | epoch 005: 2609 / 3002 loss=2.568, ppl=5.93, wps=5886.1, ups=0.09, wpb=64737, bsz=128, num_updates=14532, lr=9.98917e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=166786
2021-06-20 16:58:43 | INFO | train_inner | epoch 005: 2610 / 3002 loss=2.362, ppl=5.14, wps=5877.3, ups=0.09, wpb=64870, bsz=128, num_updates=14533, lr=9.98917e-05, gnorm=1.89, loss_scale=8, train_wall=11, gb_free=2.8, wall=166797
2021-06-20 16:58:54 | INFO | train_inner | epoch 005: 2611 / 3002 loss=2.418, ppl=5.35, wps=5778.5, ups=0.09, wpb=64817, bsz=128, num_updates=14534, lr=9.98917e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=166808
2021-06-20 16:59:05 | INFO | train_inner | epoch 005: 2612 / 3002 loss=2.524, ppl=5.75, wps=5870.5, ups=0.09, wpb=64876, bsz=128, num_updates=14535, lr=9.98917e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=166819
2021-06-20 16:59:16 | INFO | train_inner | epoch 005: 2613 / 3002 loss=2.571, ppl=5.94, wps=5864.1, ups=0.09, wpb=64828, bsz=128, num_updates=14536, lr=9.98917e-05, gnorm=1.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=166830
2021-06-20 16:59:27 | INFO | train_inner | epoch 005: 2614 / 3002 loss=2.721, ppl=6.59, wps=5834.8, ups=0.09, wpb=64909, bsz=128, num_updates=14537, lr=9.98917e-05, gnorm=2.058, loss_scale=8, train_wall=11, gb_free=2.8, wall=166841
2021-06-20 16:59:38 | INFO | train_inner | epoch 005: 2615 / 3002 loss=2.362, ppl=5.14, wps=5907.6, ups=0.09, wpb=64845, bsz=128, num_updates=14538, lr=9.98917e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=166852
2021-06-20 16:59:49 | INFO | train_inner | epoch 005: 2616 / 3002 loss=2.547, ppl=5.84, wps=5805.6, ups=0.09, wpb=64869, bsz=128, num_updates=14539, lr=9.98917e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=166864
2021-06-20 17:00:00 | INFO | train_inner | epoch 005: 2617 / 3002 loss=2.554, ppl=5.87, wps=5830.8, ups=0.09, wpb=64847, bsz=128, num_updates=14540, lr=9.98917e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=166875
2021-06-20 17:00:11 | INFO | train_inner | epoch 005: 2618 / 3002 loss=2.578, ppl=5.97, wps=5872, ups=0.09, wpb=64897, bsz=128, num_updates=14541, lr=9.98917e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=166886
2021-06-20 17:00:23 | INFO | train_inner | epoch 005: 2619 / 3002 loss=2.588, ppl=6.01, wps=5832.5, ups=0.09, wpb=64900, bsz=128, num_updates=14542, lr=9.98917e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=166897
2021-06-20 17:00:34 | INFO | train_inner | epoch 005: 2620 / 3002 loss=2.616, ppl=6.13, wps=5892.3, ups=0.09, wpb=64686, bsz=128, num_updates=14543, lr=9.98916e-05, gnorm=2.153, loss_scale=16, train_wall=11, gb_free=2.8, wall=166908
2021-06-20 17:00:44 | INFO | train_inner | epoch 005: 2621 / 3002 loss=2.476, ppl=5.56, wps=5957.9, ups=0.09, wpb=64889, bsz=128, num_updates=14544, lr=9.98916e-05, gnorm=1.918, loss_scale=16, train_wall=10, gb_free=2.8, wall=166919
2021-06-20 17:00:55 | INFO | train_inner | epoch 005: 2622 / 3002 loss=2.473, ppl=5.55, wps=5947.5, ups=0.09, wpb=64789, bsz=128, num_updates=14545, lr=9.98916e-05, gnorm=1.895, loss_scale=16, train_wall=10, gb_free=2.8, wall=166930
2021-06-20 17:01:06 | INFO | train_inner | epoch 005: 2623 / 3002 loss=2.582, ppl=5.99, wps=5915.1, ups=0.09, wpb=64821, bsz=128, num_updates=14546, lr=9.98916e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=166941
2021-06-20 17:01:17 | INFO | train_inner | epoch 005: 2624 / 3002 loss=2.498, ppl=5.65, wps=5808.2, ups=0.09, wpb=64813, bsz=128, num_updates=14547, lr=9.98916e-05, gnorm=1.912, loss_scale=16, train_wall=11, gb_free=2.8, wall=166952
2021-06-20 17:01:29 | INFO | train_inner | epoch 005: 2625 / 3002 loss=2.488, ppl=5.61, wps=5836.5, ups=0.09, wpb=64831, bsz=128, num_updates=14548, lr=9.98916e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=166963
2021-06-20 17:01:40 | INFO | train_inner | epoch 005: 2626 / 3002 loss=2.411, ppl=5.32, wps=5892.9, ups=0.09, wpb=64859, bsz=128, num_updates=14549, lr=9.98916e-05, gnorm=2.038, loss_scale=16, train_wall=11, gb_free=2.8, wall=166974
2021-06-20 17:01:51 | INFO | train_inner | epoch 005: 2627 / 3002 loss=2.386, ppl=5.23, wps=5770.9, ups=0.09, wpb=64776, bsz=128, num_updates=14550, lr=9.98916e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=166985
2021-06-20 17:02:02 | INFO | train_inner | epoch 005: 2628 / 3002 loss=2.472, ppl=5.55, wps=5919.1, ups=0.09, wpb=64839, bsz=128, num_updates=14551, lr=9.98916e-05, gnorm=2.062, loss_scale=16, train_wall=11, gb_free=2.8, wall=166996
2021-06-20 17:02:13 | INFO | train_inner | epoch 005: 2629 / 3002 loss=2.58, ppl=5.98, wps=5837.6, ups=0.09, wpb=64781, bsz=128, num_updates=14552, lr=9.98916e-05, gnorm=2.01, loss_scale=16, train_wall=11, gb_free=2.8, wall=167007
2021-06-20 17:02:24 | INFO | train_inner | epoch 005: 2630 / 3002 loss=2.398, ppl=5.27, wps=5855.1, ups=0.09, wpb=64832, bsz=128, num_updates=14553, lr=9.98916e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=167018
2021-06-20 17:02:35 | INFO | train_inner | epoch 005: 2631 / 3002 loss=2.491, ppl=5.62, wps=5844.1, ups=0.09, wpb=64900, bsz=128, num_updates=14554, lr=9.98916e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=167029
2021-06-20 17:02:46 | INFO | train_inner | epoch 005: 2632 / 3002 loss=2.539, ppl=5.81, wps=5907.8, ups=0.09, wpb=64818, bsz=128, num_updates=14555, lr=9.98916e-05, gnorm=1.893, loss_scale=16, train_wall=11, gb_free=2.8, wall=167040
2021-06-20 17:02:57 | INFO | train_inner | epoch 005: 2633 / 3002 loss=2.578, ppl=5.97, wps=5753.4, ups=0.09, wpb=64771, bsz=128, num_updates=14556, lr=9.98915e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=167052
2021-06-20 17:03:08 | INFO | train_inner | epoch 005: 2634 / 3002 loss=2.442, ppl=5.43, wps=5852.2, ups=0.09, wpb=64848, bsz=128, num_updates=14557, lr=9.98915e-05, gnorm=1.968, loss_scale=16, train_wall=11, gb_free=2.8, wall=167063
2021-06-20 17:03:19 | INFO | train_inner | epoch 005: 2635 / 3002 loss=2.643, ppl=6.25, wps=5849.7, ups=0.09, wpb=64762, bsz=128, num_updates=14558, lr=9.98915e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=167074
2021-06-20 17:03:30 | INFO | train_inner | epoch 005: 2636 / 3002 loss=2.558, ppl=5.89, wps=5923.9, ups=0.09, wpb=64770, bsz=128, num_updates=14559, lr=9.98915e-05, gnorm=1.949, loss_scale=16, train_wall=10, gb_free=2.8, wall=167085
2021-06-20 17:03:42 | INFO | train_inner | epoch 005: 2637 / 3002 loss=2.635, ppl=6.21, wps=5816.5, ups=0.09, wpb=64825, bsz=128, num_updates=14560, lr=9.98915e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=167096
2021-06-20 17:03:52 | INFO | train_inner | epoch 005: 2638 / 3002 loss=2.464, ppl=5.52, wps=5922.4, ups=0.09, wpb=64905, bsz=128, num_updates=14561, lr=9.98915e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=167107
2021-06-20 17:04:04 | INFO | train_inner | epoch 005: 2639 / 3002 loss=2.465, ppl=5.52, wps=5849.4, ups=0.09, wpb=64769, bsz=128, num_updates=14562, lr=9.98915e-05, gnorm=1.912, loss_scale=16, train_wall=11, gb_free=2.8, wall=167118
2021-06-20 17:04:14 | INFO | train_inner | epoch 005: 2640 / 3002 loss=2.601, ppl=6.07, wps=5945.6, ups=0.09, wpb=64905, bsz=128, num_updates=14563, lr=9.98915e-05, gnorm=1.951, loss_scale=16, train_wall=10, gb_free=2.8, wall=167129
2021-06-20 17:04:25 | INFO | train_inner | epoch 005: 2641 / 3002 loss=2.659, ppl=6.31, wps=5900.3, ups=0.09, wpb=64818, bsz=128, num_updates=14564, lr=9.98915e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=167140
2021-06-20 17:04:37 | INFO | train_inner | epoch 005: 2642 / 3002 loss=2.548, ppl=5.85, wps=5782.1, ups=0.09, wpb=64872, bsz=128, num_updates=14565, lr=9.98915e-05, gnorm=2.009, loss_scale=16, train_wall=11, gb_free=2.8, wall=167151
2021-06-20 17:04:48 | INFO | train_inner | epoch 005: 2643 / 3002 loss=2.583, ppl=5.99, wps=5774.2, ups=0.09, wpb=64794, bsz=128, num_updates=14566, lr=9.98915e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=167162
2021-06-20 17:04:59 | INFO | train_inner | epoch 005: 2644 / 3002 loss=2.614, ppl=6.12, wps=5805.5, ups=0.09, wpb=64789, bsz=128, num_updates=14567, lr=9.98915e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=167173
2021-06-20 17:05:10 | INFO | train_inner | epoch 005: 2645 / 3002 loss=2.392, ppl=5.25, wps=5749.8, ups=0.09, wpb=64856, bsz=128, num_updates=14568, lr=9.98914e-05, gnorm=1.909, loss_scale=16, train_wall=11, gb_free=2.8, wall=167185
2021-06-20 17:05:22 | INFO | train_inner | epoch 005: 2646 / 3002 loss=2.463, ppl=5.52, wps=5801.4, ups=0.09, wpb=64832, bsz=128, num_updates=14569, lr=9.98914e-05, gnorm=1.879, loss_scale=16, train_wall=11, gb_free=2.8, wall=167196
2021-06-20 17:05:33 | INFO | train_inner | epoch 005: 2647 / 3002 loss=2.48, ppl=5.58, wps=5852.1, ups=0.09, wpb=64915, bsz=128, num_updates=14570, lr=9.98914e-05, gnorm=1.914, loss_scale=16, train_wall=11, gb_free=2.8, wall=167207
2021-06-20 17:05:44 | INFO | train_inner | epoch 005: 2648 / 3002 loss=2.403, ppl=5.29, wps=5841.2, ups=0.09, wpb=64760, bsz=128, num_updates=14571, lr=9.98914e-05, gnorm=1.871, loss_scale=16, train_wall=11, gb_free=2.8, wall=167218
2021-06-20 17:05:55 | INFO | train_inner | epoch 005: 2649 / 3002 loss=2.484, ppl=5.59, wps=5853.9, ups=0.09, wpb=64832, bsz=128, num_updates=14572, lr=9.98914e-05, gnorm=1.877, loss_scale=16, train_wall=11, gb_free=2.8, wall=167229
2021-06-20 17:06:06 | INFO | train_inner | epoch 005: 2650 / 3002 loss=2.424, ppl=5.37, wps=5903.8, ups=0.09, wpb=64842, bsz=128, num_updates=14573, lr=9.98914e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=167240
2021-06-20 17:06:17 | INFO | train_inner | epoch 005: 2651 / 3002 loss=2.565, ppl=5.92, wps=5801.7, ups=0.09, wpb=64882, bsz=128, num_updates=14574, lr=9.98914e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=167251
2021-06-20 17:06:28 | INFO | train_inner | epoch 005: 2652 / 3002 loss=2.411, ppl=5.32, wps=5814.2, ups=0.09, wpb=64808, bsz=128, num_updates=14575, lr=9.98914e-05, gnorm=1.919, loss_scale=16, train_wall=11, gb_free=2.8, wall=167262
2021-06-20 17:06:39 | INFO | train_inner | epoch 005: 2653 / 3002 loss=2.482, ppl=5.59, wps=5840, ups=0.09, wpb=64860, bsz=128, num_updates=14576, lr=9.98914e-05, gnorm=1.893, loss_scale=16, train_wall=11, gb_free=2.8, wall=167274
2021-06-20 17:06:50 | INFO | train_inner | epoch 005: 2654 / 3002 loss=2.481, ppl=5.58, wps=5782.6, ups=0.09, wpb=64824, bsz=128, num_updates=14577, lr=9.98914e-05, gnorm=1.967, loss_scale=16, train_wall=11, gb_free=2.8, wall=167285
2021-06-20 17:07:02 | INFO | train_inner | epoch 005: 2655 / 3002 loss=2.493, ppl=5.63, wps=5767.3, ups=0.09, wpb=64805, bsz=128, num_updates=14578, lr=9.98914e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=167296
2021-06-20 17:07:13 | INFO | train_inner | epoch 005: 2656 / 3002 loss=2.541, ppl=5.82, wps=5823.1, ups=0.09, wpb=64864, bsz=128, num_updates=14579, lr=9.98914e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=167307
2021-06-20 17:07:24 | INFO | train_inner | epoch 005: 2657 / 3002 loss=2.702, ppl=6.51, wps=5951.8, ups=0.09, wpb=64885, bsz=128, num_updates=14580, lr=9.98914e-05, gnorm=1.941, loss_scale=16, train_wall=10, gb_free=2.8, wall=167318
2021-06-20 17:07:35 | INFO | train_inner | epoch 005: 2658 / 3002 loss=2.421, ppl=5.35, wps=5767.4, ups=0.09, wpb=64764, bsz=128, num_updates=14581, lr=9.98913e-05, gnorm=1.899, loss_scale=16, train_wall=11, gb_free=2.8, wall=167329
2021-06-20 17:07:46 | INFO | train_inner | epoch 005: 2659 / 3002 loss=2.466, ppl=5.52, wps=5842.4, ups=0.09, wpb=64819, bsz=128, num_updates=14582, lr=9.98913e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=167340
2021-06-20 17:07:57 | INFO | train_inner | epoch 005: 2660 / 3002 loss=2.528, ppl=5.77, wps=5905.1, ups=0.09, wpb=64818, bsz=128, num_updates=14583, lr=9.98913e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=167351
2021-06-20 17:08:08 | INFO | train_inner | epoch 005: 2661 / 3002 loss=2.592, ppl=6.03, wps=6008, ups=0.09, wpb=64780, bsz=128, num_updates=14584, lr=9.98913e-05, gnorm=2, loss_scale=16, train_wall=10, gb_free=2.8, wall=167362
2021-06-20 17:08:19 | INFO | train_inner | epoch 005: 2662 / 3002 loss=2.514, ppl=5.71, wps=5866.9, ups=0.09, wpb=64810, bsz=128, num_updates=14585, lr=9.98913e-05, gnorm=1.943, loss_scale=16, train_wall=11, gb_free=2.8, wall=167373
2021-06-20 17:08:30 | INFO | train_inner | epoch 005: 2663 / 3002 loss=2.404, ppl=5.29, wps=5789.1, ups=0.09, wpb=64803, bsz=128, num_updates=14586, lr=9.98913e-05, gnorm=2.068, loss_scale=16, train_wall=11, gb_free=2.8, wall=167384
2021-06-20 17:08:41 | INFO | train_inner | epoch 005: 2664 / 3002 loss=2.567, ppl=5.92, wps=5776.4, ups=0.09, wpb=64844, bsz=128, num_updates=14587, lr=9.98913e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=167396
2021-06-20 17:08:52 | INFO | train_inner | epoch 005: 2665 / 3002 loss=2.508, ppl=5.69, wps=5953.2, ups=0.09, wpb=64851, bsz=128, num_updates=14588, lr=9.98913e-05, gnorm=1.967, loss_scale=16, train_wall=10, gb_free=2.8, wall=167406
2021-06-20 17:09:03 | INFO | train_inner | epoch 005: 2666 / 3002 loss=2.41, ppl=5.31, wps=5847.4, ups=0.09, wpb=64803, bsz=128, num_updates=14589, lr=9.98913e-05, gnorm=2.032, loss_scale=16, train_wall=11, gb_free=2.8, wall=167418
2021-06-20 17:09:14 | INFO | train_inner | epoch 005: 2667 / 3002 loss=2.507, ppl=5.68, wps=5809.5, ups=0.09, wpb=64796, bsz=128, num_updates=14590, lr=9.98913e-05, gnorm=1.828, loss_scale=16, train_wall=11, gb_free=2.8, wall=167429
2021-06-20 17:09:26 | INFO | train_inner | epoch 005: 2668 / 3002 loss=2.535, ppl=5.8, wps=5798, ups=0.09, wpb=64838, bsz=128, num_updates=14591, lr=9.98913e-05, gnorm=2.866, loss_scale=16, train_wall=11, gb_free=2.8, wall=167440
2021-06-20 17:09:37 | INFO | train_inner | epoch 005: 2669 / 3002 loss=2.457, ppl=5.49, wps=5806, ups=0.09, wpb=64893, bsz=128, num_updates=14592, lr=9.98913e-05, gnorm=1.883, loss_scale=16, train_wall=11, gb_free=2.8, wall=167451
2021-06-20 17:09:48 | INFO | train_inner | epoch 005: 2670 / 3002 loss=2.643, ppl=6.25, wps=5856, ups=0.09, wpb=64779, bsz=128, num_updates=14593, lr=9.98912e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=167462
2021-06-20 17:09:59 | INFO | train_inner | epoch 005: 2671 / 3002 loss=2.576, ppl=5.96, wps=5816.7, ups=0.09, wpb=64835, bsz=128, num_updates=14594, lr=9.98912e-05, gnorm=1.868, loss_scale=16, train_wall=11, gb_free=2.8, wall=167473
2021-06-20 17:10:10 | INFO | train_inner | epoch 005: 2672 / 3002 loss=2.512, ppl=5.71, wps=5819.5, ups=0.09, wpb=64848, bsz=128, num_updates=14595, lr=9.98912e-05, gnorm=2.068, loss_scale=16, train_wall=11, gb_free=2.8, wall=167484
2021-06-20 17:10:21 | INFO | train_inner | epoch 005: 2673 / 3002 loss=2.496, ppl=5.64, wps=5908.2, ups=0.09, wpb=64789, bsz=128, num_updates=14596, lr=9.98912e-05, gnorm=1.878, loss_scale=16, train_wall=11, gb_free=2.8, wall=167495
2021-06-20 17:10:32 | INFO | train_inner | epoch 005: 2674 / 3002 loss=2.493, ppl=5.63, wps=5880.8, ups=0.09, wpb=64894, bsz=128, num_updates=14597, lr=9.98912e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=167506
2021-06-20 17:10:43 | INFO | train_inner | epoch 005: 2675 / 3002 loss=2.45, ppl=5.46, wps=5830.6, ups=0.09, wpb=64894, bsz=128, num_updates=14598, lr=9.98912e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=167518
2021-06-20 17:10:54 | INFO | train_inner | epoch 005: 2676 / 3002 loss=2.535, ppl=5.8, wps=5853.7, ups=0.09, wpb=64794, bsz=128, num_updates=14599, lr=9.98912e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=167529
2021-06-20 17:11:05 | INFO | train_inner | epoch 005: 2677 / 3002 loss=2.462, ppl=5.51, wps=5923.9, ups=0.09, wpb=64811, bsz=128, num_updates=14600, lr=9.98912e-05, gnorm=1.942, loss_scale=16, train_wall=10, gb_free=2.8, wall=167540
2021-06-20 17:11:16 | INFO | train_inner | epoch 005: 2678 / 3002 loss=2.605, ppl=6.09, wps=5821, ups=0.09, wpb=64760, bsz=128, num_updates=14601, lr=9.98912e-05, gnorm=2.009, loss_scale=16, train_wall=11, gb_free=2.8, wall=167551
2021-06-20 17:11:27 | INFO | train_inner | epoch 005: 2679 / 3002 loss=2.474, ppl=5.55, wps=5816.9, ups=0.09, wpb=64851, bsz=128, num_updates=14602, lr=9.98912e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=167562
2021-06-20 17:11:39 | INFO | train_inner | epoch 005: 2680 / 3002 loss=2.628, ppl=6.18, wps=5769.3, ups=0.09, wpb=64720, bsz=128, num_updates=14603, lr=9.98912e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=167573
2021-06-20 17:11:50 | INFO | train_inner | epoch 005: 2681 / 3002 loss=2.473, ppl=5.55, wps=5839.7, ups=0.09, wpb=64787, bsz=128, num_updates=14604, lr=9.98912e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=167584
2021-06-20 17:12:01 | INFO | train_inner | epoch 005: 2682 / 3002 loss=2.577, ppl=5.97, wps=5848.9, ups=0.09, wpb=64835, bsz=128, num_updates=14605, lr=9.98912e-05, gnorm=2.329, loss_scale=16, train_wall=11, gb_free=2.8, wall=167595
2021-06-20 17:12:12 | INFO | train_inner | epoch 005: 2683 / 3002 loss=2.479, ppl=5.58, wps=5895.1, ups=0.09, wpb=64718, bsz=128, num_updates=14606, lr=9.98911e-05, gnorm=1.951, loss_scale=16, train_wall=11, gb_free=2.8, wall=167606
2021-06-20 17:12:23 | INFO | train_inner | epoch 005: 2684 / 3002 loss=2.54, ppl=5.82, wps=5712.1, ups=0.09, wpb=64784, bsz=128, num_updates=14607, lr=9.98911e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=167618
2021-06-20 17:12:34 | INFO | train_inner | epoch 005: 2685 / 3002 loss=2.405, ppl=5.3, wps=5779, ups=0.09, wpb=64825, bsz=128, num_updates=14608, lr=9.98911e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=167629
2021-06-20 17:12:46 | INFO | train_inner | epoch 005: 2686 / 3002 loss=2.566, ppl=5.92, wps=5712.3, ups=0.09, wpb=64753, bsz=128, num_updates=14609, lr=9.98911e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=167640
2021-06-20 17:12:57 | INFO | train_inner | epoch 005: 2687 / 3002 loss=2.357, ppl=5.12, wps=5807.7, ups=0.09, wpb=64866, bsz=128, num_updates=14610, lr=9.98911e-05, gnorm=1.878, loss_scale=16, train_wall=11, gb_free=2.8, wall=167651
2021-06-20 17:13:08 | INFO | train_inner | epoch 005: 2688 / 3002 loss=2.456, ppl=5.49, wps=5788.5, ups=0.09, wpb=64887, bsz=128, num_updates=14611, lr=9.98911e-05, gnorm=1.917, loss_scale=16, train_wall=11, gb_free=2.8, wall=167662
2021-06-20 17:13:19 | INFO | train_inner | epoch 005: 2689 / 3002 loss=2.439, ppl=5.42, wps=5931.6, ups=0.09, wpb=64853, bsz=128, num_updates=14612, lr=9.98911e-05, gnorm=2.02, loss_scale=16, train_wall=10, gb_free=2.8, wall=167673
2021-06-20 17:13:30 | INFO | train_inner | epoch 005: 2690 / 3002 loss=2.519, ppl=5.73, wps=6002.7, ups=0.09, wpb=64892, bsz=128, num_updates=14613, lr=9.98911e-05, gnorm=1.97, loss_scale=16, train_wall=10, gb_free=2.8, wall=167684
2021-06-20 17:13:41 | INFO | train_inner | epoch 005: 2691 / 3002 loss=2.605, ppl=6.08, wps=5790.2, ups=0.09, wpb=64711, bsz=128, num_updates=14614, lr=9.98911e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=167695
2021-06-20 17:13:52 | INFO | train_inner | epoch 005: 2692 / 3002 loss=2.55, ppl=5.86, wps=5938.2, ups=0.09, wpb=64812, bsz=128, num_updates=14615, lr=9.98911e-05, gnorm=1.952, loss_scale=16, train_wall=10, gb_free=2.8, wall=167706
2021-06-20 17:14:03 | INFO | train_inner | epoch 005: 2693 / 3002 loss=2.558, ppl=5.89, wps=5831.2, ups=0.09, wpb=64826, bsz=128, num_updates=14616, lr=9.98911e-05, gnorm=1.883, loss_scale=16, train_wall=11, gb_free=2.8, wall=167717
2021-06-20 17:14:14 | INFO | train_inner | epoch 005: 2694 / 3002 loss=2.431, ppl=5.39, wps=5845.2, ups=0.09, wpb=64878, bsz=128, num_updates=14617, lr=9.98911e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=167729
2021-06-20 17:14:25 | INFO | train_inner | epoch 005: 2695 / 3002 loss=2.498, ppl=5.65, wps=5903.6, ups=0.09, wpb=64812, bsz=128, num_updates=14618, lr=9.9891e-05, gnorm=1.881, loss_scale=16, train_wall=11, gb_free=2.8, wall=167739
2021-06-20 17:14:36 | INFO | train_inner | epoch 005: 2696 / 3002 loss=2.515, ppl=5.72, wps=5877.7, ups=0.09, wpb=64823, bsz=128, num_updates=14619, lr=9.9891e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=167751
2021-06-20 17:14:47 | INFO | train_inner | epoch 005: 2697 / 3002 loss=2.505, ppl=5.68, wps=5828.6, ups=0.09, wpb=64810, bsz=128, num_updates=14620, lr=9.9891e-05, gnorm=2.008, loss_scale=16, train_wall=11, gb_free=2.8, wall=167762
2021-06-20 17:14:58 | INFO | train_inner | epoch 005: 2698 / 3002 loss=2.431, ppl=5.39, wps=5798.8, ups=0.09, wpb=64796, bsz=128, num_updates=14621, lr=9.9891e-05, gnorm=2.322, loss_scale=16, train_wall=11, gb_free=2.8, wall=167773
2021-06-20 17:15:10 | INFO | train_inner | epoch 005: 2699 / 3002 loss=2.626, ppl=6.17, wps=5855.7, ups=0.09, wpb=64797, bsz=128, num_updates=14622, lr=9.9891e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=167784
2021-06-20 17:15:21 | INFO | train_inner | epoch 005: 2700 / 3002 loss=2.398, ppl=5.27, wps=5873, ups=0.09, wpb=64824, bsz=128, num_updates=14623, lr=9.9891e-05, gnorm=1.991, loss_scale=16, train_wall=11, gb_free=2.8, wall=167795
2021-06-20 17:15:32 | INFO | train_inner | epoch 005: 2701 / 3002 loss=2.685, ppl=6.43, wps=5845.7, ups=0.09, wpb=64692, bsz=128, num_updates=14624, lr=9.9891e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=167806
2021-06-20 17:15:43 | INFO | train_inner | epoch 005: 2702 / 3002 loss=2.722, ppl=6.6, wps=5791.9, ups=0.09, wpb=64787, bsz=128, num_updates=14625, lr=9.9891e-05, gnorm=1.952, loss_scale=16, train_wall=11, gb_free=2.8, wall=167817
2021-06-20 17:15:54 | INFO | train_inner | epoch 005: 2703 / 3002 loss=2.57, ppl=5.94, wps=5804.8, ups=0.09, wpb=64889, bsz=128, num_updates=14626, lr=9.9891e-05, gnorm=1.974, loss_scale=16, train_wall=11, gb_free=2.8, wall=167828
2021-06-20 17:16:05 | INFO | train_inner | epoch 005: 2704 / 3002 loss=2.544, ppl=5.83, wps=5960.2, ups=0.09, wpb=64774, bsz=128, num_updates=14627, lr=9.9891e-05, gnorm=1.992, loss_scale=16, train_wall=10, gb_free=2.8, wall=167839
2021-06-20 17:16:16 | INFO | train_inner | epoch 005: 2705 / 3002 loss=2.588, ppl=6.01, wps=5854.6, ups=0.09, wpb=64771, bsz=128, num_updates=14628, lr=9.9891e-05, gnorm=2.06, loss_scale=16, train_wall=11, gb_free=2.8, wall=167850
2021-06-20 17:16:27 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 17:16:38 | INFO | train_inner | epoch 005: 2707 / 3002 loss=2.403, ppl=5.29, wps=2914.6, ups=0.04, wpb=64823, bsz=128, num_updates=14629, lr=9.9891e-05, gnorm=1.884, loss_scale=8, train_wall=21, gb_free=2.8, wall=167873
2021-06-20 17:16:49 | INFO | train_inner | epoch 005: 2708 / 3002 loss=2.683, ppl=6.42, wps=5864.3, ups=0.09, wpb=64824, bsz=128, num_updates=14630, lr=9.9891e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=167884
2021-06-20 17:17:00 | INFO | train_inner | epoch 005: 2709 / 3002 loss=2.322, ppl=5, wps=5964.7, ups=0.09, wpb=64781, bsz=128, num_updates=14631, lr=9.98909e-05, gnorm=1.987, loss_scale=8, train_wall=10, gb_free=2.8, wall=167894
2021-06-20 17:17:11 | INFO | train_inner | epoch 005: 2710 / 3002 loss=2.39, ppl=5.24, wps=5910.7, ups=0.09, wpb=64849, bsz=128, num_updates=14632, lr=9.98909e-05, gnorm=1.878, loss_scale=8, train_wall=11, gb_free=2.8, wall=167905
2021-06-20 17:17:22 | INFO | train_inner | epoch 005: 2711 / 3002 loss=2.542, ppl=5.82, wps=5859.4, ups=0.09, wpb=64839, bsz=128, num_updates=14633, lr=9.98909e-05, gnorm=2.047, loss_scale=8, train_wall=11, gb_free=2.8, wall=167916
2021-06-20 17:17:33 | INFO | train_inner | epoch 005: 2712 / 3002 loss=2.492, ppl=5.62, wps=5836.3, ups=0.09, wpb=64808, bsz=128, num_updates=14634, lr=9.98909e-05, gnorm=2.036, loss_scale=8, train_wall=11, gb_free=2.8, wall=167928
2021-06-20 17:17:44 | INFO | train_inner | epoch 005: 2713 / 3002 loss=2.434, ppl=5.4, wps=5893.6, ups=0.09, wpb=64788, bsz=128, num_updates=14635, lr=9.98909e-05, gnorm=1.86, loss_scale=8, train_wall=11, gb_free=2.8, wall=167939
2021-06-20 17:17:55 | INFO | train_inner | epoch 005: 2714 / 3002 loss=2.42, ppl=5.35, wps=5817.4, ups=0.09, wpb=64735, bsz=128, num_updates=14636, lr=9.98909e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=167950
2021-06-20 17:18:06 | INFO | train_inner | epoch 005: 2715 / 3002 loss=2.467, ppl=5.53, wps=5888.9, ups=0.09, wpb=64817, bsz=128, num_updates=14637, lr=9.98909e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=167961
2021-06-20 17:18:18 | INFO | train_inner | epoch 005: 2716 / 3002 loss=2.585, ppl=6, wps=5811.5, ups=0.09, wpb=64860, bsz=128, num_updates=14638, lr=9.98909e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=167972
2021-06-20 17:18:29 | INFO | train_inner | epoch 005: 2717 / 3002 loss=2.423, ppl=5.36, wps=5829, ups=0.09, wpb=64898, bsz=128, num_updates=14639, lr=9.98909e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=167983
2021-06-20 17:18:40 | INFO | train_inner | epoch 005: 2718 / 3002 loss=2.482, ppl=5.59, wps=5776.1, ups=0.09, wpb=64834, bsz=128, num_updates=14640, lr=9.98909e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=167994
2021-06-20 17:18:51 | INFO | train_inner | epoch 005: 2719 / 3002 loss=2.453, ppl=5.48, wps=5755.5, ups=0.09, wpb=64867, bsz=128, num_updates=14641, lr=9.98909e-05, gnorm=2.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=168006
2021-06-20 17:19:02 | INFO | train_inner | epoch 005: 2720 / 3002 loss=2.464, ppl=5.52, wps=5875.5, ups=0.09, wpb=64781, bsz=128, num_updates=14642, lr=9.98909e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=168017
2021-06-20 17:19:13 | INFO | train_inner | epoch 005: 2721 / 3002 loss=2.565, ppl=5.92, wps=5873.6, ups=0.09, wpb=64817, bsz=128, num_updates=14643, lr=9.98908e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=168028
2021-06-20 17:19:24 | INFO | train_inner | epoch 005: 2722 / 3002 loss=2.37, ppl=5.17, wps=5832.8, ups=0.09, wpb=64885, bsz=128, num_updates=14644, lr=9.98908e-05, gnorm=1.922, loss_scale=8, train_wall=11, gb_free=2.8, wall=168039
2021-06-20 17:19:36 | INFO | train_inner | epoch 005: 2723 / 3002 loss=2.461, ppl=5.51, wps=5748.1, ups=0.09, wpb=64830, bsz=128, num_updates=14645, lr=9.98908e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=168050
2021-06-20 17:19:47 | INFO | train_inner | epoch 005: 2724 / 3002 loss=2.477, ppl=5.57, wps=5873.9, ups=0.09, wpb=64851, bsz=128, num_updates=14646, lr=9.98908e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=168061
2021-06-20 17:19:58 | INFO | train_inner | epoch 005: 2725 / 3002 loss=2.567, ppl=5.93, wps=5895, ups=0.09, wpb=64832, bsz=128, num_updates=14647, lr=9.98908e-05, gnorm=1.863, loss_scale=8, train_wall=11, gb_free=2.8, wall=168072
2021-06-20 17:20:09 | INFO | train_inner | epoch 005: 2726 / 3002 loss=2.489, ppl=5.61, wps=5899.3, ups=0.09, wpb=64765, bsz=128, num_updates=14648, lr=9.98908e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=168083
2021-06-20 17:20:20 | INFO | train_inner | epoch 005: 2727 / 3002 loss=2.503, ppl=5.67, wps=5831.9, ups=0.09, wpb=64776, bsz=128, num_updates=14649, lr=9.98908e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=168094
2021-06-20 17:20:31 | INFO | train_inner | epoch 005: 2728 / 3002 loss=2.391, ppl=5.24, wps=5789.8, ups=0.09, wpb=64804, bsz=128, num_updates=14650, lr=9.98908e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=168105
2021-06-20 17:20:42 | INFO | train_inner | epoch 005: 2729 / 3002 loss=2.507, ppl=5.68, wps=5781, ups=0.09, wpb=64752, bsz=128, num_updates=14651, lr=9.98908e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=168116
2021-06-20 17:20:53 | INFO | train_inner | epoch 005: 2730 / 3002 loss=2.518, ppl=5.73, wps=5859.7, ups=0.09, wpb=64843, bsz=128, num_updates=14652, lr=9.98908e-05, gnorm=2.093, loss_scale=8, train_wall=11, gb_free=2.8, wall=168128
2021-06-20 17:21:04 | INFO | train_inner | epoch 005: 2731 / 3002 loss=2.496, ppl=5.64, wps=5914.6, ups=0.09, wpb=64799, bsz=128, num_updates=14653, lr=9.98908e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=168139
2021-06-20 17:21:15 | INFO | train_inner | epoch 005: 2732 / 3002 loss=2.513, ppl=5.71, wps=5816.8, ups=0.09, wpb=64889, bsz=128, num_updates=14654, lr=9.98908e-05, gnorm=2.112, loss_scale=8, train_wall=11, gb_free=2.8, wall=168150
2021-06-20 17:21:27 | INFO | train_inner | epoch 005: 2733 / 3002 loss=2.388, ppl=5.23, wps=5775.1, ups=0.09, wpb=64905, bsz=128, num_updates=14655, lr=9.98908e-05, gnorm=2.101, loss_scale=8, train_wall=11, gb_free=2.8, wall=168161
2021-06-20 17:21:38 | INFO | train_inner | epoch 005: 2734 / 3002 loss=2.417, ppl=5.34, wps=5905, ups=0.09, wpb=64925, bsz=128, num_updates=14656, lr=9.98907e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=168172
2021-06-20 17:21:49 | INFO | train_inner | epoch 005: 2735 / 3002 loss=2.601, ppl=6.07, wps=5860.6, ups=0.09, wpb=64769, bsz=128, num_updates=14657, lr=9.98907e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=168183
2021-06-20 17:22:00 | INFO | train_inner | epoch 005: 2736 / 3002 loss=2.448, ppl=5.45, wps=5906.7, ups=0.09, wpb=64881, bsz=128, num_updates=14658, lr=9.98907e-05, gnorm=2.246, loss_scale=8, train_wall=11, gb_free=2.8, wall=168194
2021-06-20 17:22:11 | INFO | train_inner | epoch 005: 2737 / 3002 loss=2.461, ppl=5.51, wps=5748.5, ups=0.09, wpb=64804, bsz=128, num_updates=14659, lr=9.98907e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=168205
2021-06-20 17:22:22 | INFO | train_inner | epoch 005: 2738 / 3002 loss=2.494, ppl=5.63, wps=5898.2, ups=0.09, wpb=64885, bsz=128, num_updates=14660, lr=9.98907e-05, gnorm=1.87, loss_scale=8, train_wall=11, gb_free=2.8, wall=168216
2021-06-20 17:22:33 | INFO | train_inner | epoch 005: 2739 / 3002 loss=2.525, ppl=5.76, wps=5837, ups=0.09, wpb=64751, bsz=128, num_updates=14661, lr=9.98907e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=168227
2021-06-20 17:22:44 | INFO | train_inner | epoch 005: 2740 / 3002 loss=2.542, ppl=5.82, wps=5828.4, ups=0.09, wpb=64799, bsz=128, num_updates=14662, lr=9.98907e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=168238
2021-06-20 17:22:55 | INFO | train_inner | epoch 005: 2741 / 3002 loss=2.433, ppl=5.4, wps=6012.9, ups=0.09, wpb=64903, bsz=128, num_updates=14663, lr=9.98907e-05, gnorm=1.885, loss_scale=8, train_wall=10, gb_free=2.8, wall=168249
2021-06-20 17:23:06 | INFO | train_inner | epoch 005: 2742 / 3002 loss=2.414, ppl=5.33, wps=5779.9, ups=0.09, wpb=64907, bsz=128, num_updates=14664, lr=9.98907e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=168260
2021-06-20 17:23:17 | INFO | train_inner | epoch 005: 2743 / 3002 loss=2.438, ppl=5.42, wps=5826.3, ups=0.09, wpb=64826, bsz=128, num_updates=14665, lr=9.98907e-05, gnorm=2.124, loss_scale=8, train_wall=11, gb_free=2.8, wall=168272
2021-06-20 17:23:28 | INFO | train_inner | epoch 005: 2744 / 3002 loss=2.545, ppl=5.84, wps=5862.9, ups=0.09, wpb=64825, bsz=128, num_updates=14666, lr=9.98907e-05, gnorm=2.046, loss_scale=8, train_wall=11, gb_free=2.8, wall=168283
2021-06-20 17:23:40 | INFO | train_inner | epoch 005: 2745 / 3002 loss=2.513, ppl=5.71, wps=5777.6, ups=0.09, wpb=64787, bsz=128, num_updates=14667, lr=9.98907e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=168294
2021-06-20 17:23:51 | INFO | train_inner | epoch 005: 2746 / 3002 loss=2.506, ppl=5.68, wps=5797, ups=0.09, wpb=64861, bsz=128, num_updates=14668, lr=9.98906e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=168305
2021-06-20 17:24:02 | INFO | train_inner | epoch 005: 2747 / 3002 loss=2.301, ppl=4.93, wps=5834, ups=0.09, wpb=64919, bsz=128, num_updates=14669, lr=9.98906e-05, gnorm=1.889, loss_scale=8, train_wall=11, gb_free=2.8, wall=168316
2021-06-20 17:24:13 | INFO | train_inner | epoch 005: 2748 / 3002 loss=2.476, ppl=5.56, wps=5769.9, ups=0.09, wpb=64776, bsz=128, num_updates=14670, lr=9.98906e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=168327
2021-06-20 17:24:24 | INFO | train_inner | epoch 005: 2749 / 3002 loss=2.485, ppl=5.6, wps=5784.9, ups=0.09, wpb=64896, bsz=128, num_updates=14671, lr=9.98906e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=168339
2021-06-20 17:24:35 | INFO | train_inner | epoch 005: 2750 / 3002 loss=2.496, ppl=5.64, wps=5910.6, ups=0.09, wpb=64863, bsz=128, num_updates=14672, lr=9.98906e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=168350
2021-06-20 17:24:46 | INFO | train_inner | epoch 005: 2751 / 3002 loss=2.516, ppl=5.72, wps=5815.2, ups=0.09, wpb=64728, bsz=128, num_updates=14673, lr=9.98906e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=168361
2021-06-20 17:24:57 | INFO | train_inner | epoch 005: 2752 / 3002 loss=2.288, ppl=4.88, wps=5864.5, ups=0.09, wpb=64877, bsz=128, num_updates=14674, lr=9.98906e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=168372
2021-06-20 17:25:09 | INFO | train_inner | epoch 005: 2753 / 3002 loss=2.485, ppl=5.6, wps=5848.8, ups=0.09, wpb=64873, bsz=128, num_updates=14675, lr=9.98906e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=168383
2021-06-20 17:25:20 | INFO | train_inner | epoch 005: 2754 / 3002 loss=2.364, ppl=5.15, wps=5834.5, ups=0.09, wpb=64848, bsz=128, num_updates=14676, lr=9.98906e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=168394
2021-06-20 17:25:31 | INFO | train_inner | epoch 005: 2755 / 3002 loss=2.471, ppl=5.54, wps=5801.8, ups=0.09, wpb=64809, bsz=128, num_updates=14677, lr=9.98906e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=168405
2021-06-20 17:25:42 | INFO | train_inner | epoch 005: 2756 / 3002 loss=2.485, ppl=5.6, wps=5778.5, ups=0.09, wpb=64767, bsz=128, num_updates=14678, lr=9.98906e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=168416
2021-06-20 17:25:53 | INFO | train_inner | epoch 005: 2757 / 3002 loss=2.539, ppl=5.81, wps=5835.3, ups=0.09, wpb=64821, bsz=128, num_updates=14679, lr=9.98906e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=168427
2021-06-20 17:26:04 | INFO | train_inner | epoch 005: 2758 / 3002 loss=2.538, ppl=5.81, wps=5830.9, ups=0.09, wpb=64817, bsz=128, num_updates=14680, lr=9.98906e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=168439
2021-06-20 17:26:15 | INFO | train_inner | epoch 005: 2759 / 3002 loss=2.56, ppl=5.9, wps=5919.8, ups=0.09, wpb=64848, bsz=128, num_updates=14681, lr=9.98905e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=168450
2021-06-20 17:26:26 | INFO | train_inner | epoch 005: 2760 / 3002 loss=2.688, ppl=6.44, wps=5836.7, ups=0.09, wpb=64804, bsz=128, num_updates=14682, lr=9.98905e-05, gnorm=2.034, loss_scale=8, train_wall=11, gb_free=2.8, wall=168461
2021-06-20 17:26:37 | INFO | train_inner | epoch 005: 2761 / 3002 loss=2.467, ppl=5.53, wps=5794.2, ups=0.09, wpb=64796, bsz=128, num_updates=14683, lr=9.98905e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=168472
2021-06-20 17:26:48 | INFO | train_inner | epoch 005: 2762 / 3002 loss=2.323, ppl=5, wps=5900, ups=0.09, wpb=64839, bsz=128, num_updates=14684, lr=9.98905e-05, gnorm=1.883, loss_scale=8, train_wall=11, gb_free=2.8, wall=168483
2021-06-20 17:26:59 | INFO | train_inner | epoch 005: 2763 / 3002 loss=2.57, ppl=5.94, wps=6002.8, ups=0.09, wpb=64883, bsz=128, num_updates=14685, lr=9.98905e-05, gnorm=1.933, loss_scale=8, train_wall=10, gb_free=2.8, wall=168494
2021-06-20 17:27:11 | INFO | train_inner | epoch 005: 2764 / 3002 loss=2.451, ppl=5.47, wps=5773.3, ups=0.09, wpb=64792, bsz=128, num_updates=14686, lr=9.98905e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=168505
2021-06-20 17:27:21 | INFO | train_inner | epoch 005: 2765 / 3002 loss=2.302, ppl=4.93, wps=5963.3, ups=0.09, wpb=64845, bsz=128, num_updates=14687, lr=9.98905e-05, gnorm=1.912, loss_scale=8, train_wall=10, gb_free=2.8, wall=168516
2021-06-20 17:27:33 | INFO | train_inner | epoch 005: 2766 / 3002 loss=2.385, ppl=5.22, wps=5775.6, ups=0.09, wpb=64825, bsz=128, num_updates=14688, lr=9.98905e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=168527
2021-06-20 17:27:44 | INFO | train_inner | epoch 005: 2767 / 3002 loss=2.351, ppl=5.1, wps=5839.7, ups=0.09, wpb=64792, bsz=128, num_updates=14689, lr=9.98905e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=168538
2021-06-20 17:27:55 | INFO | train_inner | epoch 005: 2768 / 3002 loss=2.521, ppl=5.74, wps=5849.5, ups=0.09, wpb=64851, bsz=128, num_updates=14690, lr=9.98905e-05, gnorm=3.18, loss_scale=8, train_wall=11, gb_free=2.8, wall=168549
2021-06-20 17:28:06 | INFO | train_inner | epoch 005: 2769 / 3002 loss=2.47, ppl=5.54, wps=5914.3, ups=0.09, wpb=64881, bsz=128, num_updates=14691, lr=9.98905e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=168560
2021-06-20 17:28:17 | INFO | train_inner | epoch 005: 2770 / 3002 loss=2.586, ppl=6.01, wps=5841.9, ups=0.09, wpb=64802, bsz=128, num_updates=14692, lr=9.98905e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=168571
2021-06-20 17:28:28 | INFO | train_inner | epoch 005: 2771 / 3002 loss=2.55, ppl=5.86, wps=5853.1, ups=0.09, wpb=64885, bsz=128, num_updates=14693, lr=9.98904e-05, gnorm=1.98, loss_scale=8, train_wall=11, gb_free=2.8, wall=168582
2021-06-20 17:28:39 | INFO | train_inner | epoch 005: 2772 / 3002 loss=2.515, ppl=5.72, wps=5889.6, ups=0.09, wpb=64855, bsz=128, num_updates=14694, lr=9.98904e-05, gnorm=2.131, loss_scale=8, train_wall=11, gb_free=2.8, wall=168593
2021-06-20 17:28:50 | INFO | train_inner | epoch 005: 2773 / 3002 loss=2.363, ppl=5.15, wps=5830.1, ups=0.09, wpb=64848, bsz=128, num_updates=14695, lr=9.98904e-05, gnorm=1.834, loss_scale=8, train_wall=11, gb_free=2.8, wall=168604
2021-06-20 17:29:01 | INFO | train_inner | epoch 005: 2774 / 3002 loss=2.548, ppl=5.85, wps=5768.9, ups=0.09, wpb=64804, bsz=128, num_updates=14696, lr=9.98904e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=168616
2021-06-20 17:29:12 | INFO | train_inner | epoch 005: 2775 / 3002 loss=2.463, ppl=5.51, wps=5937, ups=0.09, wpb=64815, bsz=128, num_updates=14697, lr=9.98904e-05, gnorm=1.893, loss_scale=8, train_wall=10, gb_free=2.8, wall=168627
2021-06-20 17:29:24 | INFO | train_inner | epoch 005: 2776 / 3002 loss=2.538, ppl=5.81, wps=5726.3, ups=0.09, wpb=64838, bsz=128, num_updates=14698, lr=9.98904e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=168638
2021-06-20 17:29:35 | INFO | train_inner | epoch 005: 2777 / 3002 loss=2.473, ppl=5.55, wps=5850.7, ups=0.09, wpb=64790, bsz=128, num_updates=14699, lr=9.98904e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=168649
2021-06-20 17:29:46 | INFO | train_inner | epoch 005: 2778 / 3002 loss=2.473, ppl=5.55, wps=5891.7, ups=0.09, wpb=64871, bsz=128, num_updates=14700, lr=9.98904e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=168660
2021-06-20 17:29:57 | INFO | train_inner | epoch 005: 2779 / 3002 loss=2.475, ppl=5.56, wps=5901.9, ups=0.09, wpb=64892, bsz=128, num_updates=14701, lr=9.98904e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=168671
2021-06-20 17:30:08 | INFO | train_inner | epoch 005: 2780 / 3002 loss=2.392, ppl=5.25, wps=5734, ups=0.09, wpb=64848, bsz=128, num_updates=14702, lr=9.98904e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=168682
2021-06-20 17:30:19 | INFO | train_inner | epoch 005: 2781 / 3002 loss=2.378, ppl=5.2, wps=5782.3, ups=0.09, wpb=64839, bsz=128, num_updates=14703, lr=9.98904e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=168693
2021-06-20 17:30:30 | INFO | train_inner | epoch 005: 2782 / 3002 loss=2.548, ppl=5.85, wps=5736.4, ups=0.09, wpb=64732, bsz=128, num_updates=14704, lr=9.98904e-05, gnorm=2.131, loss_scale=8, train_wall=11, gb_free=2.8, wall=168705
2021-06-20 17:30:42 | INFO | train_inner | epoch 005: 2783 / 3002 loss=2.523, ppl=5.75, wps=5789.1, ups=0.09, wpb=64858, bsz=128, num_updates=14705, lr=9.98904e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=168716
2021-06-20 17:30:53 | INFO | train_inner | epoch 005: 2784 / 3002 loss=2.472, ppl=5.55, wps=5947, ups=0.09, wpb=64858, bsz=128, num_updates=14706, lr=9.98903e-05, gnorm=1.985, loss_scale=8, train_wall=10, gb_free=2.8, wall=168727
2021-06-20 17:31:04 | INFO | train_inner | epoch 005: 2785 / 3002 loss=2.431, ppl=5.39, wps=5718.1, ups=0.09, wpb=64834, bsz=128, num_updates=14707, lr=9.98903e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=168738
2021-06-20 17:31:15 | INFO | train_inner | epoch 005: 2786 / 3002 loss=2.432, ppl=5.4, wps=5857.5, ups=0.09, wpb=64776, bsz=128, num_updates=14708, lr=9.98903e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=168749
2021-06-20 17:31:26 | INFO | train_inner | epoch 005: 2787 / 3002 loss=2.555, ppl=5.88, wps=5814.1, ups=0.09, wpb=64807, bsz=128, num_updates=14709, lr=9.98903e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=168760
2021-06-20 17:31:37 | INFO | train_inner | epoch 005: 2788 / 3002 loss=2.416, ppl=5.34, wps=6052, ups=0.09, wpb=64895, bsz=128, num_updates=14710, lr=9.98903e-05, gnorm=1.984, loss_scale=8, train_wall=10, gb_free=2.8, wall=168771
2021-06-20 17:31:48 | INFO | train_inner | epoch 005: 2789 / 3002 loss=2.464, ppl=5.52, wps=5848.9, ups=0.09, wpb=64827, bsz=128, num_updates=14711, lr=9.98903e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=168782
2021-06-20 17:31:59 | INFO | train_inner | epoch 005: 2790 / 3002 loss=2.567, ppl=5.93, wps=5831.3, ups=0.09, wpb=64789, bsz=128, num_updates=14712, lr=9.98903e-05, gnorm=1.922, loss_scale=8, train_wall=11, gb_free=2.8, wall=168793
2021-06-20 17:32:10 | INFO | train_inner | epoch 005: 2791 / 3002 loss=2.434, ppl=5.4, wps=5973.4, ups=0.09, wpb=64769, bsz=128, num_updates=14713, lr=9.98903e-05, gnorm=1.956, loss_scale=8, train_wall=10, gb_free=2.8, wall=168804
2021-06-20 17:32:21 | INFO | train_inner | epoch 005: 2792 / 3002 loss=2.485, ppl=5.6, wps=5957.5, ups=0.09, wpb=64886, bsz=128, num_updates=14714, lr=9.98903e-05, gnorm=1.932, loss_scale=8, train_wall=10, gb_free=2.8, wall=168815
2021-06-20 17:32:32 | INFO | train_inner | epoch 005: 2793 / 3002 loss=2.442, ppl=5.44, wps=5753.3, ups=0.09, wpb=64847, bsz=128, num_updates=14715, lr=9.98903e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=168826
2021-06-20 17:32:43 | INFO | train_inner | epoch 005: 2794 / 3002 loss=2.492, ppl=5.62, wps=5849.6, ups=0.09, wpb=64857, bsz=128, num_updates=14716, lr=9.98903e-05, gnorm=2.023, loss_scale=8, train_wall=11, gb_free=2.8, wall=168837
2021-06-20 17:32:54 | INFO | train_inner | epoch 005: 2795 / 3002 loss=2.433, ppl=5.4, wps=5935.6, ups=0.09, wpb=64907, bsz=128, num_updates=14717, lr=9.98903e-05, gnorm=2.022, loss_scale=8, train_wall=10, gb_free=2.8, wall=168848
2021-06-20 17:33:05 | INFO | train_inner | epoch 005: 2796 / 3002 loss=2.559, ppl=5.89, wps=5846.5, ups=0.09, wpb=64734, bsz=128, num_updates=14718, lr=9.98902e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=168859
2021-06-20 17:33:16 | INFO | train_inner | epoch 005: 2797 / 3002 loss=2.542, ppl=5.82, wps=5763.6, ups=0.09, wpb=64806, bsz=128, num_updates=14719, lr=9.98902e-05, gnorm=2.116, loss_scale=8, train_wall=11, gb_free=2.8, wall=168871
2021-06-20 17:33:27 | INFO | train_inner | epoch 005: 2798 / 3002 loss=2.478, ppl=5.57, wps=5856.6, ups=0.09, wpb=64894, bsz=128, num_updates=14720, lr=9.98902e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=168882
2021-06-20 17:33:39 | INFO | train_inner | epoch 005: 2799 / 3002 loss=2.518, ppl=5.73, wps=5818.4, ups=0.09, wpb=64842, bsz=128, num_updates=14721, lr=9.98902e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=168893
2021-06-20 17:33:50 | INFO | train_inner | epoch 005: 2800 / 3002 loss=2.507, ppl=5.68, wps=5906.6, ups=0.09, wpb=64870, bsz=128, num_updates=14722, lr=9.98902e-05, gnorm=1.88, loss_scale=8, train_wall=11, gb_free=2.8, wall=168904
2021-06-20 17:34:01 | INFO | train_inner | epoch 005: 2801 / 3002 loss=2.388, ppl=5.23, wps=5919.7, ups=0.09, wpb=64877, bsz=128, num_updates=14723, lr=9.98902e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=168915
2021-06-20 17:34:12 | INFO | train_inner | epoch 005: 2802 / 3002 loss=2.551, ppl=5.86, wps=5728.9, ups=0.09, wpb=64911, bsz=128, num_updates=14724, lr=9.98902e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=168926
2021-06-20 17:34:23 | INFO | train_inner | epoch 005: 2803 / 3002 loss=2.497, ppl=5.64, wps=5822.4, ups=0.09, wpb=64697, bsz=128, num_updates=14725, lr=9.98902e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=168937
2021-06-20 17:34:34 | INFO | train_inner | epoch 005: 2804 / 3002 loss=2.515, ppl=5.72, wps=5744.7, ups=0.09, wpb=64783, bsz=128, num_updates=14726, lr=9.98902e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=168949
2021-06-20 17:34:45 | INFO | train_inner | epoch 005: 2805 / 3002 loss=2.648, ppl=6.27, wps=5925.4, ups=0.09, wpb=64719, bsz=128, num_updates=14727, lr=9.98902e-05, gnorm=1.978, loss_scale=8, train_wall=10, gb_free=2.8, wall=168960
2021-06-20 17:34:56 | INFO | train_inner | epoch 005: 2806 / 3002 loss=2.361, ppl=5.14, wps=5822.5, ups=0.09, wpb=64878, bsz=128, num_updates=14728, lr=9.98902e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=168971
2021-06-20 17:35:07 | INFO | train_inner | epoch 005: 2807 / 3002 loss=2.515, ppl=5.72, wps=5845.3, ups=0.09, wpb=64937, bsz=128, num_updates=14729, lr=9.98902e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=168982
2021-06-20 17:35:18 | INFO | train_inner | epoch 005: 2808 / 3002 loss=2.405, ppl=5.3, wps=5894.5, ups=0.09, wpb=64781, bsz=128, num_updates=14730, lr=9.98902e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=168993
2021-06-20 17:35:30 | INFO | train_inner | epoch 005: 2809 / 3002 loss=2.38, ppl=5.21, wps=5835.8, ups=0.09, wpb=64903, bsz=128, num_updates=14731, lr=9.98901e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=169004
2021-06-20 17:35:41 | INFO | train_inner | epoch 005: 2810 / 3002 loss=2.441, ppl=5.43, wps=5859.5, ups=0.09, wpb=64897, bsz=128, num_updates=14732, lr=9.98901e-05, gnorm=1.854, loss_scale=8, train_wall=11, gb_free=2.8, wall=169015
2021-06-20 17:35:52 | INFO | train_inner | epoch 005: 2811 / 3002 loss=2.487, ppl=5.61, wps=5802.3, ups=0.09, wpb=64884, bsz=128, num_updates=14733, lr=9.98901e-05, gnorm=1.899, loss_scale=8, train_wall=11, gb_free=2.8, wall=169026
2021-06-20 17:36:03 | INFO | train_inner | epoch 005: 2812 / 3002 loss=2.432, ppl=5.4, wps=5846.2, ups=0.09, wpb=64903, bsz=128, num_updates=14734, lr=9.98901e-05, gnorm=1.864, loss_scale=8, train_wall=11, gb_free=2.8, wall=169037
2021-06-20 17:36:14 | INFO | train_inner | epoch 005: 2813 / 3002 loss=2.51, ppl=5.7, wps=5825.1, ups=0.09, wpb=64807, bsz=128, num_updates=14735, lr=9.98901e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=169048
2021-06-20 17:36:25 | INFO | train_inner | epoch 005: 2814 / 3002 loss=2.474, ppl=5.55, wps=5822, ups=0.09, wpb=64800, bsz=128, num_updates=14736, lr=9.98901e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=169059
2021-06-20 17:36:36 | INFO | train_inner | epoch 005: 2815 / 3002 loss=2.356, ppl=5.12, wps=5808.7, ups=0.09, wpb=64852, bsz=128, num_updates=14737, lr=9.98901e-05, gnorm=2.246, loss_scale=8, train_wall=11, gb_free=2.8, wall=169071
2021-06-20 17:36:48 | INFO | train_inner | epoch 005: 2816 / 3002 loss=2.485, ppl=5.6, wps=5784.5, ups=0.09, wpb=64816, bsz=128, num_updates=14738, lr=9.98901e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=169082
2021-06-20 17:36:58 | INFO | train_inner | epoch 005: 2817 / 3002 loss=2.429, ppl=5.39, wps=5959.3, ups=0.09, wpb=64857, bsz=128, num_updates=14739, lr=9.98901e-05, gnorm=1.972, loss_scale=8, train_wall=10, gb_free=2.8, wall=169093
2021-06-20 17:37:09 | INFO | train_inner | epoch 005: 2818 / 3002 loss=2.482, ppl=5.59, wps=5943.5, ups=0.09, wpb=64809, bsz=128, num_updates=14740, lr=9.98901e-05, gnorm=1.969, loss_scale=8, train_wall=10, gb_free=2.8, wall=169104
2021-06-20 17:37:20 | INFO | train_inner | epoch 005: 2819 / 3002 loss=2.398, ppl=5.27, wps=5878.4, ups=0.09, wpb=64920, bsz=128, num_updates=14741, lr=9.98901e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=169115
2021-06-20 17:37:31 | INFO | train_inner | epoch 005: 2820 / 3002 loss=2.529, ppl=5.77, wps=5863.4, ups=0.09, wpb=64837, bsz=128, num_updates=14742, lr=9.98901e-05, gnorm=1.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=169126
2021-06-20 17:37:42 | INFO | train_inner | epoch 005: 2821 / 3002 loss=2.49, ppl=5.62, wps=5902.6, ups=0.09, wpb=64863, bsz=128, num_updates=14743, lr=9.989e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=169137
2021-06-20 17:37:53 | INFO | train_inner | epoch 005: 2822 / 3002 loss=2.608, ppl=6.1, wps=5856.1, ups=0.09, wpb=64746, bsz=128, num_updates=14744, lr=9.989e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=169148
2021-06-20 17:38:04 | INFO | train_inner | epoch 005: 2823 / 3002 loss=2.533, ppl=5.79, wps=5885.9, ups=0.09, wpb=64790, bsz=128, num_updates=14745, lr=9.989e-05, gnorm=1.884, loss_scale=8, train_wall=11, gb_free=2.8, wall=169159
2021-06-20 17:38:15 | INFO | train_inner | epoch 005: 2824 / 3002 loss=2.507, ppl=5.68, wps=5960.7, ups=0.09, wpb=64899, bsz=128, num_updates=14746, lr=9.989e-05, gnorm=1.878, loss_scale=8, train_wall=10, gb_free=2.8, wall=169170
2021-06-20 17:38:26 | INFO | train_inner | epoch 005: 2825 / 3002 loss=2.472, ppl=5.55, wps=5848.3, ups=0.09, wpb=64858, bsz=128, num_updates=14747, lr=9.989e-05, gnorm=2.049, loss_scale=8, train_wall=11, gb_free=2.8, wall=169181
2021-06-20 17:38:37 | INFO | train_inner | epoch 005: 2826 / 3002 loss=2.436, ppl=5.41, wps=5967.2, ups=0.09, wpb=64794, bsz=128, num_updates=14748, lr=9.989e-05, gnorm=1.998, loss_scale=8, train_wall=10, gb_free=2.8, wall=169192
2021-06-20 17:38:49 | INFO | train_inner | epoch 005: 2827 / 3002 loss=2.568, ppl=5.93, wps=5776.7, ups=0.09, wpb=64808, bsz=128, num_updates=14749, lr=9.989e-05, gnorm=2.184, loss_scale=8, train_wall=11, gb_free=2.8, wall=169203
2021-06-20 17:39:00 | INFO | train_inner | epoch 005: 2828 / 3002 loss=2.449, ppl=5.46, wps=5818.2, ups=0.09, wpb=64854, bsz=128, num_updates=14750, lr=9.989e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=169214
2021-06-20 17:39:11 | INFO | train_inner | epoch 005: 2829 / 3002 loss=2.541, ppl=5.82, wps=5889.3, ups=0.09, wpb=64847, bsz=128, num_updates=14751, lr=9.989e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=169225
2021-06-20 17:39:22 | INFO | train_inner | epoch 005: 2830 / 3002 loss=2.491, ppl=5.62, wps=5794.4, ups=0.09, wpb=64787, bsz=128, num_updates=14752, lr=9.989e-05, gnorm=1.994, loss_scale=8, train_wall=11, gb_free=2.8, wall=169236
2021-06-20 17:39:33 | INFO | train_inner | epoch 005: 2831 / 3002 loss=2.552, ppl=5.86, wps=5857.5, ups=0.09, wpb=64869, bsz=128, num_updates=14753, lr=9.989e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=169247
2021-06-20 17:39:44 | INFO | train_inner | epoch 005: 2832 / 3002 loss=2.416, ppl=5.34, wps=5887.9, ups=0.09, wpb=64863, bsz=128, num_updates=14754, lr=9.989e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=169258
2021-06-20 17:39:55 | INFO | train_inner | epoch 005: 2833 / 3002 loss=2.531, ppl=5.78, wps=5776.6, ups=0.09, wpb=64792, bsz=128, num_updates=14755, lr=9.989e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=169270
2021-06-20 17:40:06 | INFO | train_inner | epoch 005: 2834 / 3002 loss=2.535, ppl=5.8, wps=5858.9, ups=0.09, wpb=64798, bsz=128, num_updates=14756, lr=9.98899e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=169281
2021-06-20 17:40:17 | INFO | train_inner | epoch 005: 2835 / 3002 loss=2.385, ppl=5.22, wps=5815.9, ups=0.09, wpb=64827, bsz=128, num_updates=14757, lr=9.98899e-05, gnorm=1.996, loss_scale=16, train_wall=11, gb_free=2.8, wall=169292
2021-06-20 17:40:29 | INFO | train_inner | epoch 005: 2836 / 3002 loss=2.442, ppl=5.44, wps=5751.8, ups=0.09, wpb=64768, bsz=128, num_updates=14758, lr=9.98899e-05, gnorm=1.974, loss_scale=16, train_wall=11, gb_free=2.8, wall=169303
2021-06-20 17:40:40 | INFO | train_inner | epoch 005: 2837 / 3002 loss=2.385, ppl=5.22, wps=5799.8, ups=0.09, wpb=64762, bsz=128, num_updates=14759, lr=9.98899e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=169314
2021-06-20 17:40:51 | INFO | train_inner | epoch 005: 2838 / 3002 loss=2.476, ppl=5.56, wps=5716.2, ups=0.09, wpb=64923, bsz=128, num_updates=14760, lr=9.98899e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=169326
2021-06-20 17:41:02 | INFO | train_inner | epoch 005: 2839 / 3002 loss=2.474, ppl=5.56, wps=5809.9, ups=0.09, wpb=64763, bsz=128, num_updates=14761, lr=9.98899e-05, gnorm=1.988, loss_scale=16, train_wall=11, gb_free=2.8, wall=169337
2021-06-20 17:41:14 | INFO | train_inner | epoch 005: 2840 / 3002 loss=2.612, ppl=6.11, wps=5691.7, ups=0.09, wpb=64838, bsz=128, num_updates=14762, lr=9.98899e-05, gnorm=2.017, loss_scale=16, train_wall=11, gb_free=2.8, wall=169348
2021-06-20 17:41:25 | INFO | train_inner | epoch 005: 2841 / 3002 loss=2.555, ppl=5.88, wps=6001.4, ups=0.09, wpb=64862, bsz=128, num_updates=14763, lr=9.98899e-05, gnorm=1.957, loss_scale=16, train_wall=10, gb_free=2.8, wall=169359
2021-06-20 17:41:36 | INFO | train_inner | epoch 005: 2842 / 3002 loss=2.501, ppl=5.66, wps=5832.5, ups=0.09, wpb=64848, bsz=128, num_updates=14764, lr=9.98899e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=169370
2021-06-20 17:41:47 | INFO | train_inner | epoch 005: 2843 / 3002 loss=2.417, ppl=5.34, wps=5810, ups=0.09, wpb=64834, bsz=128, num_updates=14765, lr=9.98899e-05, gnorm=1.881, loss_scale=16, train_wall=11, gb_free=2.8, wall=169381
2021-06-20 17:41:58 | INFO | train_inner | epoch 005: 2844 / 3002 loss=2.524, ppl=5.75, wps=5816.2, ups=0.09, wpb=64835, bsz=128, num_updates=14766, lr=9.98899e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=169392
2021-06-20 17:42:09 | INFO | train_inner | epoch 005: 2845 / 3002 loss=2.56, ppl=5.9, wps=5735.8, ups=0.09, wpb=64805, bsz=128, num_updates=14767, lr=9.98899e-05, gnorm=1.913, loss_scale=16, train_wall=11, gb_free=2.8, wall=169404
2021-06-20 17:42:20 | INFO | train_inner | epoch 005: 2846 / 3002 loss=2.493, ppl=5.63, wps=5836.3, ups=0.09, wpb=64810, bsz=128, num_updates=14768, lr=9.98898e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=169415
2021-06-20 17:42:31 | INFO | train_inner | epoch 005: 2847 / 3002 loss=2.542, ppl=5.82, wps=5868.8, ups=0.09, wpb=64876, bsz=128, num_updates=14769, lr=9.98898e-05, gnorm=2.057, loss_scale=16, train_wall=11, gb_free=2.8, wall=169426
2021-06-20 17:42:42 | INFO | train_inner | epoch 005: 2848 / 3002 loss=2.447, ppl=5.45, wps=5844.7, ups=0.09, wpb=64892, bsz=128, num_updates=14770, lr=9.98898e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=169437
2021-06-20 17:42:54 | INFO | train_inner | epoch 005: 2849 / 3002 loss=2.403, ppl=5.29, wps=5870.7, ups=0.09, wpb=64839, bsz=128, num_updates=14771, lr=9.98898e-05, gnorm=1.951, loss_scale=16, train_wall=11, gb_free=2.8, wall=169448
2021-06-20 17:43:05 | INFO | train_inner | epoch 005: 2850 / 3002 loss=2.554, ppl=5.87, wps=5912.9, ups=0.09, wpb=64780, bsz=128, num_updates=14772, lr=9.98898e-05, gnorm=2.104, loss_scale=16, train_wall=11, gb_free=2.8, wall=169459
2021-06-20 17:43:16 | INFO | train_inner | epoch 005: 2851 / 3002 loss=2.359, ppl=5.13, wps=5814.3, ups=0.09, wpb=64801, bsz=128, num_updates=14773, lr=9.98898e-05, gnorm=2.089, loss_scale=16, train_wall=11, gb_free=2.8, wall=169470
2021-06-20 17:43:27 | INFO | train_inner | epoch 005: 2852 / 3002 loss=2.454, ppl=5.48, wps=5860.2, ups=0.09, wpb=64829, bsz=128, num_updates=14774, lr=9.98898e-05, gnorm=2.076, loss_scale=16, train_wall=11, gb_free=2.8, wall=169481
2021-06-20 17:43:38 | INFO | train_inner | epoch 005: 2853 / 3002 loss=2.49, ppl=5.62, wps=6008.6, ups=0.09, wpb=64850, bsz=128, num_updates=14775, lr=9.98898e-05, gnorm=2.012, loss_scale=16, train_wall=10, gb_free=2.8, wall=169492
2021-06-20 17:43:49 | INFO | train_inner | epoch 005: 2854 / 3002 loss=2.722, ppl=6.6, wps=5860.3, ups=0.09, wpb=64797, bsz=128, num_updates=14776, lr=9.98898e-05, gnorm=1.945, loss_scale=16, train_wall=11, gb_free=2.8, wall=169503
2021-06-20 17:43:59 | INFO | train_inner | epoch 005: 2855 / 3002 loss=2.487, ppl=5.61, wps=5949.7, ups=0.09, wpb=64808, bsz=128, num_updates=14777, lr=9.98898e-05, gnorm=1.921, loss_scale=16, train_wall=10, gb_free=2.8, wall=169514
2021-06-20 17:44:11 | INFO | train_inner | epoch 005: 2856 / 3002 loss=2.547, ppl=5.84, wps=5700.3, ups=0.09, wpb=64698, bsz=128, num_updates=14778, lr=9.98898e-05, gnorm=2.185, loss_scale=16, train_wall=11, gb_free=2.8, wall=169525
2021-06-20 17:44:22 | INFO | train_inner | epoch 005: 2857 / 3002 loss=2.46, ppl=5.5, wps=5803.7, ups=0.09, wpb=64830, bsz=128, num_updates=14779, lr=9.98898e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=169536
2021-06-20 17:44:33 | INFO | train_inner | epoch 005: 2858 / 3002 loss=2.567, ppl=5.93, wps=5768.1, ups=0.09, wpb=64827, bsz=128, num_updates=14780, lr=9.98898e-05, gnorm=1.857, loss_scale=16, train_wall=11, gb_free=2.8, wall=169548
2021-06-20 17:44:44 | INFO | train_inner | epoch 005: 2859 / 3002 loss=2.457, ppl=5.49, wps=5790.4, ups=0.09, wpb=64791, bsz=128, num_updates=14781, lr=9.98897e-05, gnorm=1.84, loss_scale=16, train_wall=11, gb_free=2.8, wall=169559
2021-06-20 17:44:55 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 17:45:06 | INFO | train_inner | epoch 005: 2861 / 3002 loss=2.44, ppl=5.42, wps=2970.2, ups=0.05, wpb=64854, bsz=128, num_updates=14782, lr=9.98897e-05, gnorm=1.941, loss_scale=8, train_wall=21, gb_free=2.8, wall=169581
2021-06-20 17:45:17 | INFO | train_inner | epoch 005: 2862 / 3002 loss=2.548, ppl=5.85, wps=5787.8, ups=0.09, wpb=64729, bsz=128, num_updates=14783, lr=9.98897e-05, gnorm=1.934, loss_scale=8, train_wall=11, gb_free=2.8, wall=169592
2021-06-20 17:45:29 | INFO | train_inner | epoch 005: 2863 / 3002 loss=2.521, ppl=5.74, wps=5816.6, ups=0.09, wpb=64860, bsz=128, num_updates=14784, lr=9.98897e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=169603
2021-06-20 17:45:40 | INFO | train_inner | epoch 005: 2864 / 3002 loss=2.4, ppl=5.28, wps=5851.3, ups=0.09, wpb=64911, bsz=128, num_updates=14785, lr=9.98897e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=169614
2021-06-20 17:45:51 | INFO | train_inner | epoch 005: 2865 / 3002 loss=2.58, ppl=5.98, wps=5812.1, ups=0.09, wpb=64840, bsz=128, num_updates=14786, lr=9.98897e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=169625
2021-06-20 17:46:02 | INFO | train_inner | epoch 005: 2866 / 3002 loss=2.387, ppl=5.23, wps=5988.8, ups=0.09, wpb=64782, bsz=128, num_updates=14787, lr=9.98897e-05, gnorm=1.866, loss_scale=8, train_wall=10, gb_free=2.8, wall=169636
2021-06-20 17:46:13 | INFO | train_inner | epoch 005: 2867 / 3002 loss=2.476, ppl=5.57, wps=5882.7, ups=0.09, wpb=64804, bsz=128, num_updates=14788, lr=9.98897e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=169647
2021-06-20 17:46:24 | INFO | train_inner | epoch 005: 2868 / 3002 loss=2.572, ppl=5.95, wps=5842.6, ups=0.09, wpb=64853, bsz=128, num_updates=14789, lr=9.98897e-05, gnorm=2.309, loss_scale=8, train_wall=11, gb_free=2.8, wall=169658
2021-06-20 17:46:35 | INFO | train_inner | epoch 005: 2869 / 3002 loss=2.438, ppl=5.42, wps=5896.9, ups=0.09, wpb=64923, bsz=128, num_updates=14790, lr=9.98897e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=169669
2021-06-20 17:46:46 | INFO | train_inner | epoch 005: 2870 / 3002 loss=2.413, ppl=5.32, wps=5969.2, ups=0.09, wpb=64837, bsz=128, num_updates=14791, lr=9.98897e-05, gnorm=1.907, loss_scale=8, train_wall=10, gb_free=2.8, wall=169680
2021-06-20 17:46:57 | INFO | train_inner | epoch 005: 2871 / 3002 loss=2.332, ppl=5.04, wps=5851.5, ups=0.09, wpb=64777, bsz=128, num_updates=14792, lr=9.98897e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=169691
2021-06-20 17:47:08 | INFO | train_inner | epoch 005: 2872 / 3002 loss=2.475, ppl=5.56, wps=5814.6, ups=0.09, wpb=64805, bsz=128, num_updates=14793, lr=9.98896e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=169702
2021-06-20 17:47:19 | INFO | train_inner | epoch 005: 2873 / 3002 loss=2.669, ppl=6.36, wps=5854.8, ups=0.09, wpb=64777, bsz=128, num_updates=14794, lr=9.98896e-05, gnorm=3.174, loss_scale=8, train_wall=11, gb_free=2.8, wall=169713
2021-06-20 17:47:30 | INFO | train_inner | epoch 005: 2874 / 3002 loss=2.533, ppl=5.79, wps=5762, ups=0.09, wpb=64794, bsz=128, num_updates=14795, lr=9.98896e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=169724
2021-06-20 17:47:41 | INFO | train_inner | epoch 005: 2875 / 3002 loss=2.493, ppl=5.63, wps=5917.1, ups=0.09, wpb=64824, bsz=128, num_updates=14796, lr=9.98896e-05, gnorm=1.907, loss_scale=8, train_wall=10, gb_free=2.8, wall=169735
2021-06-20 17:47:52 | INFO | train_inner | epoch 005: 2876 / 3002 loss=2.414, ppl=5.33, wps=5900, ups=0.09, wpb=64813, bsz=128, num_updates=14797, lr=9.98896e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=169746
2021-06-20 17:48:03 | INFO | train_inner | epoch 005: 2877 / 3002 loss=2.449, ppl=5.46, wps=5937.6, ups=0.09, wpb=64914, bsz=128, num_updates=14798, lr=9.98896e-05, gnorm=2.136, loss_scale=8, train_wall=11, gb_free=2.8, wall=169757
2021-06-20 17:48:14 | INFO | train_inner | epoch 005: 2878 / 3002 loss=2.593, ppl=6.04, wps=5930.2, ups=0.09, wpb=64773, bsz=128, num_updates=14799, lr=9.98896e-05, gnorm=2.358, loss_scale=8, train_wall=10, gb_free=2.8, wall=169768
2021-06-20 17:48:25 | INFO | train_inner | epoch 005: 2879 / 3002 loss=2.392, ppl=5.25, wps=5943.7, ups=0.09, wpb=64891, bsz=128, num_updates=14800, lr=9.98896e-05, gnorm=1.919, loss_scale=8, train_wall=10, gb_free=2.8, wall=169779
2021-06-20 17:48:36 | INFO | train_inner | epoch 005: 2880 / 3002 loss=2.591, ppl=6.03, wps=5743.2, ups=0.09, wpb=64823, bsz=128, num_updates=14801, lr=9.98896e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=169790
2021-06-20 17:48:47 | INFO | train_inner | epoch 005: 2881 / 3002 loss=2.443, ppl=5.44, wps=5877, ups=0.09, wpb=64843, bsz=128, num_updates=14802, lr=9.98896e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=169802
2021-06-20 17:48:58 | INFO | train_inner | epoch 005: 2882 / 3002 loss=2.621, ppl=6.15, wps=5842.4, ups=0.09, wpb=64799, bsz=128, num_updates=14803, lr=9.98896e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=169813
2021-06-20 17:49:09 | INFO | train_inner | epoch 005: 2883 / 3002 loss=2.514, ppl=5.71, wps=5868.3, ups=0.09, wpb=64838, bsz=128, num_updates=14804, lr=9.98896e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=169824
2021-06-20 17:49:20 | INFO | train_inner | epoch 005: 2884 / 3002 loss=2.291, ppl=4.89, wps=5839.6, ups=0.09, wpb=64819, bsz=128, num_updates=14805, lr=9.98896e-05, gnorm=1.833, loss_scale=8, train_wall=11, gb_free=2.8, wall=169835
2021-06-20 17:49:31 | INFO | train_inner | epoch 005: 2885 / 3002 loss=2.48, ppl=5.58, wps=5887, ups=0.09, wpb=64914, bsz=128, num_updates=14806, lr=9.98895e-05, gnorm=2.044, loss_scale=8, train_wall=11, gb_free=2.8, wall=169846
2021-06-20 17:49:43 | INFO | train_inner | epoch 005: 2886 / 3002 loss=2.501, ppl=5.66, wps=5774.5, ups=0.09, wpb=64849, bsz=128, num_updates=14807, lr=9.98895e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=169857
2021-06-20 17:49:54 | INFO | train_inner | epoch 005: 2887 / 3002 loss=2.514, ppl=5.71, wps=5873.7, ups=0.09, wpb=64805, bsz=128, num_updates=14808, lr=9.98895e-05, gnorm=2.465, loss_scale=8, train_wall=11, gb_free=2.8, wall=169868
2021-06-20 17:50:05 | INFO | train_inner | epoch 005: 2888 / 3002 loss=2.451, ppl=5.47, wps=5906.5, ups=0.09, wpb=64799, bsz=128, num_updates=14809, lr=9.98895e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=169879
2021-06-20 17:50:16 | INFO | train_inner | epoch 005: 2889 / 3002 loss=2.465, ppl=5.52, wps=5782.3, ups=0.09, wpb=64825, bsz=128, num_updates=14810, lr=9.98895e-05, gnorm=2.205, loss_scale=8, train_wall=11, gb_free=2.8, wall=169890
2021-06-20 17:50:27 | INFO | train_inner | epoch 005: 2890 / 3002 loss=2.493, ppl=5.63, wps=5890, ups=0.09, wpb=64825, bsz=128, num_updates=14811, lr=9.98895e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=169901
2021-06-20 17:50:38 | INFO | train_inner | epoch 005: 2891 / 3002 loss=2.721, ppl=6.59, wps=5750.2, ups=0.09, wpb=64742, bsz=128, num_updates=14812, lr=9.98895e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=169913
2021-06-20 17:50:49 | INFO | train_inner | epoch 005: 2892 / 3002 loss=2.484, ppl=5.59, wps=5858.1, ups=0.09, wpb=64877, bsz=128, num_updates=14813, lr=9.98895e-05, gnorm=1.949, loss_scale=8, train_wall=11, gb_free=2.8, wall=169924
2021-06-20 17:51:00 | INFO | train_inner | epoch 005: 2893 / 3002 loss=2.348, ppl=5.09, wps=5868.4, ups=0.09, wpb=64931, bsz=128, num_updates=14814, lr=9.98895e-05, gnorm=1.895, loss_scale=8, train_wall=11, gb_free=2.8, wall=169935
2021-06-20 17:51:11 | INFO | train_inner | epoch 005: 2894 / 3002 loss=2.566, ppl=5.92, wps=5883.6, ups=0.09, wpb=64848, bsz=128, num_updates=14815, lr=9.98895e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=169946
2021-06-20 17:51:23 | INFO | train_inner | epoch 005: 2895 / 3002 loss=2.375, ppl=5.19, wps=5807.4, ups=0.09, wpb=64893, bsz=128, num_updates=14816, lr=9.98895e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=169957
2021-06-20 17:51:34 | INFO | train_inner | epoch 005: 2896 / 3002 loss=2.481, ppl=5.58, wps=5794.9, ups=0.09, wpb=64817, bsz=128, num_updates=14817, lr=9.98895e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=169968
2021-06-20 17:51:45 | INFO | train_inner | epoch 005: 2897 / 3002 loss=2.519, ppl=5.73, wps=5757.7, ups=0.09, wpb=64800, bsz=128, num_updates=14818, lr=9.98894e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=169979
2021-06-20 17:51:56 | INFO | train_inner | epoch 005: 2898 / 3002 loss=2.421, ppl=5.36, wps=5795, ups=0.09, wpb=64798, bsz=128, num_updates=14819, lr=9.98894e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=169990
2021-06-20 17:52:07 | INFO | train_inner | epoch 005: 2899 / 3002 loss=2.488, ppl=5.61, wps=5776.3, ups=0.09, wpb=64773, bsz=128, num_updates=14820, lr=9.98894e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=170002
2021-06-20 17:52:18 | INFO | train_inner | epoch 005: 2900 / 3002 loss=2.368, ppl=5.16, wps=5835.8, ups=0.09, wpb=64824, bsz=128, num_updates=14821, lr=9.98894e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=170013
2021-06-20 17:52:30 | INFO | train_inner | epoch 005: 2901 / 3002 loss=2.504, ppl=5.67, wps=5856.7, ups=0.09, wpb=64842, bsz=128, num_updates=14822, lr=9.98894e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=170024
2021-06-20 17:52:41 | INFO | train_inner | epoch 005: 2902 / 3002 loss=2.54, ppl=5.82, wps=5788.5, ups=0.09, wpb=64792, bsz=128, num_updates=14823, lr=9.98894e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=170035
2021-06-20 17:52:52 | INFO | train_inner | epoch 005: 2903 / 3002 loss=2.609, ppl=6.1, wps=5824.3, ups=0.09, wpb=64743, bsz=128, num_updates=14824, lr=9.98894e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=170046
2021-06-20 17:53:03 | INFO | train_inner | epoch 005: 2904 / 3002 loss=2.494, ppl=5.63, wps=5877.5, ups=0.09, wpb=64775, bsz=128, num_updates=14825, lr=9.98894e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=170057
2021-06-20 17:53:14 | INFO | train_inner | epoch 005: 2905 / 3002 loss=2.41, ppl=5.31, wps=5859, ups=0.09, wpb=64804, bsz=128, num_updates=14826, lr=9.98894e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=170068
2021-06-20 17:53:25 | INFO | train_inner | epoch 005: 2906 / 3002 loss=2.706, ppl=6.52, wps=5747.4, ups=0.09, wpb=64800, bsz=128, num_updates=14827, lr=9.98894e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=170080
2021-06-20 17:53:36 | INFO | train_inner | epoch 005: 2907 / 3002 loss=2.497, ppl=5.65, wps=5891.3, ups=0.09, wpb=64849, bsz=128, num_updates=14828, lr=9.98894e-05, gnorm=2.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=170091
2021-06-20 17:53:47 | INFO | train_inner | epoch 005: 2908 / 3002 loss=2.44, ppl=5.42, wps=5776.4, ups=0.09, wpb=64754, bsz=128, num_updates=14829, lr=9.98894e-05, gnorm=1.895, loss_scale=8, train_wall=11, gb_free=2.8, wall=170102
2021-06-20 17:53:58 | INFO | train_inner | epoch 005: 2909 / 3002 loss=2.574, ppl=5.95, wps=5905.4, ups=0.09, wpb=64782, bsz=128, num_updates=14830, lr=9.98894e-05, gnorm=1.932, loss_scale=8, train_wall=11, gb_free=2.8, wall=170113
2021-06-20 17:54:10 | INFO | train_inner | epoch 005: 2910 / 3002 loss=2.478, ppl=5.57, wps=5797.6, ups=0.09, wpb=64676, bsz=128, num_updates=14831, lr=9.98893e-05, gnorm=1.867, loss_scale=8, train_wall=11, gb_free=2.8, wall=170124
2021-06-20 17:54:21 | INFO | train_inner | epoch 005: 2911 / 3002 loss=2.548, ppl=5.85, wps=5748.6, ups=0.09, wpb=64815, bsz=128, num_updates=14832, lr=9.98893e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=170135
2021-06-20 17:54:32 | INFO | train_inner | epoch 005: 2912 / 3002 loss=2.481, ppl=5.58, wps=5845.3, ups=0.09, wpb=64819, bsz=128, num_updates=14833, lr=9.98893e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=170146
2021-06-20 17:54:43 | INFO | train_inner | epoch 005: 2913 / 3002 loss=2.419, ppl=5.35, wps=5848.1, ups=0.09, wpb=64911, bsz=128, num_updates=14834, lr=9.98893e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=170157
2021-06-20 17:54:54 | INFO | train_inner | epoch 005: 2914 / 3002 loss=2.576, ppl=5.96, wps=6017.1, ups=0.09, wpb=64888, bsz=128, num_updates=14835, lr=9.98893e-05, gnorm=1.926, loss_scale=8, train_wall=10, gb_free=2.8, wall=170168
2021-06-20 17:55:05 | INFO | train_inner | epoch 005: 2915 / 3002 loss=2.506, ppl=5.68, wps=5863, ups=0.09, wpb=64931, bsz=128, num_updates=14836, lr=9.98893e-05, gnorm=2.117, loss_scale=8, train_wall=11, gb_free=2.8, wall=170179
2021-06-20 17:55:16 | INFO | train_inner | epoch 005: 2916 / 3002 loss=2.617, ppl=6.14, wps=5919.1, ups=0.09, wpb=64778, bsz=128, num_updates=14837, lr=9.98893e-05, gnorm=1.96, loss_scale=8, train_wall=10, gb_free=2.8, wall=170190
2021-06-20 17:55:27 | INFO | train_inner | epoch 005: 2917 / 3002 loss=2.544, ppl=5.83, wps=5998, ups=0.09, wpb=64846, bsz=128, num_updates=14838, lr=9.98893e-05, gnorm=1.962, loss_scale=8, train_wall=10, gb_free=2.8, wall=170201
2021-06-20 17:55:38 | INFO | train_inner | epoch 005: 2918 / 3002 loss=2.569, ppl=5.93, wps=5780.4, ups=0.09, wpb=64767, bsz=128, num_updates=14839, lr=9.98893e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=170212
2021-06-20 17:55:49 | INFO | train_inner | epoch 005: 2919 / 3002 loss=2.546, ppl=5.84, wps=5772.5, ups=0.09, wpb=64765, bsz=128, num_updates=14840, lr=9.98893e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=170223
2021-06-20 17:56:00 | INFO | train_inner | epoch 005: 2920 / 3002 loss=2.532, ppl=5.78, wps=5884.1, ups=0.09, wpb=64895, bsz=128, num_updates=14841, lr=9.98893e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=170234
2021-06-20 17:56:11 | INFO | train_inner | epoch 005: 2921 / 3002 loss=2.552, ppl=5.86, wps=5975.4, ups=0.09, wpb=64825, bsz=128, num_updates=14842, lr=9.98893e-05, gnorm=2.09, loss_scale=8, train_wall=10, gb_free=2.8, wall=170245
2021-06-20 17:56:22 | INFO | train_inner | epoch 005: 2922 / 3002 loss=2.471, ppl=5.54, wps=5754.4, ups=0.09, wpb=64816, bsz=128, num_updates=14843, lr=9.98892e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=170257
2021-06-20 17:56:33 | INFO | train_inner | epoch 005: 2923 / 3002 loss=2.433, ppl=5.4, wps=5823.1, ups=0.09, wpb=64843, bsz=128, num_updates=14844, lr=9.98892e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=170268
2021-06-20 17:56:44 | INFO | train_inner | epoch 005: 2924 / 3002 loss=2.491, ppl=5.62, wps=5868.4, ups=0.09, wpb=64887, bsz=128, num_updates=14845, lr=9.98892e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=170279
2021-06-20 17:56:55 | INFO | train_inner | epoch 005: 2925 / 3002 loss=2.501, ppl=5.66, wps=6002, ups=0.09, wpb=64845, bsz=128, num_updates=14846, lr=9.98892e-05, gnorm=1.989, loss_scale=8, train_wall=10, gb_free=2.8, wall=170290
2021-06-20 17:57:06 | INFO | train_inner | epoch 005: 2926 / 3002 loss=2.498, ppl=5.65, wps=5850, ups=0.09, wpb=64880, bsz=128, num_updates=14847, lr=9.98892e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=170301
2021-06-20 17:57:17 | INFO | train_inner | epoch 005: 2927 / 3002 loss=2.481, ppl=5.58, wps=5800.5, ups=0.09, wpb=64863, bsz=128, num_updates=14848, lr=9.98892e-05, gnorm=1.865, loss_scale=8, train_wall=11, gb_free=2.8, wall=170312
2021-06-20 17:57:28 | INFO | train_inner | epoch 005: 2928 / 3002 loss=2.666, ppl=6.35, wps=5933.9, ups=0.09, wpb=64850, bsz=128, num_updates=14849, lr=9.98892e-05, gnorm=1.998, loss_scale=8, train_wall=10, gb_free=2.8, wall=170323
2021-06-20 17:57:39 | INFO | train_inner | epoch 005: 2929 / 3002 loss=2.404, ppl=5.29, wps=5904.6, ups=0.09, wpb=64857, bsz=128, num_updates=14850, lr=9.98892e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=170334
2021-06-20 17:57:51 | INFO | train_inner | epoch 005: 2930 / 3002 loss=2.466, ppl=5.53, wps=5818.5, ups=0.09, wpb=64801, bsz=128, num_updates=14851, lr=9.98892e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=170345
2021-06-20 17:58:01 | INFO | train_inner | epoch 005: 2931 / 3002 loss=2.478, ppl=5.57, wps=5941, ups=0.09, wpb=64865, bsz=128, num_updates=14852, lr=9.98892e-05, gnorm=1.943, loss_scale=8, train_wall=10, gb_free=2.8, wall=170356
2021-06-20 17:58:13 | INFO | train_inner | epoch 005: 2932 / 3002 loss=2.409, ppl=5.31, wps=5756.9, ups=0.09, wpb=64819, bsz=128, num_updates=14853, lr=9.98892e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=170367
2021-06-20 17:58:24 | INFO | train_inner | epoch 005: 2933 / 3002 loss=2.448, ppl=5.46, wps=5929.4, ups=0.09, wpb=64736, bsz=128, num_updates=14854, lr=9.98892e-05, gnorm=1.872, loss_scale=8, train_wall=10, gb_free=2.8, wall=170378
2021-06-20 17:58:35 | INFO | train_inner | epoch 005: 2934 / 3002 loss=2.582, ppl=5.99, wps=5895.9, ups=0.09, wpb=64823, bsz=128, num_updates=14855, lr=9.98892e-05, gnorm=1.967, loss_scale=8, train_wall=10, gb_free=2.8, wall=170389
2021-06-20 17:58:46 | INFO | train_inner | epoch 005: 2935 / 3002 loss=2.55, ppl=5.86, wps=5862.1, ups=0.09, wpb=64880, bsz=128, num_updates=14856, lr=9.98891e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=170400
2021-06-20 17:58:57 | INFO | train_inner | epoch 005: 2936 / 3002 loss=2.482, ppl=5.59, wps=5842.7, ups=0.09, wpb=64850, bsz=128, num_updates=14857, lr=9.98891e-05, gnorm=2.068, loss_scale=8, train_wall=11, gb_free=2.8, wall=170411
2021-06-20 17:59:08 | INFO | train_inner | epoch 005: 2937 / 3002 loss=2.386, ppl=5.23, wps=5696.6, ups=0.09, wpb=64830, bsz=128, num_updates=14858, lr=9.98891e-05, gnorm=1.914, loss_scale=8, train_wall=11, gb_free=2.8, wall=170422
2021-06-20 17:59:19 | INFO | train_inner | epoch 005: 2938 / 3002 loss=2.486, ppl=5.6, wps=5812.3, ups=0.09, wpb=64865, bsz=128, num_updates=14859, lr=9.98891e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=170434
2021-06-20 17:59:31 | INFO | train_inner | epoch 005: 2939 / 3002 loss=2.577, ppl=5.97, wps=5738.3, ups=0.09, wpb=64758, bsz=128, num_updates=14860, lr=9.98891e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=170445
2021-06-20 17:59:42 | INFO | train_inner | epoch 005: 2940 / 3002 loss=2.391, ppl=5.24, wps=5918.8, ups=0.09, wpb=64792, bsz=128, num_updates=14861, lr=9.98891e-05, gnorm=1.946, loss_scale=8, train_wall=10, gb_free=2.8, wall=170456
2021-06-20 17:59:53 | INFO | train_inner | epoch 005: 2941 / 3002 loss=2.499, ppl=5.65, wps=5743.6, ups=0.09, wpb=64871, bsz=128, num_updates=14862, lr=9.98891e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=170467
2021-06-20 18:00:04 | INFO | train_inner | epoch 005: 2942 / 3002 loss=2.588, ppl=6.01, wps=5812.1, ups=0.09, wpb=64812, bsz=128, num_updates=14863, lr=9.98891e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=170478
2021-06-20 18:00:15 | INFO | train_inner | epoch 005: 2943 / 3002 loss=2.546, ppl=5.84, wps=5873.8, ups=0.09, wpb=64857, bsz=128, num_updates=14864, lr=9.98891e-05, gnorm=2.137, loss_scale=8, train_wall=11, gb_free=2.8, wall=170489
2021-06-20 18:00:26 | INFO | train_inner | epoch 005: 2944 / 3002 loss=2.574, ppl=5.96, wps=5849.5, ups=0.09, wpb=64834, bsz=128, num_updates=14865, lr=9.98891e-05, gnorm=2.063, loss_scale=8, train_wall=11, gb_free=2.8, wall=170500
2021-06-20 18:00:37 | INFO | train_inner | epoch 005: 2945 / 3002 loss=2.344, ppl=5.08, wps=5823.9, ups=0.09, wpb=64785, bsz=128, num_updates=14866, lr=9.98891e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=170512
2021-06-20 18:00:48 | INFO | train_inner | epoch 005: 2946 / 3002 loss=2.538, ppl=5.81, wps=5899.6, ups=0.09, wpb=64829, bsz=128, num_updates=14867, lr=9.98891e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=170523
2021-06-20 18:00:59 | INFO | train_inner | epoch 005: 2947 / 3002 loss=2.565, ppl=5.92, wps=5994.6, ups=0.09, wpb=64935, bsz=128, num_updates=14868, lr=9.9889e-05, gnorm=1.973, loss_scale=8, train_wall=10, gb_free=2.8, wall=170533
2021-06-20 18:01:10 | INFO | train_inner | epoch 005: 2948 / 3002 loss=2.595, ppl=6.04, wps=5843.9, ups=0.09, wpb=64760, bsz=128, num_updates=14869, lr=9.9889e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=170544
2021-06-20 18:01:21 | INFO | train_inner | epoch 005: 2949 / 3002 loss=2.458, ppl=5.5, wps=5877.5, ups=0.09, wpb=64767, bsz=128, num_updates=14870, lr=9.9889e-05, gnorm=1.884, loss_scale=8, train_wall=11, gb_free=2.8, wall=170555
2021-06-20 18:01:32 | INFO | train_inner | epoch 005: 2950 / 3002 loss=2.498, ppl=5.65, wps=5794.9, ups=0.09, wpb=64880, bsz=128, num_updates=14871, lr=9.9889e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=170567
2021-06-20 18:01:44 | INFO | train_inner | epoch 005: 2951 / 3002 loss=2.368, ppl=5.16, wps=5773.5, ups=0.09, wpb=64823, bsz=128, num_updates=14872, lr=9.9889e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=170578
2021-06-20 18:01:55 | INFO | train_inner | epoch 005: 2952 / 3002 loss=2.457, ppl=5.49, wps=5820.4, ups=0.09, wpb=64805, bsz=128, num_updates=14873, lr=9.9889e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=170589
2021-06-20 18:02:06 | INFO | train_inner | epoch 005: 2953 / 3002 loss=2.579, ppl=5.98, wps=5852.8, ups=0.09, wpb=64764, bsz=128, num_updates=14874, lr=9.9889e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=170600
2021-06-20 18:02:17 | INFO | train_inner | epoch 005: 2954 / 3002 loss=2.692, ppl=6.46, wps=5744, ups=0.09, wpb=64817, bsz=128, num_updates=14875, lr=9.9889e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=170611
2021-06-20 18:02:28 | INFO | train_inner | epoch 005: 2955 / 3002 loss=2.45, ppl=5.46, wps=5694.8, ups=0.09, wpb=64815, bsz=128, num_updates=14876, lr=9.9889e-05, gnorm=2.066, loss_scale=8, train_wall=11, gb_free=2.8, wall=170623
2021-06-20 18:02:40 | INFO | train_inner | epoch 005: 2956 / 3002 loss=2.609, ppl=6.1, wps=5767.5, ups=0.09, wpb=64793, bsz=128, num_updates=14877, lr=9.9889e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=170634
2021-06-20 18:02:51 | INFO | train_inner | epoch 005: 2957 / 3002 loss=2.599, ppl=6.06, wps=5912, ups=0.09, wpb=64890, bsz=128, num_updates=14878, lr=9.9889e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=170645
2021-06-20 18:03:02 | INFO | train_inner | epoch 005: 2958 / 3002 loss=2.589, ppl=6.02, wps=5837, ups=0.09, wpb=64781, bsz=128, num_updates=14879, lr=9.9889e-05, gnorm=1.836, loss_scale=8, train_wall=11, gb_free=2.8, wall=170656
2021-06-20 18:03:13 | INFO | train_inner | epoch 005: 2959 / 3002 loss=2.611, ppl=6.11, wps=5754.1, ups=0.09, wpb=64879, bsz=128, num_updates=14880, lr=9.9889e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=170667
2021-06-20 18:03:24 | INFO | train_inner | epoch 005: 2960 / 3002 loss=2.555, ppl=5.87, wps=5795.1, ups=0.09, wpb=64772, bsz=128, num_updates=14881, lr=9.98889e-05, gnorm=1.89, loss_scale=8, train_wall=11, gb_free=2.8, wall=170679
2021-06-20 18:03:35 | INFO | train_inner | epoch 005: 2961 / 3002 loss=2.682, ppl=6.42, wps=5775.6, ups=0.09, wpb=64763, bsz=128, num_updates=14882, lr=9.98889e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=170690
2021-06-20 18:03:47 | INFO | train_inner | epoch 005: 2962 / 3002 loss=2.418, ppl=5.35, wps=5820.9, ups=0.09, wpb=64889, bsz=128, num_updates=14883, lr=9.98889e-05, gnorm=1.862, loss_scale=8, train_wall=11, gb_free=2.8, wall=170701
2021-06-20 18:03:58 | INFO | train_inner | epoch 005: 2963 / 3002 loss=2.396, ppl=5.26, wps=5890.7, ups=0.09, wpb=64804, bsz=128, num_updates=14884, lr=9.98889e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=170712
2021-06-20 18:04:09 | INFO | train_inner | epoch 005: 2964 / 3002 loss=2.512, ppl=5.7, wps=5808.5, ups=0.09, wpb=64816, bsz=128, num_updates=14885, lr=9.98889e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=170723
2021-06-20 18:04:20 | INFO | train_inner | epoch 005: 2965 / 3002 loss=2.555, ppl=5.88, wps=5759.5, ups=0.09, wpb=64732, bsz=128, num_updates=14886, lr=9.98889e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=170734
2021-06-20 18:04:31 | INFO | train_inner | epoch 005: 2966 / 3002 loss=2.38, ppl=5.2, wps=5763.6, ups=0.09, wpb=64876, bsz=128, num_updates=14887, lr=9.98889e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=170746
2021-06-20 18:04:42 | INFO | train_inner | epoch 005: 2967 / 3002 loss=2.592, ppl=6.03, wps=5781.4, ups=0.09, wpb=64769, bsz=128, num_updates=14888, lr=9.98889e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=170757
2021-06-20 18:04:54 | INFO | train_inner | epoch 005: 2968 / 3002 loss=2.499, ppl=5.65, wps=5787.7, ups=0.09, wpb=64871, bsz=128, num_updates=14889, lr=9.98889e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=170768
2021-06-20 18:05:05 | INFO | train_inner | epoch 005: 2969 / 3002 loss=2.439, ppl=5.42, wps=5792.5, ups=0.09, wpb=64873, bsz=128, num_updates=14890, lr=9.98889e-05, gnorm=1.979, loss_scale=8, train_wall=11, gb_free=2.8, wall=170779
2021-06-20 18:05:16 | INFO | train_inner | epoch 005: 2970 / 3002 loss=2.53, ppl=5.77, wps=5977.4, ups=0.09, wpb=64961, bsz=128, num_updates=14891, lr=9.98889e-05, gnorm=1.98, loss_scale=8, train_wall=10, gb_free=2.8, wall=170790
2021-06-20 18:05:27 | INFO | train_inner | epoch 005: 2971 / 3002 loss=2.378, ppl=5.2, wps=5861, ups=0.09, wpb=64850, bsz=128, num_updates=14892, lr=9.98889e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=170801
2021-06-20 18:05:38 | INFO | train_inner | epoch 005: 2972 / 3002 loss=2.469, ppl=5.54, wps=5931.2, ups=0.09, wpb=64851, bsz=128, num_updates=14893, lr=9.98888e-05, gnorm=1.869, loss_scale=8, train_wall=10, gb_free=2.8, wall=170812
2021-06-20 18:05:49 | INFO | train_inner | epoch 005: 2973 / 3002 loss=2.465, ppl=5.52, wps=5929.9, ups=0.09, wpb=64817, bsz=128, num_updates=14894, lr=9.98888e-05, gnorm=1.891, loss_scale=8, train_wall=10, gb_free=2.8, wall=170823
2021-06-20 18:06:00 | INFO | train_inner | epoch 005: 2974 / 3002 loss=2.417, ppl=5.34, wps=5776.4, ups=0.09, wpb=64793, bsz=128, num_updates=14895, lr=9.98888e-05, gnorm=1.885, loss_scale=8, train_wall=11, gb_free=2.8, wall=170834
2021-06-20 18:06:11 | INFO | train_inner | epoch 005: 2975 / 3002 loss=2.41, ppl=5.31, wps=5776.4, ups=0.09, wpb=64804, bsz=128, num_updates=14896, lr=9.98888e-05, gnorm=2.059, loss_scale=8, train_wall=11, gb_free=2.8, wall=170845
2021-06-20 18:06:22 | INFO | train_inner | epoch 005: 2976 / 3002 loss=2.551, ppl=5.86, wps=5868.2, ups=0.09, wpb=64860, bsz=128, num_updates=14897, lr=9.98888e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=170856
2021-06-20 18:06:33 | INFO | train_inner | epoch 005: 2977 / 3002 loss=2.455, ppl=5.48, wps=5749.7, ups=0.09, wpb=64779, bsz=128, num_updates=14898, lr=9.98888e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=170868
2021-06-20 18:06:44 | INFO | train_inner | epoch 005: 2978 / 3002 loss=2.615, ppl=6.13, wps=6000.7, ups=0.09, wpb=64875, bsz=128, num_updates=14899, lr=9.98888e-05, gnorm=1.888, loss_scale=8, train_wall=10, gb_free=2.8, wall=170879
2021-06-20 18:06:55 | INFO | train_inner | epoch 005: 2979 / 3002 loss=2.474, ppl=5.56, wps=5741.5, ups=0.09, wpb=64806, bsz=128, num_updates=14900, lr=9.98888e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=170890
2021-06-20 18:07:07 | INFO | train_inner | epoch 005: 2980 / 3002 loss=2.56, ppl=5.9, wps=5731.3, ups=0.09, wpb=64692, bsz=128, num_updates=14901, lr=9.98888e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=170901
2021-06-20 18:07:18 | INFO | train_inner | epoch 005: 2981 / 3002 loss=2.456, ppl=5.49, wps=5829.1, ups=0.09, wpb=64878, bsz=128, num_updates=14902, lr=9.98888e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=170912
2021-06-20 18:07:29 | INFO | train_inner | epoch 005: 2982 / 3002 loss=2.479, ppl=5.57, wps=5834.1, ups=0.09, wpb=64850, bsz=128, num_updates=14903, lr=9.98888e-05, gnorm=2.041, loss_scale=8, train_wall=11, gb_free=2.8, wall=170923
2021-06-20 18:07:40 | INFO | train_inner | epoch 005: 2983 / 3002 loss=2.417, ppl=5.34, wps=5847.1, ups=0.09, wpb=64823, bsz=128, num_updates=14904, lr=9.98888e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=170934
2021-06-20 18:07:51 | INFO | train_inner | epoch 005: 2984 / 3002 loss=2.599, ppl=6.06, wps=5880.8, ups=0.09, wpb=64834, bsz=128, num_updates=14905, lr=9.98888e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=170945
2021-06-20 18:08:02 | INFO | train_inner | epoch 005: 2985 / 3002 loss=2.488, ppl=5.61, wps=5885.3, ups=0.09, wpb=64829, bsz=128, num_updates=14906, lr=9.98887e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=170956
2021-06-20 18:08:13 | INFO | train_inner | epoch 005: 2986 / 3002 loss=2.371, ppl=5.17, wps=5795, ups=0.09, wpb=64887, bsz=128, num_updates=14907, lr=9.98887e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=170968
2021-06-20 18:08:24 | INFO | train_inner | epoch 005: 2987 / 3002 loss=2.445, ppl=5.45, wps=5907.7, ups=0.09, wpb=64842, bsz=128, num_updates=14908, lr=9.98887e-05, gnorm=1.875, loss_scale=8, train_wall=10, gb_free=2.8, wall=170979
2021-06-20 18:08:36 | INFO | train_inner | epoch 005: 2988 / 3002 loss=2.493, ppl=5.63, wps=5796.2, ups=0.09, wpb=64784, bsz=128, num_updates=14909, lr=9.98887e-05, gnorm=2.045, loss_scale=16, train_wall=11, gb_free=2.8, wall=170990
2021-06-20 18:08:46 | INFO | train_inner | epoch 005: 2989 / 3002 loss=2.461, ppl=5.51, wps=5899.7, ups=0.09, wpb=64784, bsz=128, num_updates=14910, lr=9.98887e-05, gnorm=1.95, loss_scale=16, train_wall=11, gb_free=2.8, wall=171001
2021-06-20 18:08:58 | INFO | train_inner | epoch 005: 2990 / 3002 loss=2.483, ppl=5.59, wps=5850.1, ups=0.09, wpb=64873, bsz=128, num_updates=14911, lr=9.98887e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=171012
2021-06-20 18:09:09 | INFO | train_inner | epoch 005: 2991 / 3002 loss=2.559, ppl=5.89, wps=5801.8, ups=0.09, wpb=64831, bsz=128, num_updates=14912, lr=9.98887e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=171023
2021-06-20 18:09:19 | INFO | train_inner | epoch 005: 2992 / 3002 loss=2.477, ppl=5.57, wps=6068.4, ups=0.09, wpb=64839, bsz=128, num_updates=14913, lr=9.98887e-05, gnorm=2.298, loss_scale=16, train_wall=10, gb_free=2.8, wall=171034
2021-06-20 18:09:30 | INFO | train_inner | epoch 005: 2993 / 3002 loss=2.438, ppl=5.42, wps=5889.5, ups=0.09, wpb=64873, bsz=128, num_updates=14914, lr=9.98887e-05, gnorm=1.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=171045
2021-06-20 18:09:41 | INFO | train_inner | epoch 005: 2994 / 3002 loss=2.561, ppl=5.9, wps=5873.7, ups=0.09, wpb=64832, bsz=128, num_updates=14915, lr=9.98887e-05, gnorm=1.963, loss_scale=16, train_wall=11, gb_free=2.8, wall=171056
2021-06-20 18:09:53 | INFO | train_inner | epoch 005: 2995 / 3002 loss=2.46, ppl=5.5, wps=5877.7, ups=0.09, wpb=64901, bsz=128, num_updates=14916, lr=9.98887e-05, gnorm=1.886, loss_scale=16, train_wall=11, gb_free=2.8, wall=171067
2021-06-20 18:10:04 | INFO | train_inner | epoch 005: 2996 / 3002 loss=2.555, ppl=5.88, wps=5743.2, ups=0.09, wpb=64728, bsz=128, num_updates=14917, lr=9.98887e-05, gnorm=1.855, loss_scale=16, train_wall=11, gb_free=2.8, wall=171078
2021-06-20 18:10:15 | INFO | train_inner | epoch 005: 2997 / 3002 loss=2.351, ppl=5.1, wps=5783.6, ups=0.09, wpb=64829, bsz=128, num_updates=14918, lr=9.98886e-05, gnorm=2.275, loss_scale=16, train_wall=11, gb_free=2.8, wall=171089
2021-06-20 18:10:26 | INFO | train_inner | epoch 005: 2998 / 3002 loss=2.497, ppl=5.65, wps=5810.3, ups=0.09, wpb=64818, bsz=128, num_updates=14919, lr=9.98886e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=171100
2021-06-20 18:10:37 | INFO | train_inner | epoch 005: 2999 / 3002 loss=2.355, ppl=5.11, wps=5789.3, ups=0.09, wpb=64852, bsz=128, num_updates=14920, lr=9.98886e-05, gnorm=1.863, loss_scale=16, train_wall=11, gb_free=2.8, wall=171112
2021-06-20 18:10:48 | INFO | train_inner | epoch 005: 3000 / 3002 loss=2.45, ppl=5.46, wps=5932.6, ups=0.09, wpb=64907, bsz=128, num_updates=14921, lr=9.98886e-05, gnorm=1.937, loss_scale=16, train_wall=10, gb_free=2.8, wall=171123
2021-06-20 18:10:59 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 18:11:06 | INFO | train_inner | epoch 005: 3002 / 3002 loss=2.4, ppl=5.28, wps=2104.2, ups=0.06, wpb=36411, bsz=72, num_updates=14922, lr=9.98886e-05, gnorm=2.57, loss_scale=8, train_wall=17, gb_free=2.8, wall=171140
2021-06-20 18:11:06 | INFO | fairseq_cli.train | begin validation on "valid" subset
2021-06-20 18:26:02 | INFO | valid | epoch 005 | valid on 'valid' subset | loss 2.365 | ppl 5.15 | wps 19686.9 | wpb 506.5 | bsz 1 | num_updates 14922 | best_loss 2.365
2021-06-20 18:26:02 | INFO | fairseq.checkpoint_utils | Preparing to save checkpoint for epoch 5 @ 14922 updates
2021-06-20 18:26:02 | INFO | fairseq.trainer | Saving checkpoint to checkpoints/checkpoint5.pt
2021-06-20 18:26:16 | INFO | fairseq.trainer | Finished saving checkpoint to checkpoints/checkpoint5.pt
2021-06-20 18:33:20 | INFO | fairseq.checkpoint_utils | Saved checkpoint checkpoints/checkpoint5.pt (epoch 5 @ 14922 updates, score 2.365) (writing took 437.672937018011 seconds)
2021-06-20 18:33:20 | INFO | fairseq_cli.train | end of epoch 5 (average epoch stats below)
2021-06-20 18:33:20 | INFO | train | epoch 005 | loss 2.514 | ppl 5.71 | wps 5586.4 | ups 0.09 | wpb 64819.4 | bsz 128 | num_updates 14922 | lr 9.98886e-05 | gnorm 2.04 | loss_scale 8 | train_wall 31919 | gb_free 2.8 | wall 172474
2021-06-20 18:33:20 | INFO | fairseq.trainer | begin training epoch 6
2021-06-20 18:33:20 | INFO | fairseq_cli.train | Start iterating over samples
2021-06-20 18:33:31 | INFO | train_inner | epoch 006: 1 / 3002 loss=2.387, ppl=5.23, wps=48.2, ups=0, wpb=64865, bsz=128, num_updates=14923, lr=9.98886e-05, gnorm=1.938, loss_scale=8, train_wall=10, gb_free=2.8, wall=172485
2021-06-20 18:33:41 | INFO | train_inner | epoch 006: 2 / 3002 loss=2.497, ppl=5.65, wps=6178.9, ups=0.1, wpb=64767, bsz=128, num_updates=14924, lr=9.98886e-05, gnorm=2.015, loss_scale=8, train_wall=10, gb_free=2.8, wall=172495
2021-06-20 18:33:52 | INFO | train_inner | epoch 006: 3 / 3002 loss=2.383, ppl=5.22, wps=6143.6, ups=0.09, wpb=64876, bsz=128, num_updates=14925, lr=9.98886e-05, gnorm=1.929, loss_scale=8, train_wall=10, gb_free=2.8, wall=172506
2021-06-20 18:34:02 | INFO | train_inner | epoch 006: 4 / 3002 loss=2.434, ppl=5.4, wps=6198.3, ups=0.1, wpb=64813, bsz=128, num_updates=14926, lr=9.98886e-05, gnorm=1.927, loss_scale=8, train_wall=10, gb_free=2.8, wall=172516
2021-06-20 18:34:12 | INFO | train_inner | epoch 006: 5 / 3002 loss=2.499, ppl=5.65, wps=6257.8, ups=0.1, wpb=64930, bsz=128, num_updates=14927, lr=9.98886e-05, gnorm=1.999, loss_scale=8, train_wall=10, gb_free=2.8, wall=172527
2021-06-20 18:34:23 | INFO | train_inner | epoch 006: 6 / 3002 loss=2.429, ppl=5.38, wps=6183, ups=0.1, wpb=64762, bsz=128, num_updates=14928, lr=9.98886e-05, gnorm=2.012, loss_scale=8, train_wall=10, gb_free=2.8, wall=172537
2021-06-20 18:34:34 | INFO | train_inner | epoch 006: 7 / 3002 loss=2.434, ppl=5.4, wps=5980.6, ups=0.09, wpb=64818, bsz=128, num_updates=14929, lr=9.98886e-05, gnorm=1.92, loss_scale=8, train_wall=10, gb_free=2.8, wall=172548
2021-06-20 18:34:44 | INFO | train_inner | epoch 006: 8 / 3002 loss=2.39, ppl=5.24, wps=6060.8, ups=0.09, wpb=64847, bsz=128, num_updates=14930, lr=9.98886e-05, gnorm=1.939, loss_scale=8, train_wall=10, gb_free=2.8, wall=172559
2021-06-20 18:34:55 | INFO | train_inner | epoch 006: 9 / 3002 loss=2.541, ppl=5.82, wps=6087.3, ups=0.09, wpb=64858, bsz=128, num_updates=14931, lr=9.98885e-05, gnorm=1.912, loss_scale=8, train_wall=10, gb_free=2.8, wall=172569
2021-06-20 18:35:06 | INFO | train_inner | epoch 006: 10 / 3002 loss=2.42, ppl=5.35, wps=6016.6, ups=0.09, wpb=64830, bsz=128, num_updates=14932, lr=9.98885e-05, gnorm=1.918, loss_scale=8, train_wall=10, gb_free=2.8, wall=172580
2021-06-20 18:35:17 | INFO | train_inner | epoch 006: 11 / 3002 loss=2.475, ppl=5.56, wps=6078.8, ups=0.09, wpb=64808, bsz=128, num_updates=14933, lr=9.98885e-05, gnorm=1.903, loss_scale=8, train_wall=10, gb_free=2.8, wall=172591
2021-06-20 18:35:28 | INFO | train_inner | epoch 006: 12 / 3002 loss=2.456, ppl=5.49, wps=5854.3, ups=0.09, wpb=64878, bsz=128, num_updates=14934, lr=9.98885e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=172602
2021-06-20 18:35:39 | INFO | train_inner | epoch 006: 13 / 3002 loss=2.596, ppl=6.05, wps=5918.6, ups=0.09, wpb=64881, bsz=128, num_updates=14935, lr=9.98885e-05, gnorm=2.055, loss_scale=8, train_wall=10, gb_free=2.8, wall=172613
2021-06-20 18:35:50 | INFO | train_inner | epoch 006: 14 / 3002 loss=2.489, ppl=5.61, wps=5884.9, ups=0.09, wpb=64772, bsz=128, num_updates=14936, lr=9.98885e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=172624
2021-06-20 18:36:00 | INFO | train_inner | epoch 006: 15 / 3002 loss=2.531, ppl=5.78, wps=5974.3, ups=0.09, wpb=64763, bsz=128, num_updates=14937, lr=9.98885e-05, gnorm=1.999, loss_scale=8, train_wall=10, gb_free=2.8, wall=172635
2021-06-20 18:36:11 | INFO | train_inner | epoch 006: 16 / 3002 loss=2.598, ppl=6.05, wps=6071.4, ups=0.09, wpb=64804, bsz=128, num_updates=14938, lr=9.98885e-05, gnorm=1.998, loss_scale=8, train_wall=10, gb_free=2.8, wall=172645
2021-06-20 18:36:22 | INFO | train_inner | epoch 006: 17 / 3002 loss=2.477, ppl=5.57, wps=5943.7, ups=0.09, wpb=64817, bsz=128, num_updates=14939, lr=9.98885e-05, gnorm=2.129, loss_scale=8, train_wall=10, gb_free=2.8, wall=172656
2021-06-20 18:36:33 | INFO | train_inner | epoch 006: 18 / 3002 loss=2.427, ppl=5.38, wps=5896, ups=0.09, wpb=64718, bsz=128, num_updates=14940, lr=9.98885e-05, gnorm=2.012, loss_scale=8, train_wall=11, gb_free=2.8, wall=172667
2021-06-20 18:36:44 | INFO | train_inner | epoch 006: 19 / 3002 loss=2.493, ppl=5.63, wps=5891.1, ups=0.09, wpb=64732, bsz=128, num_updates=14941, lr=9.98885e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=172678
2021-06-20 18:36:55 | INFO | train_inner | epoch 006: 20 / 3002 loss=2.657, ppl=6.31, wps=5857.2, ups=0.09, wpb=64799, bsz=128, num_updates=14942, lr=9.98885e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=172689
2021-06-20 18:37:06 | INFO | train_inner | epoch 006: 21 / 3002 loss=2.554, ppl=5.87, wps=5729.6, ups=0.09, wpb=64838, bsz=128, num_updates=14943, lr=9.98884e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=172701
2021-06-20 18:37:18 | INFO | train_inner | epoch 006: 22 / 3002 loss=2.328, ppl=5.02, wps=5764.1, ups=0.09, wpb=64872, bsz=128, num_updates=14944, lr=9.98884e-05, gnorm=1.88, loss_scale=8, train_wall=11, gb_free=2.8, wall=172712
2021-06-20 18:37:29 | INFO | train_inner | epoch 006: 23 / 3002 loss=2.537, ppl=5.8, wps=5838.1, ups=0.09, wpb=64789, bsz=128, num_updates=14945, lr=9.98884e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=172723
2021-06-20 18:37:40 | INFO | train_inner | epoch 006: 24 / 3002 loss=2.675, ppl=6.38, wps=5834, ups=0.09, wpb=64885, bsz=128, num_updates=14946, lr=9.98884e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=172734
2021-06-20 18:37:51 | INFO | train_inner | epoch 006: 25 / 3002 loss=2.487, ppl=5.61, wps=5872.9, ups=0.09, wpb=64816, bsz=128, num_updates=14947, lr=9.98884e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=172745
2021-06-20 18:38:02 | INFO | train_inner | epoch 006: 26 / 3002 loss=2.518, ppl=5.73, wps=5798.5, ups=0.09, wpb=64743, bsz=128, num_updates=14948, lr=9.98884e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=172756
2021-06-20 18:38:13 | INFO | train_inner | epoch 006: 27 / 3002 loss=2.407, ppl=5.3, wps=5778.3, ups=0.09, wpb=64740, bsz=128, num_updates=14949, lr=9.98884e-05, gnorm=1.904, loss_scale=8, train_wall=11, gb_free=2.8, wall=172768
2021-06-20 18:38:24 | INFO | train_inner | epoch 006: 28 / 3002 loss=2.328, ppl=5.02, wps=5979.2, ups=0.09, wpb=64879, bsz=128, num_updates=14950, lr=9.98884e-05, gnorm=1.935, loss_scale=8, train_wall=10, gb_free=2.8, wall=172778
2021-06-20 18:38:35 | INFO | train_inner | epoch 006: 29 / 3002 loss=2.445, ppl=5.45, wps=5775.5, ups=0.09, wpb=64737, bsz=128, num_updates=14951, lr=9.98884e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=172790
2021-06-20 18:38:47 | INFO | train_inner | epoch 006: 30 / 3002 loss=2.561, ppl=5.9, wps=5753.5, ups=0.09, wpb=64794, bsz=128, num_updates=14952, lr=9.98884e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=172801
2021-06-20 18:38:58 | INFO | train_inner | epoch 006: 31 / 3002 loss=2.39, ppl=5.24, wps=5792.8, ups=0.09, wpb=64788, bsz=128, num_updates=14953, lr=9.98884e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=172812
2021-06-20 18:39:09 | INFO | train_inner | epoch 006: 32 / 3002 loss=2.498, ppl=5.65, wps=5764.9, ups=0.09, wpb=64881, bsz=128, num_updates=14954, lr=9.98884e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=172823
2021-06-20 18:39:20 | INFO | train_inner | epoch 006: 33 / 3002 loss=2.451, ppl=5.47, wps=5875.4, ups=0.09, wpb=64714, bsz=128, num_updates=14955, lr=9.98884e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=172834
2021-06-20 18:39:31 | INFO | train_inner | epoch 006: 34 / 3002 loss=2.51, ppl=5.7, wps=5893.5, ups=0.09, wpb=64763, bsz=128, num_updates=14956, lr=9.98883e-05, gnorm=2.091, loss_scale=8, train_wall=11, gb_free=2.8, wall=172845
2021-06-20 18:39:42 | INFO | train_inner | epoch 006: 35 / 3002 loss=2.425, ppl=5.37, wps=5883.2, ups=0.09, wpb=64773, bsz=128, num_updates=14957, lr=9.98883e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=172856
2021-06-20 18:39:53 | INFO | train_inner | epoch 006: 36 / 3002 loss=2.503, ppl=5.67, wps=5782.8, ups=0.09, wpb=64874, bsz=128, num_updates=14958, lr=9.98883e-05, gnorm=2.297, loss_scale=8, train_wall=11, gb_free=2.8, wall=172868
2021-06-20 18:40:05 | INFO | train_inner | epoch 006: 37 / 3002 loss=2.431, ppl=5.39, wps=5691.8, ups=0.09, wpb=64832, bsz=128, num_updates=14959, lr=9.98883e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=172879
2021-06-20 18:40:16 | INFO | train_inner | epoch 006: 38 / 3002 loss=2.226, ppl=4.68, wps=5711.9, ups=0.09, wpb=64832, bsz=128, num_updates=14960, lr=9.98883e-05, gnorm=1.874, loss_scale=8, train_wall=11, gb_free=2.8, wall=172890
2021-06-20 18:40:27 | INFO | train_inner | epoch 006: 39 / 3002 loss=2.367, ppl=5.16, wps=5941.6, ups=0.09, wpb=64893, bsz=128, num_updates=14961, lr=9.98883e-05, gnorm=1.926, loss_scale=8, train_wall=10, gb_free=2.8, wall=172901
2021-06-20 18:40:38 | INFO | train_inner | epoch 006: 40 / 3002 loss=2.493, ppl=5.63, wps=5816.8, ups=0.09, wpb=64841, bsz=128, num_updates=14962, lr=9.98883e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=172912
2021-06-20 18:40:49 | INFO | train_inner | epoch 006: 41 / 3002 loss=2.346, ppl=5.08, wps=5806.7, ups=0.09, wpb=64904, bsz=128, num_updates=14963, lr=9.98883e-05, gnorm=1.859, loss_scale=8, train_wall=11, gb_free=2.8, wall=172924
2021-06-20 18:41:01 | INFO | train_inner | epoch 006: 42 / 3002 loss=2.488, ppl=5.61, wps=5744, ups=0.09, wpb=64809, bsz=128, num_updates=14964, lr=9.98883e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=172935
2021-06-20 18:41:12 | INFO | train_inner | epoch 006: 43 / 3002 loss=2.563, ppl=5.91, wps=5773, ups=0.09, wpb=64860, bsz=128, num_updates=14965, lr=9.98883e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=172946
2021-06-20 18:41:23 | INFO | train_inner | epoch 006: 44 / 3002 loss=2.647, ppl=6.26, wps=5817.4, ups=0.09, wpb=64844, bsz=128, num_updates=14966, lr=9.98883e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=172957
2021-06-20 18:41:34 | INFO | train_inner | epoch 006: 45 / 3002 loss=2.57, ppl=5.94, wps=5770.6, ups=0.09, wpb=64854, bsz=128, num_updates=14967, lr=9.98883e-05, gnorm=2.044, loss_scale=8, train_wall=11, gb_free=2.8, wall=172969
2021-06-20 18:41:45 | INFO | train_inner | epoch 006: 46 / 3002 loss=2.583, ppl=5.99, wps=5811.5, ups=0.09, wpb=64835, bsz=128, num_updates=14968, lr=9.98882e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=172980
2021-06-20 18:41:56 | INFO | train_inner | epoch 006: 47 / 3002 loss=2.487, ppl=5.61, wps=5830, ups=0.09, wpb=64788, bsz=128, num_updates=14969, lr=9.98882e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=172991
2021-06-20 18:42:08 | INFO | train_inner | epoch 006: 48 / 3002 loss=2.465, ppl=5.52, wps=5796.5, ups=0.09, wpb=64903, bsz=128, num_updates=14970, lr=9.98882e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=173002
2021-06-20 18:42:19 | INFO | train_inner | epoch 006: 49 / 3002 loss=2.493, ppl=5.63, wps=5782.2, ups=0.09, wpb=64790, bsz=128, num_updates=14971, lr=9.98882e-05, gnorm=1.868, loss_scale=8, train_wall=11, gb_free=2.8, wall=173013
2021-06-20 18:42:30 | INFO | train_inner | epoch 006: 50 / 3002 loss=2.575, ppl=5.96, wps=5684.7, ups=0.09, wpb=64785, bsz=128, num_updates=14972, lr=9.98882e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=173025
2021-06-20 18:42:41 | INFO | train_inner | epoch 006: 51 / 3002 loss=2.652, ppl=6.29, wps=5776.9, ups=0.09, wpb=64858, bsz=128, num_updates=14973, lr=9.98882e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=173036
2021-06-20 18:42:53 | INFO | train_inner | epoch 006: 52 / 3002 loss=2.44, ppl=5.43, wps=5653.8, ups=0.09, wpb=64882, bsz=128, num_updates=14974, lr=9.98882e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=173047
2021-06-20 18:43:04 | INFO | train_inner | epoch 006: 53 / 3002 loss=2.558, ppl=5.89, wps=5779.5, ups=0.09, wpb=64912, bsz=128, num_updates=14975, lr=9.98882e-05, gnorm=1.997, loss_scale=8, train_wall=11, gb_free=2.8, wall=173059
2021-06-20 18:43:15 | INFO | train_inner | epoch 006: 54 / 3002 loss=2.541, ppl=5.82, wps=5759.8, ups=0.09, wpb=64879, bsz=128, num_updates=14976, lr=9.98882e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=173070
2021-06-20 18:43:26 | INFO | train_inner | epoch 006: 55 / 3002 loss=2.379, ppl=5.2, wps=5911.9, ups=0.09, wpb=64825, bsz=128, num_updates=14977, lr=9.98882e-05, gnorm=1.91, loss_scale=8, train_wall=10, gb_free=2.8, wall=173081
2021-06-20 18:43:37 | INFO | train_inner | epoch 006: 56 / 3002 loss=2.487, ppl=5.61, wps=5852.4, ups=0.09, wpb=64805, bsz=128, num_updates=14978, lr=9.98882e-05, gnorm=2.131, loss_scale=8, train_wall=11, gb_free=2.8, wall=173092
2021-06-20 18:43:49 | INFO | train_inner | epoch 006: 57 / 3002 loss=2.461, ppl=5.51, wps=5756.5, ups=0.09, wpb=64820, bsz=128, num_updates=14979, lr=9.98882e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=173103
2021-06-20 18:44:00 | INFO | train_inner | epoch 006: 58 / 3002 loss=2.524, ppl=5.75, wps=5882.4, ups=0.09, wpb=64807, bsz=128, num_updates=14980, lr=9.98882e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=173114
2021-06-20 18:44:11 | INFO | train_inner | epoch 006: 59 / 3002 loss=2.359, ppl=5.13, wps=5778.3, ups=0.09, wpb=64850, bsz=128, num_updates=14981, lr=9.98881e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=173125
2021-06-20 18:44:22 | INFO | train_inner | epoch 006: 60 / 3002 loss=2.46, ppl=5.5, wps=5888.5, ups=0.09, wpb=64869, bsz=128, num_updates=14982, lr=9.98881e-05, gnorm=2.088, loss_scale=8, train_wall=11, gb_free=2.8, wall=173136
2021-06-20 18:44:33 | INFO | train_inner | epoch 006: 61 / 3002 loss=2.309, ppl=4.96, wps=5787.8, ups=0.09, wpb=64894, bsz=128, num_updates=14983, lr=9.98881e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=173148
2021-06-20 18:44:44 | INFO | train_inner | epoch 006: 62 / 3002 loss=2.496, ppl=5.64, wps=5801.1, ups=0.09, wpb=64835, bsz=128, num_updates=14984, lr=9.98881e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=173159
2021-06-20 18:44:55 | INFO | train_inner | epoch 006: 63 / 3002 loss=2.571, ppl=5.94, wps=5956, ups=0.09, wpb=64794, bsz=128, num_updates=14985, lr=9.98881e-05, gnorm=1.985, loss_scale=8, train_wall=10, gb_free=2.8, wall=173170
2021-06-20 18:45:06 | INFO | train_inner | epoch 006: 64 / 3002 loss=2.533, ppl=5.79, wps=5881.1, ups=0.09, wpb=64815, bsz=128, num_updates=14986, lr=9.98881e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=173181
2021-06-20 18:45:17 | INFO | train_inner | epoch 006: 65 / 3002 loss=2.448, ppl=5.46, wps=5884.6, ups=0.09, wpb=64840, bsz=128, num_updates=14987, lr=9.98881e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=173192
2021-06-20 18:45:28 | INFO | train_inner | epoch 006: 66 / 3002 loss=2.615, ppl=6.13, wps=5832.5, ups=0.09, wpb=64831, bsz=128, num_updates=14988, lr=9.98881e-05, gnorm=2, loss_scale=8, train_wall=11, gb_free=2.8, wall=173203
2021-06-20 18:45:40 | INFO | train_inner | epoch 006: 67 / 3002 loss=2.372, ppl=5.18, wps=5837.9, ups=0.09, wpb=64866, bsz=128, num_updates=14989, lr=9.98881e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=173214
2021-06-20 18:45:51 | INFO | train_inner | epoch 006: 68 / 3002 loss=2.513, ppl=5.71, wps=5789.8, ups=0.09, wpb=64849, bsz=128, num_updates=14990, lr=9.98881e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=173225
2021-06-20 18:46:02 | INFO | train_inner | epoch 006: 69 / 3002 loss=2.407, ppl=5.3, wps=5767.6, ups=0.09, wpb=64891, bsz=128, num_updates=14991, lr=9.98881e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=173236
2021-06-20 18:46:13 | INFO | train_inner | epoch 006: 70 / 3002 loss=2.526, ppl=5.76, wps=5772, ups=0.09, wpb=64818, bsz=128, num_updates=14992, lr=9.98881e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=173248
2021-06-20 18:46:25 | INFO | train_inner | epoch 006: 71 / 3002 loss=2.455, ppl=5.48, wps=5707.2, ups=0.09, wpb=64799, bsz=128, num_updates=14993, lr=9.9888e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=173259
2021-06-20 18:46:36 | INFO | train_inner | epoch 006: 72 / 3002 loss=2.395, ppl=5.26, wps=5872.5, ups=0.09, wpb=64827, bsz=128, num_updates=14994, lr=9.9888e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=173270
2021-06-20 18:46:47 | INFO | train_inner | epoch 006: 73 / 3002 loss=2.423, ppl=5.36, wps=5767.5, ups=0.09, wpb=64787, bsz=128, num_updates=14995, lr=9.9888e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=173281
2021-06-20 18:46:58 | INFO | train_inner | epoch 006: 74 / 3002 loss=2.429, ppl=5.39, wps=5891.4, ups=0.09, wpb=64893, bsz=128, num_updates=14996, lr=9.9888e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=173292
2021-06-20 18:47:09 | INFO | train_inner | epoch 006: 75 / 3002 loss=2.454, ppl=5.48, wps=5759.3, ups=0.09, wpb=64793, bsz=128, num_updates=14997, lr=9.9888e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=173303
2021-06-20 18:47:20 | INFO | train_inner | epoch 006: 76 / 3002 loss=2.409, ppl=5.31, wps=5828.8, ups=0.09, wpb=64858, bsz=128, num_updates=14998, lr=9.9888e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=173315
2021-06-20 18:47:31 | INFO | train_inner | epoch 006: 77 / 3002 loss=2.468, ppl=5.53, wps=5858, ups=0.09, wpb=64840, bsz=128, num_updates=14999, lr=9.9888e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=173326
2021-06-20 18:47:42 | INFO | train_inner | epoch 006: 78 / 3002 loss=2.455, ppl=5.48, wps=5849.4, ups=0.09, wpb=64900, bsz=128, num_updates=15000, lr=9.9888e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=173337
2021-06-20 18:47:54 | INFO | train_inner | epoch 006: 79 / 3002 loss=2.421, ppl=5.35, wps=5838.6, ups=0.09, wpb=64812, bsz=128, num_updates=15001, lr=9.9888e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=173348
2021-06-20 18:48:05 | INFO | train_inner | epoch 006: 80 / 3002 loss=2.434, ppl=5.41, wps=5819.1, ups=0.09, wpb=64890, bsz=128, num_updates=15002, lr=9.9888e-05, gnorm=1.877, loss_scale=8, train_wall=11, gb_free=2.8, wall=173359
2021-06-20 18:48:16 | INFO | train_inner | epoch 006: 81 / 3002 loss=2.437, ppl=5.42, wps=5777.1, ups=0.09, wpb=64839, bsz=128, num_updates=15003, lr=9.9888e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=173370
2021-06-20 18:48:27 | INFO | train_inner | epoch 006: 82 / 3002 loss=2.328, ppl=5.02, wps=5891.5, ups=0.09, wpb=64851, bsz=128, num_updates=15004, lr=9.9888e-05, gnorm=1.922, loss_scale=8, train_wall=11, gb_free=2.8, wall=173381
2021-06-20 18:48:38 | INFO | train_inner | epoch 006: 83 / 3002 loss=2.288, ppl=4.88, wps=5838.7, ups=0.09, wpb=64878, bsz=128, num_updates=15005, lr=9.9888e-05, gnorm=1.87, loss_scale=8, train_wall=11, gb_free=2.8, wall=173392
2021-06-20 18:48:49 | INFO | train_inner | epoch 006: 84 / 3002 loss=2.379, ppl=5.2, wps=5732.5, ups=0.09, wpb=64908, bsz=128, num_updates=15006, lr=9.98879e-05, gnorm=2.003, loss_scale=8, train_wall=11, gb_free=2.8, wall=173404
2021-06-20 18:49:00 | INFO | train_inner | epoch 006: 85 / 3002 loss=2.541, ppl=5.82, wps=5844, ups=0.09, wpb=64872, bsz=128, num_updates=15007, lr=9.98879e-05, gnorm=2.072, loss_scale=8, train_wall=11, gb_free=2.8, wall=173415
2021-06-20 18:49:12 | INFO | train_inner | epoch 006: 86 / 3002 loss=2.457, ppl=5.49, wps=5821.7, ups=0.09, wpb=64867, bsz=128, num_updates=15008, lr=9.98879e-05, gnorm=2.096, loss_scale=8, train_wall=11, gb_free=2.8, wall=173426
2021-06-20 18:49:23 | INFO | train_inner | epoch 006: 87 / 3002 loss=2.342, ppl=5.07, wps=5882.7, ups=0.09, wpb=64843, bsz=128, num_updates=15009, lr=9.98879e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=173437
2021-06-20 18:49:34 | INFO | train_inner | epoch 006: 88 / 3002 loss=2.517, ppl=5.72, wps=5791.5, ups=0.09, wpb=64756, bsz=128, num_updates=15010, lr=9.98879e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=173448
2021-06-20 18:49:45 | INFO | train_inner | epoch 006: 89 / 3002 loss=2.415, ppl=5.33, wps=5713.4, ups=0.09, wpb=64742, bsz=128, num_updates=15011, lr=9.98879e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=173459
2021-06-20 18:49:56 | INFO | train_inner | epoch 006: 90 / 3002 loss=2.483, ppl=5.59, wps=5764.6, ups=0.09, wpb=64793, bsz=128, num_updates=15012, lr=9.98879e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=173471
2021-06-20 18:50:08 | INFO | train_inner | epoch 006: 91 / 3002 loss=2.495, ppl=5.64, wps=5662.8, ups=0.09, wpb=64830, bsz=128, num_updates=15013, lr=9.98879e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=173482
2021-06-20 18:50:19 | INFO | train_inner | epoch 006: 92 / 3002 loss=2.379, ppl=5.2, wps=5784.1, ups=0.09, wpb=64850, bsz=128, num_updates=15014, lr=9.98879e-05, gnorm=1.847, loss_scale=8, train_wall=11, gb_free=2.8, wall=173493
2021-06-20 18:50:30 | INFO | train_inner | epoch 006: 93 / 3002 loss=2.426, ppl=5.37, wps=5895.9, ups=0.09, wpb=64863, bsz=128, num_updates=15015, lr=9.98879e-05, gnorm=2.033, loss_scale=8, train_wall=11, gb_free=2.8, wall=173504
2021-06-20 18:50:41 | INFO | train_inner | epoch 006: 94 / 3002 loss=2.472, ppl=5.55, wps=5907.8, ups=0.09, wpb=64881, bsz=128, num_updates=15016, lr=9.98879e-05, gnorm=1.96, loss_scale=8, train_wall=10, gb_free=2.8, wall=173515
2021-06-20 18:50:52 | INFO | train_inner | epoch 006: 95 / 3002 loss=2.412, ppl=5.32, wps=5818.9, ups=0.09, wpb=64775, bsz=128, num_updates=15017, lr=9.98879e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=173526
2021-06-20 18:51:03 | INFO | train_inner | epoch 006: 96 / 3002 loss=2.448, ppl=5.46, wps=5741, ups=0.09, wpb=64810, bsz=128, num_updates=15018, lr=9.98878e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=173538
2021-06-20 18:51:15 | INFO | train_inner | epoch 006: 97 / 3002 loss=2.491, ppl=5.62, wps=5721.2, ups=0.09, wpb=64853, bsz=128, num_updates=15019, lr=9.98878e-05, gnorm=2.067, loss_scale=8, train_wall=11, gb_free=2.8, wall=173549
2021-06-20 18:51:26 | INFO | train_inner | epoch 006: 98 / 3002 loss=2.376, ppl=5.19, wps=5868, ups=0.09, wpb=64810, bsz=128, num_updates=15020, lr=9.98878e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=173560
2021-06-20 18:51:37 | INFO | train_inner | epoch 006: 99 / 3002 loss=2.355, ppl=5.12, wps=5851.5, ups=0.09, wpb=64798, bsz=128, num_updates=15021, lr=9.98878e-05, gnorm=2.333, loss_scale=8, train_wall=11, gb_free=2.8, wall=173571
2021-06-20 18:51:48 | INFO | train_inner | epoch 006: 100 / 3002 loss=2.368, ppl=5.16, wps=5722.8, ups=0.09, wpb=64802, bsz=128, num_updates=15022, lr=9.98878e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=173583
2021-06-20 18:51:59 | INFO | train_inner | epoch 006: 101 / 3002 loss=2.546, ppl=5.84, wps=5840.5, ups=0.09, wpb=64781, bsz=128, num_updates=15023, lr=9.98878e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=173594
2021-06-20 18:52:10 | INFO | train_inner | epoch 006: 102 / 3002 loss=2.376, ppl=5.19, wps=5845.3, ups=0.09, wpb=64932, bsz=128, num_updates=15024, lr=9.98878e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=173605
2021-06-20 18:52:21 | INFO | train_inner | epoch 006: 103 / 3002 loss=2.462, ppl=5.51, wps=5859.5, ups=0.09, wpb=64844, bsz=128, num_updates=15025, lr=9.98878e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=173616
2021-06-20 18:52:33 | INFO | train_inner | epoch 006: 104 / 3002 loss=2.433, ppl=5.4, wps=5839.7, ups=0.09, wpb=64879, bsz=128, num_updates=15026, lr=9.98878e-05, gnorm=1.871, loss_scale=8, train_wall=11, gb_free=2.8, wall=173627
2021-06-20 18:52:44 | INFO | train_inner | epoch 006: 105 / 3002 loss=2.538, ppl=5.81, wps=5840.7, ups=0.09, wpb=64828, bsz=128, num_updates=15027, lr=9.98878e-05, gnorm=1.842, loss_scale=8, train_wall=11, gb_free=2.8, wall=173638
2021-06-20 18:52:55 | INFO | train_inner | epoch 006: 106 / 3002 loss=2.366, ppl=5.15, wps=5815.6, ups=0.09, wpb=64849, bsz=128, num_updates=15028, lr=9.98878e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=173649
2021-06-20 18:53:06 | INFO | train_inner | epoch 006: 107 / 3002 loss=2.457, ppl=5.49, wps=5912.6, ups=0.09, wpb=64855, bsz=128, num_updates=15029, lr=9.98878e-05, gnorm=1.908, loss_scale=8, train_wall=10, gb_free=2.8, wall=173660
2021-06-20 18:53:17 | INFO | train_inner | epoch 006: 108 / 3002 loss=2.356, ppl=5.12, wps=5687, ups=0.09, wpb=64784, bsz=128, num_updates=15030, lr=9.98878e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=173672
2021-06-20 18:53:29 | INFO | train_inner | epoch 006: 109 / 3002 loss=2.496, ppl=5.64, wps=5718.4, ups=0.09, wpb=64804, bsz=128, num_updates=15031, lr=9.98877e-05, gnorm=2.028, loss_scale=8, train_wall=11, gb_free=2.8, wall=173683
2021-06-20 18:53:40 | INFO | train_inner | epoch 006: 110 / 3002 loss=2.635, ppl=6.21, wps=5897.7, ups=0.09, wpb=64832, bsz=128, num_updates=15032, lr=9.98877e-05, gnorm=1.867, loss_scale=8, train_wall=11, gb_free=2.8, wall=173694
2021-06-20 18:53:51 | INFO | train_inner | epoch 006: 111 / 3002 loss=2.511, ppl=5.7, wps=5820.9, ups=0.09, wpb=64801, bsz=128, num_updates=15033, lr=9.98877e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=173705
2021-06-20 18:54:02 | INFO | train_inner | epoch 006: 112 / 3002 loss=2.467, ppl=5.53, wps=5924.9, ups=0.09, wpb=64871, bsz=128, num_updates=15034, lr=9.98877e-05, gnorm=1.909, loss_scale=8, train_wall=10, gb_free=2.8, wall=173716
2021-06-20 18:54:13 | INFO | train_inner | epoch 006: 113 / 3002 loss=2.289, ppl=4.89, wps=5810.1, ups=0.09, wpb=64826, bsz=128, num_updates=15035, lr=9.98877e-05, gnorm=1.818, loss_scale=8, train_wall=11, gb_free=2.8, wall=173727
2021-06-20 18:54:24 | INFO | train_inner | epoch 006: 114 / 3002 loss=2.405, ppl=5.3, wps=5808.4, ups=0.09, wpb=64878, bsz=128, num_updates=15036, lr=9.98877e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=173738
2021-06-20 18:54:35 | INFO | train_inner | epoch 006: 115 / 3002 loss=2.577, ppl=5.97, wps=5769.5, ups=0.09, wpb=64834, bsz=128, num_updates=15037, lr=9.98877e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=173749
2021-06-20 18:54:46 | INFO | train_inner | epoch 006: 116 / 3002 loss=2.441, ppl=5.43, wps=5819.9, ups=0.09, wpb=64861, bsz=128, num_updates=15038, lr=9.98877e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=173761
2021-06-20 18:54:57 | INFO | train_inner | epoch 006: 117 / 3002 loss=2.571, ppl=5.94, wps=5863.9, ups=0.09, wpb=64808, bsz=128, num_updates=15039, lr=9.98877e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=173772
2021-06-20 18:55:09 | INFO | train_inner | epoch 006: 118 / 3002 loss=2.62, ppl=6.15, wps=5788.7, ups=0.09, wpb=64835, bsz=128, num_updates=15040, lr=9.98877e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=173783
2021-06-20 18:55:20 | INFO | train_inner | epoch 006: 119 / 3002 loss=2.368, ppl=5.16, wps=5904.5, ups=0.09, wpb=64851, bsz=128, num_updates=15041, lr=9.98877e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=173794
2021-06-20 18:55:31 | INFO | train_inner | epoch 006: 120 / 3002 loss=2.552, ppl=5.86, wps=5824.6, ups=0.09, wpb=64814, bsz=128, num_updates=15042, lr=9.98877e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=173805
2021-06-20 18:55:42 | INFO | train_inner | epoch 006: 121 / 3002 loss=2.417, ppl=5.34, wps=5799.6, ups=0.09, wpb=64806, bsz=128, num_updates=15043, lr=9.98876e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=173816
2021-06-20 18:55:53 | INFO | train_inner | epoch 006: 122 / 3002 loss=2.529, ppl=5.77, wps=5839.4, ups=0.09, wpb=64822, bsz=128, num_updates=15044, lr=9.98876e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=173827
2021-06-20 18:56:04 | INFO | train_inner | epoch 006: 123 / 3002 loss=2.404, ppl=5.29, wps=5838.2, ups=0.09, wpb=64763, bsz=128, num_updates=15045, lr=9.98876e-05, gnorm=2.285, loss_scale=8, train_wall=11, gb_free=2.8, wall=173838
2021-06-20 18:56:15 | INFO | train_inner | epoch 006: 124 / 3002 loss=2.397, ppl=5.27, wps=5717.6, ups=0.09, wpb=64852, bsz=128, num_updates=15046, lr=9.98876e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=173850
2021-06-20 18:56:26 | INFO | train_inner | epoch 006: 125 / 3002 loss=2.496, ppl=5.64, wps=5857.4, ups=0.09, wpb=64897, bsz=128, num_updates=15047, lr=9.98876e-05, gnorm=1.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=173861
2021-06-20 18:56:38 | INFO | train_inner | epoch 006: 126 / 3002 loss=2.514, ppl=5.71, wps=5758.2, ups=0.09, wpb=64810, bsz=128, num_updates=15048, lr=9.98876e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=173872
2021-06-20 18:56:49 | INFO | train_inner | epoch 006: 127 / 3002 loss=2.413, ppl=5.33, wps=5805.8, ups=0.09, wpb=64769, bsz=128, num_updates=15049, lr=9.98876e-05, gnorm=1.991, loss_scale=16, train_wall=11, gb_free=2.8, wall=173883
2021-06-20 18:57:00 | INFO | train_inner | epoch 006: 128 / 3002 loss=2.54, ppl=5.82, wps=5645.8, ups=0.09, wpb=64770, bsz=128, num_updates=15050, lr=9.98876e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=173895
2021-06-20 18:57:12 | INFO | train_inner | epoch 006: 129 / 3002 loss=2.534, ppl=5.79, wps=5784.2, ups=0.09, wpb=64782, bsz=128, num_updates=15051, lr=9.98876e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=173906
2021-06-20 18:57:23 | INFO | train_inner | epoch 006: 130 / 3002 loss=2.401, ppl=5.28, wps=5897.3, ups=0.09, wpb=64910, bsz=128, num_updates=15052, lr=9.98876e-05, gnorm=1.954, loss_scale=16, train_wall=11, gb_free=2.8, wall=173917
2021-06-20 18:57:34 | INFO | train_inner | epoch 006: 131 / 3002 loss=2.549, ppl=5.85, wps=5741.2, ups=0.09, wpb=64845, bsz=128, num_updates=15053, lr=9.98876e-05, gnorm=1.945, loss_scale=16, train_wall=11, gb_free=2.8, wall=173928
2021-06-20 18:57:45 | INFO | train_inner | epoch 006: 132 / 3002 loss=2.619, ppl=6.14, wps=5753.8, ups=0.09, wpb=64858, bsz=128, num_updates=15054, lr=9.98876e-05, gnorm=2.087, loss_scale=16, train_wall=11, gb_free=2.8, wall=173939
2021-06-20 18:57:56 | INFO | train_inner | epoch 006: 133 / 3002 loss=2.432, ppl=5.4, wps=5887.3, ups=0.09, wpb=64857, bsz=128, num_updates=15055, lr=9.98876e-05, gnorm=1.905, loss_scale=16, train_wall=11, gb_free=2.8, wall=173950
2021-06-20 18:58:07 | INFO | train_inner | epoch 006: 134 / 3002 loss=2.54, ppl=5.81, wps=5869.1, ups=0.09, wpb=64803, bsz=128, num_updates=15056, lr=9.98875e-05, gnorm=1.937, loss_scale=16, train_wall=11, gb_free=2.8, wall=173962
2021-06-20 18:58:18 | INFO | train_inner | epoch 006: 135 / 3002 loss=2.5, ppl=5.66, wps=5838.4, ups=0.09, wpb=64874, bsz=128, num_updates=15057, lr=9.98875e-05, gnorm=1.877, loss_scale=16, train_wall=11, gb_free=2.8, wall=173973
2021-06-20 18:58:30 | INFO | train_inner | epoch 006: 136 / 3002 loss=2.512, ppl=5.7, wps=5756.1, ups=0.09, wpb=64768, bsz=128, num_updates=15058, lr=9.98875e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=173984
2021-06-20 18:58:41 | INFO | train_inner | epoch 006: 137 / 3002 loss=2.574, ppl=5.95, wps=5835.8, ups=0.09, wpb=64766, bsz=128, num_updates=15059, lr=9.98875e-05, gnorm=1.997, loss_scale=16, train_wall=11, gb_free=2.8, wall=173995
2021-06-20 18:58:52 | INFO | train_inner | epoch 006: 138 / 3002 loss=2.526, ppl=5.76, wps=5876.9, ups=0.09, wpb=64858, bsz=128, num_updates=15060, lr=9.98875e-05, gnorm=1.868, loss_scale=16, train_wall=11, gb_free=2.8, wall=174006
2021-06-20 18:59:03 | INFO | train_inner | epoch 006: 139 / 3002 loss=2.466, ppl=5.52, wps=5853.7, ups=0.09, wpb=64831, bsz=128, num_updates=15061, lr=9.98875e-05, gnorm=1.925, loss_scale=16, train_wall=11, gb_free=2.8, wall=174017
2021-06-20 18:59:14 | INFO | train_inner | epoch 006: 140 / 3002 loss=2.593, ppl=6.03, wps=5829.8, ups=0.09, wpb=64793, bsz=128, num_updates=15062, lr=9.98875e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=174028
2021-06-20 18:59:25 | INFO | train_inner | epoch 006: 141 / 3002 loss=2.687, ppl=6.44, wps=5798.2, ups=0.09, wpb=64853, bsz=128, num_updates=15063, lr=9.98875e-05, gnorm=1.937, loss_scale=16, train_wall=11, gb_free=2.8, wall=174039
2021-06-20 18:59:36 | INFO | train_inner | epoch 006: 142 / 3002 loss=2.601, ppl=6.07, wps=5848.2, ups=0.09, wpb=64849, bsz=128, num_updates=15064, lr=9.98875e-05, gnorm=1.973, loss_scale=16, train_wall=11, gb_free=2.8, wall=174050
2021-06-20 18:59:47 | INFO | train_inner | epoch 006: 143 / 3002 loss=2.532, ppl=5.79, wps=5788.7, ups=0.09, wpb=64857, bsz=128, num_updates=15065, lr=9.98875e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=174062
2021-06-20 18:59:58 | INFO | train_inner | epoch 006: 144 / 3002 loss=2.502, ppl=5.66, wps=5821.8, ups=0.09, wpb=64876, bsz=128, num_updates=15066, lr=9.98875e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=174073
2021-06-20 19:00:10 | INFO | train_inner | epoch 006: 145 / 3002 loss=2.522, ppl=5.74, wps=5877.2, ups=0.09, wpb=64916, bsz=128, num_updates=15067, lr=9.98875e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=174084
2021-06-20 19:00:21 | INFO | train_inner | epoch 006: 146 / 3002 loss=2.497, ppl=5.65, wps=5729.9, ups=0.09, wpb=64749, bsz=128, num_updates=15068, lr=9.98874e-05, gnorm=2.046, loss_scale=16, train_wall=11, gb_free=2.8, wall=174095
2021-06-20 19:00:32 | INFO | train_inner | epoch 006: 147 / 3002 loss=2.415, ppl=5.33, wps=5827.5, ups=0.09, wpb=64765, bsz=128, num_updates=15069, lr=9.98874e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=174106
2021-06-20 19:00:43 | INFO | train_inner | epoch 006: 148 / 3002 loss=2.612, ppl=6.11, wps=5773.9, ups=0.09, wpb=64809, bsz=128, num_updates=15070, lr=9.98874e-05, gnorm=2.273, loss_scale=16, train_wall=11, gb_free=2.8, wall=174117
2021-06-20 19:00:54 | INFO | train_inner | epoch 006: 149 / 3002 loss=2.622, ppl=6.16, wps=5741.8, ups=0.09, wpb=64779, bsz=128, num_updates=15071, lr=9.98874e-05, gnorm=1.975, loss_scale=16, train_wall=11, gb_free=2.8, wall=174129
2021-06-20 19:01:06 | INFO | train_inner | epoch 006: 150 / 3002 loss=2.493, ppl=5.63, wps=5805.9, ups=0.09, wpb=64775, bsz=128, num_updates=15072, lr=9.98874e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=174140
2021-06-20 19:01:17 | INFO | train_inner | epoch 006: 151 / 3002 loss=2.509, ppl=5.69, wps=5870.5, ups=0.09, wpb=64805, bsz=128, num_updates=15073, lr=9.98874e-05, gnorm=1.952, loss_scale=16, train_wall=11, gb_free=2.8, wall=174151
2021-06-20 19:01:28 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 19:01:39 | INFO | train_inner | epoch 006: 153 / 3002 loss=2.497, ppl=5.65, wps=2882.5, ups=0.04, wpb=64824, bsz=128, num_updates=15074, lr=9.98874e-05, gnorm=1.979, loss_scale=8, train_wall=22, gb_free=2.8, wall=174173
2021-06-20 19:01:50 | INFO | train_inner | epoch 006: 154 / 3002 loss=2.439, ppl=5.42, wps=5710.6, ups=0.09, wpb=64799, bsz=128, num_updates=15075, lr=9.98874e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=174185
2021-06-20 19:02:02 | INFO | train_inner | epoch 006: 155 / 3002 loss=2.607, ppl=6.09, wps=5727.8, ups=0.09, wpb=64818, bsz=128, num_updates=15076, lr=9.98874e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=174196
2021-06-20 19:02:13 | INFO | train_inner | epoch 006: 156 / 3002 loss=2.472, ppl=5.55, wps=5717, ups=0.09, wpb=64832, bsz=128, num_updates=15077, lr=9.98874e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=174207
2021-06-20 19:02:24 | INFO | train_inner | epoch 006: 157 / 3002 loss=2.456, ppl=5.49, wps=5727.4, ups=0.09, wpb=64800, bsz=128, num_updates=15078, lr=9.98874e-05, gnorm=1.858, loss_scale=8, train_wall=11, gb_free=2.8, wall=174219
2021-06-20 19:02:36 | INFO | train_inner | epoch 006: 158 / 3002 loss=2.298, ppl=4.92, wps=5788.9, ups=0.09, wpb=64754, bsz=128, num_updates=15079, lr=9.98874e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=174230
2021-06-20 19:02:47 | INFO | train_inner | epoch 006: 159 / 3002 loss=2.551, ppl=5.86, wps=5872.6, ups=0.09, wpb=64898, bsz=128, num_updates=15080, lr=9.98874e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=174241
2021-06-20 19:02:58 | INFO | train_inner | epoch 006: 160 / 3002 loss=2.627, ppl=6.18, wps=5861.1, ups=0.09, wpb=64824, bsz=128, num_updates=15081, lr=9.98873e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=174252
2021-06-20 19:03:09 | INFO | train_inner | epoch 006: 161 / 3002 loss=2.44, ppl=5.43, wps=5830.1, ups=0.09, wpb=64835, bsz=128, num_updates=15082, lr=9.98873e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=174263
2021-06-20 19:03:20 | INFO | train_inner | epoch 006: 162 / 3002 loss=2.38, ppl=5.2, wps=5779.1, ups=0.09, wpb=64859, bsz=128, num_updates=15083, lr=9.98873e-05, gnorm=1.814, loss_scale=8, train_wall=11, gb_free=2.8, wall=174274
2021-06-20 19:03:31 | INFO | train_inner | epoch 006: 163 / 3002 loss=2.516, ppl=5.72, wps=5792.2, ups=0.09, wpb=64787, bsz=128, num_updates=15084, lr=9.98873e-05, gnorm=1.886, loss_scale=8, train_wall=11, gb_free=2.8, wall=174286
2021-06-20 19:03:43 | INFO | train_inner | epoch 006: 164 / 3002 loss=2.478, ppl=5.57, wps=5755.5, ups=0.09, wpb=64831, bsz=128, num_updates=15085, lr=9.98873e-05, gnorm=1.909, loss_scale=8, train_wall=11, gb_free=2.8, wall=174297
2021-06-20 19:03:54 | INFO | train_inner | epoch 006: 165 / 3002 loss=2.549, ppl=5.85, wps=5741.7, ups=0.09, wpb=64730, bsz=128, num_updates=15086, lr=9.98873e-05, gnorm=2.267, loss_scale=8, train_wall=11, gb_free=2.8, wall=174308
2021-06-20 19:04:05 | INFO | train_inner | epoch 006: 166 / 3002 loss=2.47, ppl=5.54, wps=5734, ups=0.09, wpb=64810, bsz=128, num_updates=15087, lr=9.98873e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=174319
2021-06-20 19:04:16 | INFO | train_inner | epoch 006: 167 / 3002 loss=2.558, ppl=5.89, wps=5788.6, ups=0.09, wpb=64782, bsz=128, num_updates=15088, lr=9.98873e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=174331
2021-06-20 19:04:27 | INFO | train_inner | epoch 006: 168 / 3002 loss=2.568, ppl=5.93, wps=5900.4, ups=0.09, wpb=64815, bsz=128, num_updates=15089, lr=9.98873e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=174342
2021-06-20 19:04:39 | INFO | train_inner | epoch 006: 169 / 3002 loss=2.321, ppl=5, wps=5764, ups=0.09, wpb=64843, bsz=128, num_updates=15090, lr=9.98873e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=174353
2021-06-20 19:04:50 | INFO | train_inner | epoch 006: 170 / 3002 loss=2.439, ppl=5.42, wps=5804.6, ups=0.09, wpb=64919, bsz=128, num_updates=15091, lr=9.98873e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=174364
2021-06-20 19:05:01 | INFO | train_inner | epoch 006: 171 / 3002 loss=2.51, ppl=5.7, wps=5739.8, ups=0.09, wpb=64785, bsz=128, num_updates=15092, lr=9.98873e-05, gnorm=2.087, loss_scale=8, train_wall=11, gb_free=2.8, wall=174375
2021-06-20 19:05:12 | INFO | train_inner | epoch 006: 172 / 3002 loss=2.411, ppl=5.32, wps=5810.2, ups=0.09, wpb=64737, bsz=128, num_updates=15093, lr=9.98872e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=174386
2021-06-20 19:05:23 | INFO | train_inner | epoch 006: 173 / 3002 loss=2.52, ppl=5.73, wps=5826.4, ups=0.09, wpb=64778, bsz=128, num_updates=15094, lr=9.98872e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=174398
2021-06-20 19:05:35 | INFO | train_inner | epoch 006: 174 / 3002 loss=2.62, ppl=6.15, wps=5779.4, ups=0.09, wpb=64895, bsz=128, num_updates=15095, lr=9.98872e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=174409
2021-06-20 19:05:46 | INFO | train_inner | epoch 006: 175 / 3002 loss=2.476, ppl=5.56, wps=5851.8, ups=0.09, wpb=64836, bsz=128, num_updates=15096, lr=9.98872e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=174420
2021-06-20 19:05:57 | INFO | train_inner | epoch 006: 176 / 3002 loss=2.585, ppl=6, wps=5748.5, ups=0.09, wpb=64807, bsz=128, num_updates=15097, lr=9.98872e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=174431
2021-06-20 19:06:08 | INFO | train_inner | epoch 006: 177 / 3002 loss=2.415, ppl=5.33, wps=5821.5, ups=0.09, wpb=64862, bsz=128, num_updates=15098, lr=9.98872e-05, gnorm=2.086, loss_scale=8, train_wall=11, gb_free=2.8, wall=174442
2021-06-20 19:06:19 | INFO | train_inner | epoch 006: 178 / 3002 loss=2.421, ppl=5.36, wps=5725.4, ups=0.09, wpb=64854, bsz=128, num_updates=15099, lr=9.98872e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=174454
2021-06-20 19:06:30 | INFO | train_inner | epoch 006: 179 / 3002 loss=2.509, ppl=5.69, wps=5878.6, ups=0.09, wpb=64867, bsz=128, num_updates=15100, lr=9.98872e-05, gnorm=1.922, loss_scale=8, train_wall=11, gb_free=2.8, wall=174465
2021-06-20 19:06:41 | INFO | train_inner | epoch 006: 180 / 3002 loss=2.432, ppl=5.4, wps=5900.5, ups=0.09, wpb=64775, bsz=128, num_updates=15101, lr=9.98872e-05, gnorm=2.147, loss_scale=8, train_wall=11, gb_free=2.8, wall=174476
2021-06-20 19:06:52 | INFO | train_inner | epoch 006: 181 / 3002 loss=2.403, ppl=5.29, wps=5822.8, ups=0.09, wpb=64792, bsz=128, num_updates=15102, lr=9.98872e-05, gnorm=1.998, loss_scale=8, train_wall=11, gb_free=2.8, wall=174487
2021-06-20 19:07:04 | INFO | train_inner | epoch 006: 182 / 3002 loss=2.469, ppl=5.54, wps=5820, ups=0.09, wpb=64850, bsz=128, num_updates=15103, lr=9.98872e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=174498
2021-06-20 19:07:15 | INFO | train_inner | epoch 006: 183 / 3002 loss=2.48, ppl=5.58, wps=5767.9, ups=0.09, wpb=64878, bsz=128, num_updates=15104, lr=9.98872e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=174509
2021-06-20 19:07:26 | INFO | train_inner | epoch 006: 184 / 3002 loss=2.401, ppl=5.28, wps=5741.7, ups=0.09, wpb=64849, bsz=128, num_updates=15105, lr=9.98872e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=174520
2021-06-20 19:07:37 | INFO | train_inner | epoch 006: 185 / 3002 loss=2.41, ppl=5.32, wps=5807.7, ups=0.09, wpb=64849, bsz=128, num_updates=15106, lr=9.98871e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=174532
2021-06-20 19:07:49 | INFO | train_inner | epoch 006: 186 / 3002 loss=2.481, ppl=5.58, wps=5779.5, ups=0.09, wpb=64788, bsz=128, num_updates=15107, lr=9.98871e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=174543
2021-06-20 19:08:00 | INFO | train_inner | epoch 006: 187 / 3002 loss=2.591, ppl=6.03, wps=5687.1, ups=0.09, wpb=64846, bsz=128, num_updates=15108, lr=9.98871e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=174554
2021-06-20 19:08:11 | INFO | train_inner | epoch 006: 188 / 3002 loss=2.499, ppl=5.65, wps=5750, ups=0.09, wpb=64801, bsz=128, num_updates=15109, lr=9.98871e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=174566
2021-06-20 19:08:22 | INFO | train_inner | epoch 006: 189 / 3002 loss=2.528, ppl=5.77, wps=5780.8, ups=0.09, wpb=64916, bsz=128, num_updates=15110, lr=9.98871e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=174577
2021-06-20 19:08:34 | INFO | train_inner | epoch 006: 190 / 3002 loss=2.506, ppl=5.68, wps=5767.4, ups=0.09, wpb=64747, bsz=128, num_updates=15111, lr=9.98871e-05, gnorm=2.354, loss_scale=8, train_wall=11, gb_free=2.8, wall=174588
2021-06-20 19:08:45 | INFO | train_inner | epoch 006: 191 / 3002 loss=2.527, ppl=5.77, wps=5838.6, ups=0.09, wpb=64872, bsz=128, num_updates=15112, lr=9.98871e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=174599
2021-06-20 19:08:56 | INFO | train_inner | epoch 006: 192 / 3002 loss=2.466, ppl=5.52, wps=5771.8, ups=0.09, wpb=64878, bsz=128, num_updates=15113, lr=9.98871e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=174610
2021-06-20 19:09:07 | INFO | train_inner | epoch 006: 193 / 3002 loss=2.39, ppl=5.24, wps=5702.4, ups=0.09, wpb=64774, bsz=128, num_updates=15114, lr=9.98871e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=174622
2021-06-20 19:09:18 | INFO | train_inner | epoch 006: 194 / 3002 loss=2.563, ppl=5.91, wps=5869.5, ups=0.09, wpb=64778, bsz=128, num_updates=15115, lr=9.98871e-05, gnorm=2.153, loss_scale=8, train_wall=11, gb_free=2.8, wall=174633
2021-06-20 19:09:30 | INFO | train_inner | epoch 006: 195 / 3002 loss=2.399, ppl=5.27, wps=5817.9, ups=0.09, wpb=64880, bsz=128, num_updates=15116, lr=9.98871e-05, gnorm=2.018, loss_scale=8, train_wall=11, gb_free=2.8, wall=174644
2021-06-20 19:09:41 | INFO | train_inner | epoch 006: 196 / 3002 loss=2.382, ppl=5.21, wps=5889.7, ups=0.09, wpb=64766, bsz=128, num_updates=15117, lr=9.98871e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=174655
2021-06-20 19:09:52 | INFO | train_inner | epoch 006: 197 / 3002 loss=2.535, ppl=5.8, wps=5773, ups=0.09, wpb=64795, bsz=128, num_updates=15118, lr=9.9887e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=174666
2021-06-20 19:10:03 | INFO | train_inner | epoch 006: 198 / 3002 loss=2.477, ppl=5.57, wps=5775.7, ups=0.09, wpb=64819, bsz=128, num_updates=15119, lr=9.9887e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=174677
2021-06-20 19:10:14 | INFO | train_inner | epoch 006: 199 / 3002 loss=2.578, ppl=5.97, wps=5748.6, ups=0.09, wpb=64763, bsz=128, num_updates=15120, lr=9.9887e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=174689
2021-06-20 19:10:26 | INFO | train_inner | epoch 006: 200 / 3002 loss=2.427, ppl=5.38, wps=5645.5, ups=0.09, wpb=64770, bsz=128, num_updates=15121, lr=9.9887e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=174700
2021-06-20 19:10:37 | INFO | train_inner | epoch 006: 201 / 3002 loss=2.467, ppl=5.53, wps=5715.1, ups=0.09, wpb=64885, bsz=128, num_updates=15122, lr=9.9887e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=174711
2021-06-20 19:10:48 | INFO | train_inner | epoch 006: 202 / 3002 loss=2.636, ppl=6.22, wps=5795.9, ups=0.09, wpb=64770, bsz=128, num_updates=15123, lr=9.9887e-05, gnorm=2.042, loss_scale=8, train_wall=11, gb_free=2.8, wall=174723
2021-06-20 19:11:00 | INFO | train_inner | epoch 006: 203 / 3002 loss=2.499, ppl=5.65, wps=5600.4, ups=0.09, wpb=64777, bsz=128, num_updates=15124, lr=9.9887e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=174734
2021-06-20 19:11:11 | INFO | train_inner | epoch 006: 204 / 3002 loss=2.499, ppl=5.65, wps=5915, ups=0.09, wpb=64863, bsz=128, num_updates=15125, lr=9.9887e-05, gnorm=1.958, loss_scale=8, train_wall=10, gb_free=2.8, wall=174745
2021-06-20 19:11:22 | INFO | train_inner | epoch 006: 205 / 3002 loss=2.53, ppl=5.78, wps=5848.2, ups=0.09, wpb=64825, bsz=128, num_updates=15126, lr=9.9887e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=174756
2021-06-20 19:11:33 | INFO | train_inner | epoch 006: 206 / 3002 loss=2.494, ppl=5.63, wps=5757.1, ups=0.09, wpb=64825, bsz=128, num_updates=15127, lr=9.9887e-05, gnorm=2.054, loss_scale=8, train_wall=11, gb_free=2.8, wall=174767
2021-06-20 19:11:44 | INFO | train_inner | epoch 006: 207 / 3002 loss=2.593, ppl=6.03, wps=5790.7, ups=0.09, wpb=64843, bsz=128, num_updates=15128, lr=9.9887e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=174779
2021-06-20 19:11:56 | INFO | train_inner | epoch 006: 208 / 3002 loss=2.586, ppl=6, wps=5716.3, ups=0.09, wpb=64891, bsz=128, num_updates=15129, lr=9.9887e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=174790
2021-06-20 19:12:07 | INFO | train_inner | epoch 006: 209 / 3002 loss=2.433, ppl=5.4, wps=5799.5, ups=0.09, wpb=64862, bsz=128, num_updates=15130, lr=9.9887e-05, gnorm=1.86, loss_scale=8, train_wall=11, gb_free=2.8, wall=174801
2021-06-20 19:12:18 | INFO | train_inner | epoch 006: 210 / 3002 loss=2.402, ppl=5.29, wps=5829.9, ups=0.09, wpb=64853, bsz=128, num_updates=15131, lr=9.98869e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=174812
2021-06-20 19:12:29 | INFO | train_inner | epoch 006: 211 / 3002 loss=2.439, ppl=5.42, wps=5760.9, ups=0.09, wpb=64792, bsz=128, num_updates=15132, lr=9.98869e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=174824
2021-06-20 19:12:40 | INFO | train_inner | epoch 006: 212 / 3002 loss=2.552, ppl=5.87, wps=5815.6, ups=0.09, wpb=64733, bsz=128, num_updates=15133, lr=9.98869e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=174835
2021-06-20 19:12:52 | INFO | train_inner | epoch 006: 213 / 3002 loss=2.412, ppl=5.32, wps=5815.3, ups=0.09, wpb=64784, bsz=128, num_updates=15134, lr=9.98869e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=174846
2021-06-20 19:13:03 | INFO | train_inner | epoch 006: 214 / 3002 loss=2.636, ppl=6.22, wps=5781.7, ups=0.09, wpb=64713, bsz=128, num_updates=15135, lr=9.98869e-05, gnorm=2.182, loss_scale=8, train_wall=11, gb_free=2.8, wall=174857
2021-06-20 19:13:14 | INFO | train_inner | epoch 006: 215 / 3002 loss=2.507, ppl=5.69, wps=5655.4, ups=0.09, wpb=64835, bsz=128, num_updates=15136, lr=9.98869e-05, gnorm=2.005, loss_scale=8, train_wall=11, gb_free=2.8, wall=174869
2021-06-20 19:13:25 | INFO | train_inner | epoch 006: 216 / 3002 loss=2.444, ppl=5.44, wps=5750.8, ups=0.09, wpb=64801, bsz=128, num_updates=15137, lr=9.98869e-05, gnorm=2.019, loss_scale=8, train_wall=11, gb_free=2.8, wall=174880
2021-06-20 19:13:37 | INFO | train_inner | epoch 006: 217 / 3002 loss=2.388, ppl=5.23, wps=5776, ups=0.09, wpb=64854, bsz=128, num_updates=15138, lr=9.98869e-05, gnorm=1.876, loss_scale=8, train_wall=11, gb_free=2.8, wall=174891
2021-06-20 19:13:48 | INFO | train_inner | epoch 006: 218 / 3002 loss=2.62, ppl=6.15, wps=5781.1, ups=0.09, wpb=64843, bsz=128, num_updates=15139, lr=9.98869e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=174902
2021-06-20 19:13:59 | INFO | train_inner | epoch 006: 219 / 3002 loss=2.402, ppl=5.28, wps=5685.6, ups=0.09, wpb=64748, bsz=128, num_updates=15140, lr=9.98869e-05, gnorm=2.027, loss_scale=8, train_wall=11, gb_free=2.8, wall=174914
2021-06-20 19:14:11 | INFO | train_inner | epoch 006: 220 / 3002 loss=2.582, ppl=5.99, wps=5780.9, ups=0.09, wpb=64790, bsz=128, num_updates=15141, lr=9.98869e-05, gnorm=2, loss_scale=8, train_wall=11, gb_free=2.8, wall=174925
2021-06-20 19:14:21 | INFO | train_inner | epoch 006: 221 / 3002 loss=2.447, ppl=5.45, wps=5897.1, ups=0.09, wpb=64834, bsz=128, num_updates=15142, lr=9.98869e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=174936
2021-06-20 19:14:32 | INFO | train_inner | epoch 006: 222 / 3002 loss=2.466, ppl=5.52, wps=5923.1, ups=0.09, wpb=64832, bsz=128, num_updates=15143, lr=9.98868e-05, gnorm=1.967, loss_scale=8, train_wall=10, gb_free=2.8, wall=174947
2021-06-20 19:14:44 | INFO | train_inner | epoch 006: 223 / 3002 loss=2.5, ppl=5.66, wps=5838.1, ups=0.09, wpb=64817, bsz=128, num_updates=15144, lr=9.98868e-05, gnorm=2.019, loss_scale=8, train_wall=11, gb_free=2.8, wall=174958
2021-06-20 19:14:55 | INFO | train_inner | epoch 006: 224 / 3002 loss=2.478, ppl=5.57, wps=5801.9, ups=0.09, wpb=64753, bsz=128, num_updates=15145, lr=9.98868e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=174969
2021-06-20 19:15:06 | INFO | train_inner | epoch 006: 225 / 3002 loss=2.546, ppl=5.84, wps=5764.7, ups=0.09, wpb=64826, bsz=128, num_updates=15146, lr=9.98868e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=174980
2021-06-20 19:15:17 | INFO | train_inner | epoch 006: 226 / 3002 loss=2.479, ppl=5.58, wps=5853.4, ups=0.09, wpb=64782, bsz=128, num_updates=15147, lr=9.98868e-05, gnorm=1.843, loss_scale=8, train_wall=11, gb_free=2.8, wall=174991
2021-06-20 19:15:28 | INFO | train_inner | epoch 006: 227 / 3002 loss=2.526, ppl=5.76, wps=5789.8, ups=0.09, wpb=64763, bsz=128, num_updates=15148, lr=9.98868e-05, gnorm=1.899, loss_scale=8, train_wall=11, gb_free=2.8, wall=175003
2021-06-20 19:15:39 | INFO | train_inner | epoch 006: 228 / 3002 loss=2.434, ppl=5.4, wps=5820.6, ups=0.09, wpb=64853, bsz=128, num_updates=15149, lr=9.98868e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=175014
2021-06-20 19:15:51 | INFO | train_inner | epoch 006: 229 / 3002 loss=2.4, ppl=5.28, wps=5796.9, ups=0.09, wpb=64747, bsz=128, num_updates=15150, lr=9.98868e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=175025
2021-06-20 19:16:02 | INFO | train_inner | epoch 006: 230 / 3002 loss=2.591, ppl=6.02, wps=5886.9, ups=0.09, wpb=64900, bsz=128, num_updates=15151, lr=9.98868e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=175036
2021-06-20 19:16:13 | INFO | train_inner | epoch 006: 231 / 3002 loss=2.418, ppl=5.34, wps=5779.8, ups=0.09, wpb=64847, bsz=128, num_updates=15152, lr=9.98868e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=175047
2021-06-20 19:16:24 | INFO | train_inner | epoch 006: 232 / 3002 loss=2.422, ppl=5.36, wps=5706.4, ups=0.09, wpb=64801, bsz=128, num_updates=15153, lr=9.98868e-05, gnorm=2.073, loss_scale=8, train_wall=11, gb_free=2.8, wall=175058
2021-06-20 19:16:35 | INFO | train_inner | epoch 006: 233 / 3002 loss=2.554, ppl=5.87, wps=5806.4, ups=0.09, wpb=64821, bsz=128, num_updates=15154, lr=9.98868e-05, gnorm=2.173, loss_scale=8, train_wall=11, gb_free=2.8, wall=175070
2021-06-20 19:16:46 | INFO | train_inner | epoch 006: 234 / 3002 loss=2.519, ppl=5.73, wps=5831.7, ups=0.09, wpb=64891, bsz=128, num_updates=15155, lr=9.98868e-05, gnorm=1.999, loss_scale=8, train_wall=11, gb_free=2.8, wall=175081
2021-06-20 19:16:58 | INFO | train_inner | epoch 006: 235 / 3002 loss=2.372, ppl=5.18, wps=5669.8, ups=0.09, wpb=64833, bsz=128, num_updates=15156, lr=9.98867e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=175092
2021-06-20 19:17:09 | INFO | train_inner | epoch 006: 236 / 3002 loss=2.495, ppl=5.64, wps=5767.9, ups=0.09, wpb=64823, bsz=128, num_updates=15157, lr=9.98867e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=175103
2021-06-20 19:17:20 | INFO | train_inner | epoch 006: 237 / 3002 loss=2.498, ppl=5.65, wps=5798.9, ups=0.09, wpb=64859, bsz=128, num_updates=15158, lr=9.98867e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=175115
2021-06-20 19:17:31 | INFO | train_inner | epoch 006: 238 / 3002 loss=2.535, ppl=5.8, wps=5775.1, ups=0.09, wpb=64740, bsz=128, num_updates=15159, lr=9.98867e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=175126
2021-06-20 19:17:43 | INFO | train_inner | epoch 006: 239 / 3002 loss=2.694, ppl=6.47, wps=5771.1, ups=0.09, wpb=64852, bsz=128, num_updates=15160, lr=9.98867e-05, gnorm=2.044, loss_scale=8, train_wall=11, gb_free=2.8, wall=175137
2021-06-20 19:17:54 | INFO | train_inner | epoch 006: 240 / 3002 loss=2.44, ppl=5.43, wps=5732, ups=0.09, wpb=64802, bsz=128, num_updates=15161, lr=9.98867e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=175148
2021-06-20 19:18:05 | INFO | train_inner | epoch 006: 241 / 3002 loss=2.458, ppl=5.49, wps=5739.4, ups=0.09, wpb=64838, bsz=128, num_updates=15162, lr=9.98867e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=175160
2021-06-20 19:18:16 | INFO | train_inner | epoch 006: 242 / 3002 loss=2.431, ppl=5.39, wps=5886.7, ups=0.09, wpb=64861, bsz=128, num_updates=15163, lr=9.98867e-05, gnorm=2.425, loss_scale=8, train_wall=11, gb_free=2.8, wall=175171
2021-06-20 19:18:27 | INFO | train_inner | epoch 006: 243 / 3002 loss=2.42, ppl=5.35, wps=5825.4, ups=0.09, wpb=64798, bsz=128, num_updates=15164, lr=9.98867e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=175182
2021-06-20 19:18:39 | INFO | train_inner | epoch 006: 244 / 3002 loss=2.334, ppl=5.04, wps=5681.6, ups=0.09, wpb=64852, bsz=128, num_updates=15165, lr=9.98867e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=175193
2021-06-20 19:18:50 | INFO | train_inner | epoch 006: 245 / 3002 loss=2.413, ppl=5.33, wps=5673.2, ups=0.09, wpb=64741, bsz=128, num_updates=15166, lr=9.98867e-05, gnorm=2.897, loss_scale=8, train_wall=11, gb_free=2.8, wall=175205
2021-06-20 19:19:02 | INFO | train_inner | epoch 006: 246 / 3002 loss=2.513, ppl=5.71, wps=5686.7, ups=0.09, wpb=64813, bsz=128, num_updates=15167, lr=9.98867e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=175216
2021-06-20 19:19:13 | INFO | train_inner | epoch 006: 247 / 3002 loss=2.605, ppl=6.08, wps=5770.8, ups=0.09, wpb=64866, bsz=128, num_updates=15168, lr=9.98866e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=175227
2021-06-20 19:19:24 | INFO | train_inner | epoch 006: 248 / 3002 loss=2.43, ppl=5.39, wps=5886.5, ups=0.09, wpb=64899, bsz=128, num_updates=15169, lr=9.98866e-05, gnorm=1.874, loss_scale=8, train_wall=11, gb_free=2.8, wall=175238
2021-06-20 19:19:35 | INFO | train_inner | epoch 006: 249 / 3002 loss=2.572, ppl=5.94, wps=5894.7, ups=0.09, wpb=64852, bsz=128, num_updates=15170, lr=9.98866e-05, gnorm=2.817, loss_scale=8, train_wall=11, gb_free=2.8, wall=175249
2021-06-20 19:19:46 | INFO | train_inner | epoch 006: 250 / 3002 loss=2.38, ppl=5.2, wps=5824.1, ups=0.09, wpb=64899, bsz=128, num_updates=15171, lr=9.98866e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=175260
2021-06-20 19:19:57 | INFO | train_inner | epoch 006: 251 / 3002 loss=2.332, ppl=5.03, wps=5774, ups=0.09, wpb=64896, bsz=128, num_updates=15172, lr=9.98866e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=175272
2021-06-20 19:20:08 | INFO | train_inner | epoch 006: 252 / 3002 loss=2.454, ppl=5.48, wps=5805.2, ups=0.09, wpb=64766, bsz=128, num_updates=15173, lr=9.98866e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=175283
2021-06-20 19:20:20 | INFO | train_inner | epoch 006: 253 / 3002 loss=2.355, ppl=5.12, wps=5792, ups=0.09, wpb=64843, bsz=128, num_updates=15174, lr=9.98866e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=175294
2021-06-20 19:20:31 | INFO | train_inner | epoch 006: 254 / 3002 loss=2.377, ppl=5.2, wps=5847.9, ups=0.09, wpb=64828, bsz=128, num_updates=15175, lr=9.98866e-05, gnorm=2.082, loss_scale=8, train_wall=11, gb_free=2.8, wall=175305
2021-06-20 19:20:42 | INFO | train_inner | epoch 006: 255 / 3002 loss=2.386, ppl=5.23, wps=5802.3, ups=0.09, wpb=64732, bsz=128, num_updates=15176, lr=9.98866e-05, gnorm=2.05, loss_scale=8, train_wall=11, gb_free=2.8, wall=175316
2021-06-20 19:20:53 | INFO | train_inner | epoch 006: 256 / 3002 loss=2.523, ppl=5.75, wps=5813.2, ups=0.09, wpb=64824, bsz=128, num_updates=15177, lr=9.98866e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=175327
2021-06-20 19:21:04 | INFO | train_inner | epoch 006: 257 / 3002 loss=2.386, ppl=5.23, wps=5727.8, ups=0.09, wpb=64858, bsz=128, num_updates=15178, lr=9.98866e-05, gnorm=2.007, loss_scale=8, train_wall=11, gb_free=2.8, wall=175339
2021-06-20 19:21:16 | INFO | train_inner | epoch 006: 258 / 3002 loss=2.528, ppl=5.77, wps=5784.7, ups=0.09, wpb=64865, bsz=128, num_updates=15179, lr=9.98866e-05, gnorm=2.068, loss_scale=8, train_wall=11, gb_free=2.8, wall=175350
2021-06-20 19:21:27 | INFO | train_inner | epoch 006: 259 / 3002 loss=2.48, ppl=5.58, wps=5848.6, ups=0.09, wpb=64807, bsz=128, num_updates=15180, lr=9.98866e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=175361
2021-06-20 19:21:38 | INFO | train_inner | epoch 006: 260 / 3002 loss=2.324, ppl=5.01, wps=5826.9, ups=0.09, wpb=64782, bsz=128, num_updates=15181, lr=9.98865e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=175372
2021-06-20 19:21:49 | INFO | train_inner | epoch 006: 261 / 3002 loss=2.336, ppl=5.05, wps=5824.4, ups=0.09, wpb=64954, bsz=128, num_updates=15182, lr=9.98865e-05, gnorm=1.977, loss_scale=8, train_wall=11, gb_free=2.8, wall=175383
2021-06-20 19:22:00 | INFO | train_inner | epoch 006: 262 / 3002 loss=2.658, ppl=6.31, wps=5660, ups=0.09, wpb=64775, bsz=128, num_updates=15183, lr=9.98865e-05, gnorm=2.022, loss_scale=8, train_wall=11, gb_free=2.8, wall=175395
2021-06-20 19:22:12 | INFO | train_inner | epoch 006: 263 / 3002 loss=2.488, ppl=5.61, wps=5733.2, ups=0.09, wpb=64871, bsz=128, num_updates=15184, lr=9.98865e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=175406
2021-06-20 19:22:23 | INFO | train_inner | epoch 006: 264 / 3002 loss=2.3, ppl=4.93, wps=5775.3, ups=0.09, wpb=64842, bsz=128, num_updates=15185, lr=9.98865e-05, gnorm=2.028, loss_scale=8, train_wall=11, gb_free=2.8, wall=175417
2021-06-20 19:22:34 | INFO | train_inner | epoch 006: 265 / 3002 loss=2.37, ppl=5.17, wps=5820.5, ups=0.09, wpb=64926, bsz=128, num_updates=15186, lr=9.98865e-05, gnorm=2.028, loss_scale=8, train_wall=11, gb_free=2.8, wall=175428
2021-06-20 19:22:45 | INFO | train_inner | epoch 006: 266 / 3002 loss=2.6, ppl=6.06, wps=5771, ups=0.09, wpb=64855, bsz=128, num_updates=15187, lr=9.98865e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=175440
2021-06-20 19:22:56 | INFO | train_inner | epoch 006: 267 / 3002 loss=2.518, ppl=5.73, wps=5992.7, ups=0.09, wpb=64930, bsz=128, num_updates=15188, lr=9.98865e-05, gnorm=1.929, loss_scale=8, train_wall=10, gb_free=2.8, wall=175451
2021-06-20 19:23:07 | INFO | train_inner | epoch 006: 268 / 3002 loss=2.528, ppl=5.77, wps=5858.1, ups=0.09, wpb=64795, bsz=128, num_updates=15189, lr=9.98865e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=175462
2021-06-20 19:23:18 | INFO | train_inner | epoch 006: 269 / 3002 loss=2.476, ppl=5.57, wps=5875, ups=0.09, wpb=64901, bsz=128, num_updates=15190, lr=9.98865e-05, gnorm=2.04, loss_scale=8, train_wall=11, gb_free=2.8, wall=175473
2021-06-20 19:23:29 | INFO | train_inner | epoch 006: 270 / 3002 loss=2.556, ppl=5.88, wps=5790.4, ups=0.09, wpb=64850, bsz=128, num_updates=15191, lr=9.98865e-05, gnorm=1.974, loss_scale=8, train_wall=11, gb_free=2.8, wall=175484
2021-06-20 19:23:40 | INFO | train_inner | epoch 006: 271 / 3002 loss=2.578, ppl=5.97, wps=5940.5, ups=0.09, wpb=64904, bsz=128, num_updates=15192, lr=9.98865e-05, gnorm=1.977, loss_scale=8, train_wall=10, gb_free=2.8, wall=175495
2021-06-20 19:23:52 | INFO | train_inner | epoch 006: 272 / 3002 loss=2.503, ppl=5.67, wps=5811.8, ups=0.09, wpb=64833, bsz=128, num_updates=15193, lr=9.98864e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=175506
2021-06-20 19:24:03 | INFO | train_inner | epoch 006: 273 / 3002 loss=2.549, ppl=5.85, wps=5694.7, ups=0.09, wpb=64823, bsz=128, num_updates=15194, lr=9.98864e-05, gnorm=2.039, loss_scale=8, train_wall=11, gb_free=2.8, wall=175517
2021-06-20 19:24:14 | INFO | train_inner | epoch 006: 274 / 3002 loss=2.556, ppl=5.88, wps=5822.7, ups=0.09, wpb=64763, bsz=128, num_updates=15195, lr=9.98864e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=175528
2021-06-20 19:24:25 | INFO | train_inner | epoch 006: 275 / 3002 loss=2.575, ppl=5.96, wps=5914.4, ups=0.09, wpb=64824, bsz=128, num_updates=15196, lr=9.98864e-05, gnorm=1.936, loss_scale=8, train_wall=10, gb_free=2.8, wall=175539
2021-06-20 19:24:36 | INFO | train_inner | epoch 006: 276 / 3002 loss=2.5, ppl=5.66, wps=5860.2, ups=0.09, wpb=64768, bsz=128, num_updates=15197, lr=9.98864e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=175550
2021-06-20 19:24:47 | INFO | train_inner | epoch 006: 277 / 3002 loss=2.484, ppl=5.59, wps=5861.1, ups=0.09, wpb=64886, bsz=128, num_updates=15198, lr=9.98864e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=175562
2021-06-20 19:24:58 | INFO | train_inner | epoch 006: 278 / 3002 loss=2.426, ppl=5.38, wps=5784.6, ups=0.09, wpb=64964, bsz=128, num_updates=15199, lr=9.98864e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=175573
2021-06-20 19:25:10 | INFO | train_inner | epoch 006: 279 / 3002 loss=2.466, ppl=5.52, wps=5714.1, ups=0.09, wpb=64778, bsz=128, num_updates=15200, lr=9.98864e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=175584
2021-06-20 19:25:21 | INFO | train_inner | epoch 006: 280 / 3002 loss=2.481, ppl=5.58, wps=5692.7, ups=0.09, wpb=64872, bsz=128, num_updates=15201, lr=9.98864e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=175595
2021-06-20 19:25:32 | INFO | train_inner | epoch 006: 281 / 3002 loss=2.399, ppl=5.27, wps=5799.4, ups=0.09, wpb=64860, bsz=128, num_updates=15202, lr=9.98864e-05, gnorm=2.028, loss_scale=16, train_wall=11, gb_free=2.8, wall=175607
2021-06-20 19:25:44 | INFO | train_inner | epoch 006: 282 / 3002 loss=2.4, ppl=5.28, wps=5751.2, ups=0.09, wpb=64818, bsz=128, num_updates=15203, lr=9.98864e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=175618
2021-06-20 19:25:55 | INFO | train_inner | epoch 006: 283 / 3002 loss=2.485, ppl=5.6, wps=5799, ups=0.09, wpb=64796, bsz=128, num_updates=15204, lr=9.98864e-05, gnorm=2.014, loss_scale=16, train_wall=11, gb_free=2.8, wall=175629
2021-06-20 19:26:06 | INFO | train_inner | epoch 006: 284 / 3002 loss=2.371, ppl=5.17, wps=5735.8, ups=0.09, wpb=64783, bsz=128, num_updates=15205, lr=9.98864e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=175640
2021-06-20 19:26:17 | INFO | train_inner | epoch 006: 285 / 3002 loss=2.487, ppl=5.61, wps=5756.7, ups=0.09, wpb=64788, bsz=128, num_updates=15206, lr=9.98863e-05, gnorm=1.971, loss_scale=16, train_wall=11, gb_free=2.8, wall=175652
2021-06-20 19:26:29 | INFO | train_inner | epoch 006: 286 / 3002 loss=2.669, ppl=6.36, wps=5692.7, ups=0.09, wpb=64789, bsz=128, num_updates=15207, lr=9.98863e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=175663
2021-06-20 19:26:40 | INFO | train_inner | epoch 006: 287 / 3002 loss=2.419, ppl=5.35, wps=5866.8, ups=0.09, wpb=64772, bsz=128, num_updates=15208, lr=9.98863e-05, gnorm=1.85, loss_scale=16, train_wall=11, gb_free=2.8, wall=175674
2021-06-20 19:26:51 | INFO | train_inner | epoch 006: 288 / 3002 loss=2.514, ppl=5.71, wps=5712.7, ups=0.09, wpb=64846, bsz=128, num_updates=15209, lr=9.98863e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=175685
2021-06-20 19:27:02 | INFO | train_inner | epoch 006: 289 / 3002 loss=2.509, ppl=5.69, wps=5930.1, ups=0.09, wpb=64797, bsz=128, num_updates=15210, lr=9.98863e-05, gnorm=1.964, loss_scale=16, train_wall=10, gb_free=2.8, wall=175696
2021-06-20 19:27:13 | INFO | train_inner | epoch 006: 290 / 3002 loss=2.426, ppl=5.37, wps=5807.1, ups=0.09, wpb=64946, bsz=128, num_updates=15211, lr=9.98863e-05, gnorm=1.934, loss_scale=16, train_wall=11, gb_free=2.8, wall=175708
2021-06-20 19:27:24 | INFO | train_inner | epoch 006: 291 / 3002 loss=2.436, ppl=5.41, wps=5961.6, ups=0.09, wpb=64948, bsz=128, num_updates=15212, lr=9.98863e-05, gnorm=1.908, loss_scale=16, train_wall=10, gb_free=2.8, wall=175718
2021-06-20 19:27:35 | INFO | train_inner | epoch 006: 292 / 3002 loss=2.417, ppl=5.34, wps=5789.9, ups=0.09, wpb=64804, bsz=128, num_updates=15213, lr=9.98863e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=175730
2021-06-20 19:27:46 | INFO | train_inner | epoch 006: 293 / 3002 loss=2.581, ppl=5.98, wps=5910.1, ups=0.09, wpb=64772, bsz=128, num_updates=15214, lr=9.98863e-05, gnorm=1.966, loss_scale=16, train_wall=10, gb_free=2.8, wall=175741
2021-06-20 19:27:57 | INFO | train_inner | epoch 006: 294 / 3002 loss=2.544, ppl=5.83, wps=5837, ups=0.09, wpb=64830, bsz=128, num_updates=15215, lr=9.98863e-05, gnorm=1.961, loss_scale=16, train_wall=11, gb_free=2.8, wall=175752
2021-06-20 19:28:08 | INFO | train_inner | epoch 006: 295 / 3002 loss=2.617, ppl=6.13, wps=5832.3, ups=0.09, wpb=64797, bsz=128, num_updates=15216, lr=9.98863e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=175763
2021-06-20 19:28:20 | INFO | train_inner | epoch 006: 296 / 3002 loss=2.446, ppl=5.45, wps=5829.8, ups=0.09, wpb=64792, bsz=128, num_updates=15217, lr=9.98863e-05, gnorm=1.883, loss_scale=16, train_wall=11, gb_free=2.8, wall=175774
2021-06-20 19:28:31 | INFO | train_inner | epoch 006: 297 / 3002 loss=2.379, ppl=5.2, wps=5849.7, ups=0.09, wpb=64897, bsz=128, num_updates=15218, lr=9.98862e-05, gnorm=1.861, loss_scale=16, train_wall=11, gb_free=2.8, wall=175785
2021-06-20 19:28:42 | INFO | train_inner | epoch 006: 298 / 3002 loss=2.459, ppl=5.5, wps=5940.5, ups=0.09, wpb=64816, bsz=128, num_updates=15219, lr=9.98862e-05, gnorm=1.904, loss_scale=16, train_wall=10, gb_free=2.8, wall=175796
2021-06-20 19:28:53 | INFO | train_inner | epoch 006: 299 / 3002 loss=2.43, ppl=5.39, wps=5840.6, ups=0.09, wpb=64858, bsz=128, num_updates=15220, lr=9.98862e-05, gnorm=1.899, loss_scale=16, train_wall=11, gb_free=2.8, wall=175807
2021-06-20 19:29:04 | INFO | train_inner | epoch 006: 300 / 3002 loss=2.581, ppl=5.98, wps=5713.6, ups=0.09, wpb=64828, bsz=128, num_updates=15221, lr=9.98862e-05, gnorm=1.95, loss_scale=16, train_wall=11, gb_free=2.8, wall=175818
2021-06-20 19:29:15 | INFO | train_inner | epoch 006: 301 / 3002 loss=2.503, ppl=5.67, wps=5797.4, ups=0.09, wpb=64771, bsz=128, num_updates=15222, lr=9.98862e-05, gnorm=1.846, loss_scale=16, train_wall=11, gb_free=2.8, wall=175830
2021-06-20 19:29:26 | INFO | train_inner | epoch 006: 302 / 3002 loss=2.486, ppl=5.6, wps=5745.1, ups=0.09, wpb=64833, bsz=128, num_updates=15223, lr=9.98862e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=175841
2021-06-20 19:29:38 | INFO | train_inner | epoch 006: 303 / 3002 loss=2.352, ppl=5.11, wps=5862.9, ups=0.09, wpb=64886, bsz=128, num_updates=15224, lr=9.98862e-05, gnorm=1.878, loss_scale=16, train_wall=11, gb_free=2.8, wall=175852
2021-06-20 19:29:49 | INFO | train_inner | epoch 006: 304 / 3002 loss=2.489, ppl=5.61, wps=5891, ups=0.09, wpb=64862, bsz=128, num_updates=15225, lr=9.98862e-05, gnorm=2.274, loss_scale=16, train_wall=11, gb_free=2.8, wall=175863
2021-06-20 19:30:00 | INFO | train_inner | epoch 006: 305 / 3002 loss=2.489, ppl=5.61, wps=5795.9, ups=0.09, wpb=64795, bsz=128, num_updates=15226, lr=9.98862e-05, gnorm=1.967, loss_scale=16, train_wall=11, gb_free=2.8, wall=175874
2021-06-20 19:30:11 | INFO | train_inner | epoch 006: 306 / 3002 loss=2.557, ppl=5.88, wps=5726, ups=0.09, wpb=64824, bsz=128, num_updates=15227, lr=9.98862e-05, gnorm=2.051, loss_scale=16, train_wall=11, gb_free=2.8, wall=175885
2021-06-20 19:30:22 | INFO | train_inner | epoch 006: 307 / 3002 loss=2.426, ppl=5.37, wps=5739.5, ups=0.09, wpb=64755, bsz=128, num_updates=15228, lr=9.98862e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=175897
2021-06-20 19:30:33 | INFO | train_inner | epoch 006: 308 / 3002 loss=2.371, ppl=5.17, wps=5888.3, ups=0.09, wpb=64824, bsz=128, num_updates=15229, lr=9.98862e-05, gnorm=1.911, loss_scale=16, train_wall=10, gb_free=2.8, wall=175908
2021-06-20 19:30:44 | INFO | train_inner | epoch 006: 309 / 3002 loss=2.612, ppl=6.11, wps=5836.6, ups=0.09, wpb=64771, bsz=128, num_updates=15230, lr=9.98862e-05, gnorm=1.828, loss_scale=16, train_wall=11, gb_free=2.8, wall=175919
2021-06-20 19:30:56 | INFO | train_inner | epoch 006: 310 / 3002 loss=2.515, ppl=5.71, wps=5878, ups=0.09, wpb=64895, bsz=128, num_updates=15231, lr=9.98861e-05, gnorm=1.934, loss_scale=16, train_wall=11, gb_free=2.8, wall=175930
2021-06-20 19:31:06 | INFO | train_inner | epoch 006: 311 / 3002 loss=2.335, ppl=5.05, wps=5903.5, ups=0.09, wpb=64799, bsz=128, num_updates=15232, lr=9.98861e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=175941
2021-06-20 19:31:18 | INFO | train_inner | epoch 006: 312 / 3002 loss=2.49, ppl=5.62, wps=5769.9, ups=0.09, wpb=64794, bsz=128, num_updates=15233, lr=9.98861e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=175952
2021-06-20 19:31:29 | INFO | train_inner | epoch 006: 313 / 3002 loss=2.296, ppl=4.91, wps=5850.7, ups=0.09, wpb=64822, bsz=128, num_updates=15234, lr=9.98861e-05, gnorm=1.995, loss_scale=16, train_wall=11, gb_free=2.8, wall=175963
2021-06-20 19:31:40 | INFO | train_inner | epoch 006: 314 / 3002 loss=2.402, ppl=5.28, wps=5862.7, ups=0.09, wpb=64828, bsz=128, num_updates=15235, lr=9.98861e-05, gnorm=1.953, loss_scale=16, train_wall=11, gb_free=2.8, wall=175974
2021-06-20 19:31:51 | INFO | train_inner | epoch 006: 315 / 3002 loss=2.568, ppl=5.93, wps=5842.7, ups=0.09, wpb=64714, bsz=128, num_updates=15236, lr=9.98861e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=175985
2021-06-20 19:32:02 | INFO | train_inner | epoch 006: 316 / 3002 loss=2.517, ppl=5.73, wps=5846.7, ups=0.09, wpb=64820, bsz=128, num_updates=15237, lr=9.98861e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=175996
2021-06-20 19:32:13 | INFO | train_inner | epoch 006: 317 / 3002 loss=2.474, ppl=5.55, wps=5829.8, ups=0.09, wpb=64837, bsz=128, num_updates=15238, lr=9.98861e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=176007
2021-06-20 19:32:24 | INFO | train_inner | epoch 006: 318 / 3002 loss=2.471, ppl=5.54, wps=5765.3, ups=0.09, wpb=64786, bsz=128, num_updates=15239, lr=9.98861e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=176019
2021-06-20 19:32:36 | INFO | train_inner | epoch 006: 319 / 3002 loss=2.464, ppl=5.52, wps=5740.7, ups=0.09, wpb=64876, bsz=128, num_updates=15240, lr=9.98861e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=176030
2021-06-20 19:32:47 | INFO | train_inner | epoch 006: 320 / 3002 loss=2.364, ppl=5.15, wps=5793, ups=0.09, wpb=64855, bsz=128, num_updates=15241, lr=9.98861e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=176041
2021-06-20 19:32:58 | INFO | train_inner | epoch 006: 321 / 3002 loss=2.525, ppl=5.76, wps=5804.8, ups=0.09, wpb=64822, bsz=128, num_updates=15242, lr=9.98861e-05, gnorm=2.058, loss_scale=16, train_wall=11, gb_free=2.8, wall=176052
2021-06-20 19:33:09 | INFO | train_inner | epoch 006: 322 / 3002 loss=2.487, ppl=5.61, wps=5780.5, ups=0.09, wpb=64803, bsz=128, num_updates=15243, lr=9.9886e-05, gnorm=2.009, loss_scale=16, train_wall=11, gb_free=2.8, wall=176064
2021-06-20 19:33:20 | INFO | train_inner | epoch 006: 323 / 3002 loss=2.532, ppl=5.79, wps=5818.5, ups=0.09, wpb=64903, bsz=128, num_updates=15244, lr=9.9886e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=176075
2021-06-20 19:33:32 | INFO | train_inner | epoch 006: 324 / 3002 loss=2.502, ppl=5.66, wps=5747.5, ups=0.09, wpb=64783, bsz=128, num_updates=15245, lr=9.9886e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=176086
2021-06-20 19:33:43 | INFO | train_inner | epoch 006: 325 / 3002 loss=2.454, ppl=5.48, wps=5971, ups=0.09, wpb=64819, bsz=128, num_updates=15246, lr=9.9886e-05, gnorm=1.983, loss_scale=16, train_wall=10, gb_free=2.8, wall=176097
2021-06-20 19:33:54 | INFO | train_inner | epoch 006: 326 / 3002 loss=2.406, ppl=5.3, wps=5732, ups=0.09, wpb=64870, bsz=128, num_updates=15247, lr=9.9886e-05, gnorm=2.057, loss_scale=16, train_wall=11, gb_free=2.8, wall=176108
2021-06-20 19:34:05 | INFO | train_inner | epoch 006: 327 / 3002 loss=2.47, ppl=5.54, wps=5813.9, ups=0.09, wpb=64825, bsz=128, num_updates=15248, lr=9.9886e-05, gnorm=2.002, loss_scale=16, train_wall=11, gb_free=2.8, wall=176119
2021-06-20 19:34:16 | INFO | train_inner | epoch 006: 328 / 3002 loss=2.453, ppl=5.48, wps=5638.3, ups=0.09, wpb=64820, bsz=128, num_updates=15249, lr=9.9886e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=176131
2021-06-20 19:34:27 | INFO | train_inner | epoch 006: 329 / 3002 loss=2.709, ppl=6.54, wps=5892.5, ups=0.09, wpb=64812, bsz=128, num_updates=15250, lr=9.9886e-05, gnorm=2.029, loss_scale=16, train_wall=11, gb_free=2.8, wall=176142
2021-06-20 19:34:39 | INFO | train_inner | epoch 006: 330 / 3002 loss=2.518, ppl=5.73, wps=5706.7, ups=0.09, wpb=64801, bsz=128, num_updates=15251, lr=9.9886e-05, gnorm=1.921, loss_scale=16, train_wall=11, gb_free=2.8, wall=176153
2021-06-20 19:34:50 | INFO | train_inner | epoch 006: 331 / 3002 loss=2.469, ppl=5.54, wps=5782.7, ups=0.09, wpb=64833, bsz=128, num_updates=15252, lr=9.9886e-05, gnorm=1.905, loss_scale=16, train_wall=11, gb_free=2.8, wall=176164
2021-06-20 19:35:01 | INFO | train_inner | epoch 006: 332 / 3002 loss=2.561, ppl=5.9, wps=5893.6, ups=0.09, wpb=64907, bsz=128, num_updates=15253, lr=9.9886e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=176175
2021-06-20 19:35:12 | INFO | train_inner | epoch 006: 333 / 3002 loss=2.657, ppl=6.31, wps=5878.9, ups=0.09, wpb=64801, bsz=128, num_updates=15254, lr=9.9886e-05, gnorm=1.954, loss_scale=16, train_wall=11, gb_free=2.8, wall=176186
2021-06-20 19:35:23 | INFO | train_inner | epoch 006: 334 / 3002 loss=2.476, ppl=5.56, wps=5926.1, ups=0.09, wpb=64927, bsz=128, num_updates=15255, lr=9.9886e-05, gnorm=1.992, loss_scale=16, train_wall=10, gb_free=2.8, wall=176197
2021-06-20 19:35:34 | INFO | train_inner | epoch 006: 335 / 3002 loss=2.508, ppl=5.69, wps=5827.4, ups=0.09, wpb=64890, bsz=128, num_updates=15256, lr=9.98859e-05, gnorm=1.934, loss_scale=16, train_wall=11, gb_free=2.8, wall=176209
2021-06-20 19:35:45 | INFO | train_inner | epoch 006: 336 / 3002 loss=2.353, ppl=5.11, wps=5863.8, ups=0.09, wpb=64882, bsz=128, num_updates=15257, lr=9.98859e-05, gnorm=1.956, loss_scale=16, train_wall=11, gb_free=2.8, wall=176220
2021-06-20 19:35:56 | INFO | train_inner | epoch 006: 337 / 3002 loss=2.244, ppl=4.74, wps=5865.2, ups=0.09, wpb=64856, bsz=128, num_updates=15258, lr=9.98859e-05, gnorm=1.915, loss_scale=16, train_wall=11, gb_free=2.8, wall=176231
2021-06-20 19:36:07 | INFO | train_inner | epoch 006: 338 / 3002 loss=2.643, ppl=6.25, wps=5792.3, ups=0.09, wpb=64811, bsz=128, num_updates=15259, lr=9.98859e-05, gnorm=2.028, loss_scale=16, train_wall=11, gb_free=2.8, wall=176242
2021-06-20 19:36:19 | INFO | train_inner | epoch 006: 339 / 3002 loss=2.265, ppl=4.81, wps=5793, ups=0.09, wpb=64854, bsz=128, num_updates=15260, lr=9.98859e-05, gnorm=1.973, loss_scale=16, train_wall=11, gb_free=2.8, wall=176253
2021-06-20 19:36:30 | INFO | train_inner | epoch 006: 340 / 3002 loss=2.648, ppl=6.27, wps=5817.6, ups=0.09, wpb=64834, bsz=128, num_updates=15261, lr=9.98859e-05, gnorm=1.979, loss_scale=16, train_wall=11, gb_free=2.8, wall=176264
2021-06-20 19:36:41 | INFO | train_inner | epoch 006: 341 / 3002 loss=2.558, ppl=5.89, wps=5865.1, ups=0.09, wpb=64876, bsz=128, num_updates=15262, lr=9.98859e-05, gnorm=2.481, loss_scale=16, train_wall=11, gb_free=2.8, wall=176275
2021-06-20 19:36:52 | INFO | train_inner | epoch 006: 342 / 3002 loss=2.465, ppl=5.52, wps=5702.3, ups=0.09, wpb=64869, bsz=128, num_updates=15263, lr=9.98859e-05, gnorm=1.913, loss_scale=16, train_wall=11, gb_free=2.8, wall=176287
2021-06-20 19:37:03 | INFO | train_inner | epoch 006: 343 / 3002 loss=2.538, ppl=5.81, wps=5890.6, ups=0.09, wpb=64928, bsz=128, num_updates=15264, lr=9.98859e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=176298
2021-06-20 19:37:14 | INFO | train_inner | epoch 006: 344 / 3002 loss=2.503, ppl=5.67, wps=5824, ups=0.09, wpb=64775, bsz=128, num_updates=15265, lr=9.98859e-05, gnorm=1.917, loss_scale=16, train_wall=11, gb_free=2.8, wall=176309
2021-06-20 19:37:26 | INFO | train_inner | epoch 006: 345 / 3002 loss=2.569, ppl=5.94, wps=5825.6, ups=0.09, wpb=64800, bsz=128, num_updates=15266, lr=9.98859e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=176320
2021-06-20 19:37:37 | INFO | train_inner | epoch 006: 346 / 3002 loss=2.381, ppl=5.21, wps=5717.4, ups=0.09, wpb=64819, bsz=128, num_updates=15267, lr=9.98859e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=176331
2021-06-20 19:37:48 | INFO | train_inner | epoch 006: 347 / 3002 loss=2.489, ppl=5.61, wps=5912, ups=0.09, wpb=64865, bsz=128, num_updates=15268, lr=9.98858e-05, gnorm=1.948, loss_scale=16, train_wall=10, gb_free=2.8, wall=176342
2021-06-20 19:37:59 | INFO | train_inner | epoch 006: 348 / 3002 loss=2.511, ppl=5.7, wps=5783.7, ups=0.09, wpb=64801, bsz=128, num_updates=15269, lr=9.98858e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=176353
2021-06-20 19:38:10 | INFO | train_inner | epoch 006: 349 / 3002 loss=2.491, ppl=5.62, wps=5818.5, ups=0.09, wpb=64876, bsz=128, num_updates=15270, lr=9.98858e-05, gnorm=1.883, loss_scale=16, train_wall=11, gb_free=2.8, wall=176365
2021-06-20 19:38:21 | INFO | train_inner | epoch 006: 350 / 3002 loss=2.434, ppl=5.4, wps=5764.5, ups=0.09, wpb=64714, bsz=128, num_updates=15271, lr=9.98858e-05, gnorm=2.102, loss_scale=16, train_wall=11, gb_free=2.8, wall=176376
2021-06-20 19:38:33 | INFO | train_inner | epoch 006: 351 / 3002 loss=2.303, ppl=4.93, wps=5733.6, ups=0.09, wpb=64796, bsz=128, num_updates=15272, lr=9.98858e-05, gnorm=1.942, loss_scale=16, train_wall=11, gb_free=2.8, wall=176387
2021-06-20 19:38:44 | INFO | train_inner | epoch 006: 352 / 3002 loss=2.391, ppl=5.25, wps=5768.4, ups=0.09, wpb=64816, bsz=128, num_updates=15273, lr=9.98858e-05, gnorm=1.866, loss_scale=16, train_wall=11, gb_free=2.8, wall=176398
2021-06-20 19:38:55 | INFO | train_inner | epoch 006: 353 / 3002 loss=2.483, ppl=5.59, wps=5889.2, ups=0.09, wpb=64856, bsz=128, num_updates=15274, lr=9.98858e-05, gnorm=2.051, loss_scale=16, train_wall=11, gb_free=2.8, wall=176409
2021-06-20 19:39:06 | INFO | train_inner | epoch 006: 354 / 3002 loss=2.335, ppl=5.05, wps=5763.7, ups=0.09, wpb=64849, bsz=128, num_updates=15275, lr=9.98858e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=176421
2021-06-20 19:39:17 | INFO | train_inner | epoch 006: 355 / 3002 loss=2.447, ppl=5.45, wps=5776.3, ups=0.09, wpb=64764, bsz=128, num_updates=15276, lr=9.98858e-05, gnorm=1.86, loss_scale=16, train_wall=11, gb_free=2.8, wall=176432
2021-06-20 19:39:29 | INFO | train_inner | epoch 006: 356 / 3002 loss=2.565, ppl=5.92, wps=5815.9, ups=0.09, wpb=64801, bsz=128, num_updates=15277, lr=9.98858e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=176443
2021-06-20 19:39:40 | INFO | train_inner | epoch 006: 357 / 3002 loss=2.457, ppl=5.49, wps=5762, ups=0.09, wpb=64843, bsz=128, num_updates=15278, lr=9.98858e-05, gnorm=1.888, loss_scale=16, train_wall=11, gb_free=2.8, wall=176454
2021-06-20 19:39:51 | INFO | train_inner | epoch 006: 358 / 3002 loss=2.614, ppl=6.12, wps=5774.1, ups=0.09, wpb=64794, bsz=128, num_updates=15279, lr=9.98858e-05, gnorm=1.901, loss_scale=16, train_wall=11, gb_free=2.8, wall=176465
2021-06-20 19:40:02 | INFO | train_inner | epoch 006: 359 / 3002 loss=2.443, ppl=5.44, wps=5803.9, ups=0.09, wpb=64877, bsz=128, num_updates=15280, lr=9.98858e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=176477
2021-06-20 19:40:14 | INFO | train_inner | epoch 006: 360 / 3002 loss=2.332, ppl=5.04, wps=5725.4, ups=0.09, wpb=64850, bsz=128, num_updates=15281, lr=9.98857e-05, gnorm=1.877, loss_scale=16, train_wall=11, gb_free=2.8, wall=176488
2021-06-20 19:40:25 | INFO | train_inner | epoch 006: 361 / 3002 loss=2.517, ppl=5.72, wps=5827, ups=0.09, wpb=64856, bsz=128, num_updates=15282, lr=9.98857e-05, gnorm=3.886, loss_scale=16, train_wall=11, gb_free=2.8, wall=176499
2021-06-20 19:40:36 | INFO | train_inner | epoch 006: 362 / 3002 loss=2.459, ppl=5.5, wps=5699.3, ups=0.09, wpb=64781, bsz=128, num_updates=15283, lr=9.98857e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=176510
2021-06-20 19:40:47 | INFO | train_inner | epoch 006: 363 / 3002 loss=2.49, ppl=5.62, wps=5700.3, ups=0.09, wpb=64848, bsz=128, num_updates=15284, lr=9.98857e-05, gnorm=1.858, loss_scale=16, train_wall=11, gb_free=2.8, wall=176522
2021-06-20 19:40:59 | INFO | train_inner | epoch 006: 364 / 3002 loss=2.637, ppl=6.22, wps=5794.7, ups=0.09, wpb=64749, bsz=128, num_updates=15285, lr=9.98857e-05, gnorm=1.929, loss_scale=16, train_wall=11, gb_free=2.8, wall=176533
2021-06-20 19:41:10 | INFO | train_inner | epoch 006: 365 / 3002 loss=2.456, ppl=5.49, wps=5791.7, ups=0.09, wpb=64900, bsz=128, num_updates=15286, lr=9.98857e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=176544
2021-06-20 19:41:21 | INFO | train_inner | epoch 006: 366 / 3002 loss=2.52, ppl=5.74, wps=5856.7, ups=0.09, wpb=64879, bsz=128, num_updates=15287, lr=9.98857e-05, gnorm=1.951, loss_scale=16, train_wall=11, gb_free=2.8, wall=176555
2021-06-20 19:41:32 | INFO | train_inner | epoch 006: 367 / 3002 loss=2.511, ppl=5.7, wps=5787.5, ups=0.09, wpb=64870, bsz=128, num_updates=15288, lr=9.98857e-05, gnorm=1.888, loss_scale=16, train_wall=11, gb_free=2.8, wall=176566
2021-06-20 19:41:43 | INFO | train_inner | epoch 006: 368 / 3002 loss=2.399, ppl=5.28, wps=5852.1, ups=0.09, wpb=64903, bsz=128, num_updates=15289, lr=9.98857e-05, gnorm=1.88, loss_scale=16, train_wall=11, gb_free=2.8, wall=176578
2021-06-20 19:41:54 | INFO | train_inner | epoch 006: 369 / 3002 loss=2.482, ppl=5.59, wps=5770, ups=0.09, wpb=64834, bsz=128, num_updates=15290, lr=9.98857e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=176589
2021-06-20 19:42:06 | INFO | train_inner | epoch 006: 370 / 3002 loss=2.554, ppl=5.87, wps=5854.5, ups=0.09, wpb=64827, bsz=128, num_updates=15291, lr=9.98857e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=176600
2021-06-20 19:42:17 | INFO | train_inner | epoch 006: 371 / 3002 loss=2.555, ppl=5.88, wps=5817.7, ups=0.09, wpb=64732, bsz=128, num_updates=15292, lr=9.98857e-05, gnorm=2.049, loss_scale=16, train_wall=11, gb_free=2.8, wall=176611
2021-06-20 19:42:28 | INFO | train_inner | epoch 006: 372 / 3002 loss=2.398, ppl=5.27, wps=5844, ups=0.09, wpb=64797, bsz=128, num_updates=15293, lr=9.98856e-05, gnorm=1.917, loss_scale=16, train_wall=11, gb_free=2.8, wall=176622
2021-06-20 19:42:39 | INFO | train_inner | epoch 006: 373 / 3002 loss=2.574, ppl=5.96, wps=5728.3, ups=0.09, wpb=64830, bsz=128, num_updates=15294, lr=9.98856e-05, gnorm=1.918, loss_scale=16, train_wall=11, gb_free=2.8, wall=176633
2021-06-20 19:42:50 | INFO | train_inner | epoch 006: 374 / 3002 loss=2.441, ppl=5.43, wps=5852.9, ups=0.09, wpb=64862, bsz=128, num_updates=15295, lr=9.98856e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=176644
2021-06-20 19:43:01 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 19:43:12 | INFO | train_inner | epoch 006: 376 / 3002 loss=2.52, ppl=5.74, wps=2921.6, ups=0.05, wpb=64788, bsz=128, num_updates=15296, lr=9.98856e-05, gnorm=2.037, loss_scale=8, train_wall=21, gb_free=2.8, wall=176667
2021-06-20 19:43:23 | INFO | train_inner | epoch 006: 377 / 3002 loss=2.471, ppl=5.54, wps=5839.9, ups=0.09, wpb=64871, bsz=128, num_updates=15297, lr=9.98856e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=176678
2021-06-20 19:43:34 | INFO | train_inner | epoch 006: 378 / 3002 loss=2.449, ppl=5.46, wps=5899, ups=0.09, wpb=64881, bsz=128, num_updates=15298, lr=9.98856e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=176689
2021-06-20 19:43:45 | INFO | train_inner | epoch 006: 379 / 3002 loss=2.389, ppl=5.24, wps=5882.3, ups=0.09, wpb=64834, bsz=128, num_updates=15299, lr=9.98856e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=176700
2021-06-20 19:43:56 | INFO | train_inner | epoch 006: 380 / 3002 loss=2.513, ppl=5.71, wps=5900, ups=0.09, wpb=64802, bsz=128, num_updates=15300, lr=9.98856e-05, gnorm=1.88, loss_scale=8, train_wall=11, gb_free=2.8, wall=176711
2021-06-20 19:44:07 | INFO | train_inner | epoch 006: 381 / 3002 loss=2.47, ppl=5.54, wps=5878.1, ups=0.09, wpb=64869, bsz=128, num_updates=15301, lr=9.98856e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=176722
2021-06-20 19:44:19 | INFO | train_inner | epoch 006: 382 / 3002 loss=2.411, ppl=5.32, wps=5799.6, ups=0.09, wpb=64852, bsz=128, num_updates=15302, lr=9.98856e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=176733
2021-06-20 19:44:30 | INFO | train_inner | epoch 006: 383 / 3002 loss=2.562, ppl=5.9, wps=5733.6, ups=0.09, wpb=64883, bsz=128, num_updates=15303, lr=9.98856e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=176744
2021-06-20 19:44:41 | INFO | train_inner | epoch 006: 384 / 3002 loss=2.385, ppl=5.22, wps=5772.4, ups=0.09, wpb=64745, bsz=128, num_updates=15304, lr=9.98856e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=176756
2021-06-20 19:44:52 | INFO | train_inner | epoch 006: 385 / 3002 loss=2.489, ppl=5.61, wps=5768.1, ups=0.09, wpb=64812, bsz=128, num_updates=15305, lr=9.98856e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=176767
2021-06-20 19:45:04 | INFO | train_inner | epoch 006: 386 / 3002 loss=2.352, ppl=5.1, wps=5662.2, ups=0.09, wpb=64865, bsz=128, num_updates=15306, lr=9.98855e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=176778
2021-06-20 19:45:15 | INFO | train_inner | epoch 006: 387 / 3002 loss=2.426, ppl=5.37, wps=5801.2, ups=0.09, wpb=64832, bsz=128, num_updates=15307, lr=9.98855e-05, gnorm=1.883, loss_scale=8, train_wall=11, gb_free=2.8, wall=176789
2021-06-20 19:45:26 | INFO | train_inner | epoch 006: 388 / 3002 loss=2.444, ppl=5.44, wps=5741.1, ups=0.09, wpb=64817, bsz=128, num_updates=15308, lr=9.98855e-05, gnorm=2.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=176801
2021-06-20 19:45:37 | INFO | train_inner | epoch 006: 389 / 3002 loss=2.423, ppl=5.36, wps=5844.8, ups=0.09, wpb=64830, bsz=128, num_updates=15309, lr=9.98855e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=176812
2021-06-20 19:45:49 | INFO | train_inner | epoch 006: 390 / 3002 loss=2.496, ppl=5.64, wps=5787.4, ups=0.09, wpb=64789, bsz=128, num_updates=15310, lr=9.98855e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=176823
2021-06-20 19:46:00 | INFO | train_inner | epoch 006: 391 / 3002 loss=2.548, ppl=5.85, wps=5932, ups=0.09, wpb=64773, bsz=128, num_updates=15311, lr=9.98855e-05, gnorm=1.947, loss_scale=8, train_wall=10, gb_free=2.8, wall=176834
2021-06-20 19:46:10 | INFO | train_inner | epoch 006: 392 / 3002 loss=2.454, ppl=5.48, wps=6008.8, ups=0.09, wpb=64809, bsz=128, num_updates=15312, lr=9.98855e-05, gnorm=1.905, loss_scale=8, train_wall=10, gb_free=2.8, wall=176845
2021-06-20 19:46:21 | INFO | train_inner | epoch 006: 393 / 3002 loss=2.59, ppl=6.02, wps=5838.6, ups=0.09, wpb=64802, bsz=128, num_updates=15313, lr=9.98855e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=176856
2021-06-20 19:46:32 | INFO | train_inner | epoch 006: 394 / 3002 loss=2.375, ppl=5.19, wps=5977.7, ups=0.09, wpb=64813, bsz=128, num_updates=15314, lr=9.98855e-05, gnorm=2.019, loss_scale=8, train_wall=10, gb_free=2.8, wall=176867
2021-06-20 19:46:43 | INFO | train_inner | epoch 006: 395 / 3002 loss=2.632, ppl=6.2, wps=5944.4, ups=0.09, wpb=64836, bsz=128, num_updates=15315, lr=9.98855e-05, gnorm=1.993, loss_scale=8, train_wall=10, gb_free=2.8, wall=176878
2021-06-20 19:46:54 | INFO | train_inner | epoch 006: 396 / 3002 loss=2.504, ppl=5.67, wps=5964.7, ups=0.09, wpb=64840, bsz=128, num_updates=15316, lr=9.98855e-05, gnorm=1.903, loss_scale=8, train_wall=10, gb_free=2.8, wall=176888
2021-06-20 19:47:05 | INFO | train_inner | epoch 006: 397 / 3002 loss=2.606, ppl=6.09, wps=5940.4, ups=0.09, wpb=64759, bsz=128, num_updates=15317, lr=9.98855e-05, gnorm=2.06, loss_scale=8, train_wall=10, gb_free=2.8, wall=176899
2021-06-20 19:47:16 | INFO | train_inner | epoch 006: 398 / 3002 loss=2.491, ppl=5.62, wps=5844.1, ups=0.09, wpb=64811, bsz=128, num_updates=15318, lr=9.98854e-05, gnorm=1.868, loss_scale=8, train_wall=11, gb_free=2.8, wall=176910
2021-06-20 19:47:27 | INFO | train_inner | epoch 006: 399 / 3002 loss=2.425, ppl=5.37, wps=5840, ups=0.09, wpb=64836, bsz=128, num_updates=15319, lr=9.98854e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=176921
2021-06-20 19:47:38 | INFO | train_inner | epoch 006: 400 / 3002 loss=2.541, ppl=5.82, wps=5770.9, ups=0.09, wpb=64767, bsz=128, num_updates=15320, lr=9.98854e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=176933
2021-06-20 19:47:50 | INFO | train_inner | epoch 006: 401 / 3002 loss=2.487, ppl=5.61, wps=5779.2, ups=0.09, wpb=64805, bsz=128, num_updates=15321, lr=9.98854e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=176944
2021-06-20 19:48:01 | INFO | train_inner | epoch 006: 402 / 3002 loss=2.491, ppl=5.62, wps=5839.8, ups=0.09, wpb=64807, bsz=128, num_updates=15322, lr=9.98854e-05, gnorm=2.016, loss_scale=8, train_wall=11, gb_free=2.8, wall=176955
2021-06-20 19:48:12 | INFO | train_inner | epoch 006: 403 / 3002 loss=2.43, ppl=5.39, wps=5931.8, ups=0.09, wpb=64800, bsz=128, num_updates=15323, lr=9.98854e-05, gnorm=1.885, loss_scale=8, train_wall=10, gb_free=2.8, wall=176966
2021-06-20 19:48:23 | INFO | train_inner | epoch 006: 404 / 3002 loss=2.51, ppl=5.7, wps=5833.4, ups=0.09, wpb=64798, bsz=128, num_updates=15324, lr=9.98854e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=176977
2021-06-20 19:48:34 | INFO | train_inner | epoch 006: 405 / 3002 loss=2.504, ppl=5.67, wps=5801.2, ups=0.09, wpb=64811, bsz=128, num_updates=15325, lr=9.98854e-05, gnorm=2.042, loss_scale=8, train_wall=11, gb_free=2.8, wall=176988
2021-06-20 19:48:45 | INFO | train_inner | epoch 006: 406 / 3002 loss=2.611, ppl=6.11, wps=5964.1, ups=0.09, wpb=64822, bsz=128, num_updates=15326, lr=9.98854e-05, gnorm=1.978, loss_scale=8, train_wall=10, gb_free=2.8, wall=176999
2021-06-20 19:48:56 | INFO | train_inner | epoch 006: 407 / 3002 loss=2.429, ppl=5.39, wps=5856.5, ups=0.09, wpb=64828, bsz=128, num_updates=15327, lr=9.98854e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=177010
2021-06-20 19:49:07 | INFO | train_inner | epoch 006: 408 / 3002 loss=2.437, ppl=5.42, wps=5894.5, ups=0.09, wpb=64861, bsz=128, num_updates=15328, lr=9.98854e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=177021
2021-06-20 19:49:18 | INFO | train_inner | epoch 006: 409 / 3002 loss=2.603, ppl=6.08, wps=5757.5, ups=0.09, wpb=64874, bsz=128, num_updates=15329, lr=9.98854e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=177032
2021-06-20 19:49:29 | INFO | train_inner | epoch 006: 410 / 3002 loss=2.382, ppl=5.21, wps=5917.4, ups=0.09, wpb=64857, bsz=128, num_updates=15330, lr=9.98854e-05, gnorm=1.967, loss_scale=8, train_wall=10, gb_free=2.8, wall=177043
2021-06-20 19:49:40 | INFO | train_inner | epoch 006: 411 / 3002 loss=2.4, ppl=5.28, wps=5925, ups=0.09, wpb=64858, bsz=128, num_updates=15331, lr=9.98853e-05, gnorm=1.852, loss_scale=8, train_wall=11, gb_free=2.8, wall=177054
2021-06-20 19:49:51 | INFO | train_inner | epoch 006: 412 / 3002 loss=2.399, ppl=5.28, wps=5767.3, ups=0.09, wpb=64793, bsz=128, num_updates=15332, lr=9.98853e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=177066
2021-06-20 19:50:02 | INFO | train_inner | epoch 006: 413 / 3002 loss=2.4, ppl=5.28, wps=5793.3, ups=0.09, wpb=64844, bsz=128, num_updates=15333, lr=9.98853e-05, gnorm=2.009, loss_scale=8, train_wall=11, gb_free=2.8, wall=177077
2021-06-20 19:50:13 | INFO | train_inner | epoch 006: 414 / 3002 loss=2.582, ppl=5.99, wps=5867.7, ups=0.09, wpb=64831, bsz=128, num_updates=15334, lr=9.98853e-05, gnorm=1.985, loss_scale=8, train_wall=11, gb_free=2.8, wall=177088
2021-06-20 19:50:25 | INFO | train_inner | epoch 006: 415 / 3002 loss=2.534, ppl=5.79, wps=5822.4, ups=0.09, wpb=64880, bsz=128, num_updates=15335, lr=9.98853e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=177099
2021-06-20 19:50:36 | INFO | train_inner | epoch 006: 416 / 3002 loss=2.48, ppl=5.58, wps=5908.3, ups=0.09, wpb=64752, bsz=128, num_updates=15336, lr=9.98853e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=177110
2021-06-20 19:50:47 | INFO | train_inner | epoch 006: 417 / 3002 loss=2.499, ppl=5.65, wps=5838.8, ups=0.09, wpb=64802, bsz=128, num_updates=15337, lr=9.98853e-05, gnorm=2.471, loss_scale=8, train_wall=11, gb_free=2.8, wall=177121
2021-06-20 19:50:58 | INFO | train_inner | epoch 006: 418 / 3002 loss=2.487, ppl=5.61, wps=5923.8, ups=0.09, wpb=64872, bsz=128, num_updates=15338, lr=9.98853e-05, gnorm=1.988, loss_scale=8, train_wall=10, gb_free=2.8, wall=177132
2021-06-20 19:51:09 | INFO | train_inner | epoch 006: 419 / 3002 loss=2.367, ppl=5.16, wps=5868.4, ups=0.09, wpb=64814, bsz=128, num_updates=15339, lr=9.98853e-05, gnorm=1.884, loss_scale=8, train_wall=11, gb_free=2.8, wall=177143
2021-06-20 19:51:20 | INFO | train_inner | epoch 006: 420 / 3002 loss=2.488, ppl=5.61, wps=5794.9, ups=0.09, wpb=64836, bsz=128, num_updates=15340, lr=9.98853e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=177154
2021-06-20 19:51:31 | INFO | train_inner | epoch 006: 421 / 3002 loss=2.429, ppl=5.39, wps=5798.3, ups=0.09, wpb=64805, bsz=128, num_updates=15341, lr=9.98853e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=177165
2021-06-20 19:51:42 | INFO | train_inner | epoch 006: 422 / 3002 loss=2.437, ppl=5.41, wps=5874.1, ups=0.09, wpb=64781, bsz=128, num_updates=15342, lr=9.98853e-05, gnorm=1.872, loss_scale=8, train_wall=11, gb_free=2.8, wall=177176
2021-06-20 19:51:53 | INFO | train_inner | epoch 006: 423 / 3002 loss=2.545, ppl=5.84, wps=5910.9, ups=0.09, wpb=64814, bsz=128, num_updates=15343, lr=9.98852e-05, gnorm=2.548, loss_scale=8, train_wall=11, gb_free=2.8, wall=177187
2021-06-20 19:52:04 | INFO | train_inner | epoch 006: 424 / 3002 loss=2.469, ppl=5.54, wps=5901.7, ups=0.09, wpb=64779, bsz=128, num_updates=15344, lr=9.98852e-05, gnorm=1.911, loss_scale=8, train_wall=10, gb_free=2.8, wall=177198
2021-06-20 19:52:15 | INFO | train_inner | epoch 006: 425 / 3002 loss=2.595, ppl=6.04, wps=5817, ups=0.09, wpb=64699, bsz=128, num_updates=15345, lr=9.98852e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=177209
2021-06-20 19:52:26 | INFO | train_inner | epoch 006: 426 / 3002 loss=2.331, ppl=5.03, wps=5919.1, ups=0.09, wpb=64832, bsz=128, num_updates=15346, lr=9.98852e-05, gnorm=1.968, loss_scale=8, train_wall=10, gb_free=2.8, wall=177220
2021-06-20 19:52:37 | INFO | train_inner | epoch 006: 427 / 3002 loss=2.472, ppl=5.55, wps=5871.1, ups=0.09, wpb=64862, bsz=128, num_updates=15347, lr=9.98852e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=177231
2021-06-20 19:52:48 | INFO | train_inner | epoch 006: 428 / 3002 loss=2.495, ppl=5.64, wps=5819.5, ups=0.09, wpb=64788, bsz=128, num_updates=15348, lr=9.98852e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=177243
2021-06-20 19:52:59 | INFO | train_inner | epoch 006: 429 / 3002 loss=2.547, ppl=5.84, wps=5809.5, ups=0.09, wpb=64811, bsz=128, num_updates=15349, lr=9.98852e-05, gnorm=1.965, loss_scale=8, train_wall=11, gb_free=2.8, wall=177254
2021-06-20 19:53:10 | INFO | train_inner | epoch 006: 430 / 3002 loss=2.509, ppl=5.69, wps=5931.3, ups=0.09, wpb=64825, bsz=128, num_updates=15350, lr=9.98852e-05, gnorm=2.419, loss_scale=8, train_wall=10, gb_free=2.8, wall=177265
2021-06-20 19:53:21 | INFO | train_inner | epoch 006: 431 / 3002 loss=2.408, ppl=5.31, wps=5862.5, ups=0.09, wpb=64817, bsz=128, num_updates=15351, lr=9.98852e-05, gnorm=1.868, loss_scale=8, train_wall=11, gb_free=2.8, wall=177276
2021-06-20 19:53:33 | INFO | train_inner | epoch 006: 432 / 3002 loss=2.416, ppl=5.34, wps=5842.5, ups=0.09, wpb=64850, bsz=128, num_updates=15352, lr=9.98852e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=177287
2021-06-20 19:53:44 | INFO | train_inner | epoch 006: 433 / 3002 loss=2.45, ppl=5.46, wps=5680.4, ups=0.09, wpb=64855, bsz=128, num_updates=15353, lr=9.98852e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=177298
2021-06-20 19:53:55 | INFO | train_inner | epoch 006: 434 / 3002 loss=2.489, ppl=5.62, wps=5731.9, ups=0.09, wpb=64840, bsz=128, num_updates=15354, lr=9.98852e-05, gnorm=1.956, loss_scale=8, train_wall=11, gb_free=2.8, wall=177310
2021-06-20 19:54:06 | INFO | train_inner | epoch 006: 435 / 3002 loss=2.362, ppl=5.14, wps=5827.5, ups=0.09, wpb=64801, bsz=128, num_updates=15355, lr=9.98852e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=177321
2021-06-20 19:54:17 | INFO | train_inner | epoch 006: 436 / 3002 loss=2.543, ppl=5.83, wps=5834.7, ups=0.09, wpb=64876, bsz=128, num_updates=15356, lr=9.98851e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=177332
2021-06-20 19:54:29 | INFO | train_inner | epoch 006: 437 / 3002 loss=2.406, ppl=5.3, wps=5880, ups=0.09, wpb=64924, bsz=128, num_updates=15357, lr=9.98851e-05, gnorm=2.261, loss_scale=8, train_wall=11, gb_free=2.8, wall=177343
2021-06-20 19:54:40 | INFO | train_inner | epoch 006: 438 / 3002 loss=2.438, ppl=5.42, wps=5882, ups=0.09, wpb=64855, bsz=128, num_updates=15358, lr=9.98851e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=177354
2021-06-20 19:54:51 | INFO | train_inner | epoch 006: 439 / 3002 loss=2.415, ppl=5.33, wps=5821.5, ups=0.09, wpb=64799, bsz=128, num_updates=15359, lr=9.98851e-05, gnorm=2.075, loss_scale=8, train_wall=11, gb_free=2.8, wall=177365
2021-06-20 19:55:02 | INFO | train_inner | epoch 006: 440 / 3002 loss=2.486, ppl=5.6, wps=5812.4, ups=0.09, wpb=64822, bsz=128, num_updates=15360, lr=9.98851e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=177376
2021-06-20 19:55:13 | INFO | train_inner | epoch 006: 441 / 3002 loss=2.378, ppl=5.2, wps=5858.1, ups=0.09, wpb=64800, bsz=128, num_updates=15361, lr=9.98851e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=177387
2021-06-20 19:55:24 | INFO | train_inner | epoch 006: 442 / 3002 loss=2.524, ppl=5.75, wps=5808.5, ups=0.09, wpb=64796, bsz=128, num_updates=15362, lr=9.98851e-05, gnorm=1.859, loss_scale=8, train_wall=11, gb_free=2.8, wall=177398
2021-06-20 19:55:35 | INFO | train_inner | epoch 006: 443 / 3002 loss=2.586, ppl=6, wps=5826.6, ups=0.09, wpb=64819, bsz=128, num_updates=15363, lr=9.98851e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=177410
2021-06-20 19:55:46 | INFO | train_inner | epoch 006: 444 / 3002 loss=2.396, ppl=5.26, wps=5881.8, ups=0.09, wpb=64781, bsz=128, num_updates=15364, lr=9.98851e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=177421
2021-06-20 19:55:57 | INFO | train_inner | epoch 006: 445 / 3002 loss=2.464, ppl=5.52, wps=5880.4, ups=0.09, wpb=64821, bsz=128, num_updates=15365, lr=9.98851e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=177432
2021-06-20 19:56:08 | INFO | train_inner | epoch 006: 446 / 3002 loss=2.446, ppl=5.45, wps=5943.3, ups=0.09, wpb=64812, bsz=128, num_updates=15366, lr=9.98851e-05, gnorm=1.962, loss_scale=8, train_wall=10, gb_free=2.8, wall=177442
2021-06-20 19:56:19 | INFO | train_inner | epoch 006: 447 / 3002 loss=2.48, ppl=5.58, wps=5881.3, ups=0.09, wpb=64805, bsz=128, num_updates=15367, lr=9.98851e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=177453
2021-06-20 19:56:30 | INFO | train_inner | epoch 006: 448 / 3002 loss=2.529, ppl=5.77, wps=5798.5, ups=0.09, wpb=64850, bsz=128, num_updates=15368, lr=9.9885e-05, gnorm=1.868, loss_scale=8, train_wall=11, gb_free=2.8, wall=177465
2021-06-20 19:56:42 | INFO | train_inner | epoch 006: 449 / 3002 loss=2.533, ppl=5.79, wps=5754.4, ups=0.09, wpb=64797, bsz=128, num_updates=15369, lr=9.9885e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=177476
2021-06-20 19:56:53 | INFO | train_inner | epoch 006: 450 / 3002 loss=2.507, ppl=5.68, wps=5836.6, ups=0.09, wpb=64835, bsz=128, num_updates=15370, lr=9.9885e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=177487
2021-06-20 19:57:04 | INFO | train_inner | epoch 006: 451 / 3002 loss=2.471, ppl=5.54, wps=5920.2, ups=0.09, wpb=64867, bsz=128, num_updates=15371, lr=9.9885e-05, gnorm=1.964, loss_scale=8, train_wall=10, gb_free=2.8, wall=177498
2021-06-20 19:57:15 | INFO | train_inner | epoch 006: 452 / 3002 loss=2.45, ppl=5.46, wps=5782.2, ups=0.09, wpb=64791, bsz=128, num_updates=15372, lr=9.9885e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=177509
2021-06-20 19:57:26 | INFO | train_inner | epoch 006: 453 / 3002 loss=2.462, ppl=5.51, wps=5862.6, ups=0.09, wpb=64848, bsz=128, num_updates=15373, lr=9.9885e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=177520
2021-06-20 19:57:37 | INFO | train_inner | epoch 006: 454 / 3002 loss=2.387, ppl=5.23, wps=5795.8, ups=0.09, wpb=64790, bsz=128, num_updates=15374, lr=9.9885e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=177531
2021-06-20 19:57:48 | INFO | train_inner | epoch 006: 455 / 3002 loss=2.301, ppl=4.93, wps=5853, ups=0.09, wpb=64903, bsz=128, num_updates=15375, lr=9.9885e-05, gnorm=1.914, loss_scale=8, train_wall=11, gb_free=2.8, wall=177543
2021-06-20 19:57:59 | INFO | train_inner | epoch 006: 456 / 3002 loss=2.453, ppl=5.48, wps=5898.4, ups=0.09, wpb=64849, bsz=128, num_updates=15376, lr=9.9885e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=177554
2021-06-20 19:58:10 | INFO | train_inner | epoch 006: 457 / 3002 loss=2.581, ppl=5.98, wps=5886.6, ups=0.09, wpb=64808, bsz=128, num_updates=15377, lr=9.9885e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=177565
2021-06-20 19:58:21 | INFO | train_inner | epoch 006: 458 / 3002 loss=2.448, ppl=5.46, wps=5807.3, ups=0.09, wpb=64840, bsz=128, num_updates=15378, lr=9.9885e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=177576
2021-06-20 19:58:32 | INFO | train_inner | epoch 006: 459 / 3002 loss=2.436, ppl=5.41, wps=5826.6, ups=0.09, wpb=64842, bsz=128, num_updates=15379, lr=9.9885e-05, gnorm=2.081, loss_scale=8, train_wall=11, gb_free=2.8, wall=177587
2021-06-20 19:58:43 | INFO | train_inner | epoch 006: 460 / 3002 loss=2.495, ppl=5.64, wps=6020.1, ups=0.09, wpb=64837, bsz=128, num_updates=15380, lr=9.9885e-05, gnorm=1.973, loss_scale=8, train_wall=10, gb_free=2.8, wall=177598
2021-06-20 19:58:54 | INFO | train_inner | epoch 006: 461 / 3002 loss=2.468, ppl=5.53, wps=5906.6, ups=0.09, wpb=64792, bsz=128, num_updates=15381, lr=9.98849e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=177609
2021-06-20 19:59:05 | INFO | train_inner | epoch 006: 462 / 3002 loss=2.493, ppl=5.63, wps=5842.5, ups=0.09, wpb=64809, bsz=128, num_updates=15382, lr=9.98849e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=177620
2021-06-20 19:59:16 | INFO | train_inner | epoch 006: 463 / 3002 loss=2.418, ppl=5.35, wps=5849.2, ups=0.09, wpb=64800, bsz=128, num_updates=15383, lr=9.98849e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=177631
2021-06-20 19:59:28 | INFO | train_inner | epoch 006: 464 / 3002 loss=2.626, ppl=6.17, wps=5759.8, ups=0.09, wpb=64778, bsz=128, num_updates=15384, lr=9.98849e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=177642
2021-06-20 19:59:39 | INFO | train_inner | epoch 006: 465 / 3002 loss=2.392, ppl=5.25, wps=5786.7, ups=0.09, wpb=64836, bsz=128, num_updates=15385, lr=9.98849e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=177653
2021-06-20 19:59:50 | INFO | train_inner | epoch 006: 466 / 3002 loss=2.536, ppl=5.8, wps=5746, ups=0.09, wpb=64780, bsz=128, num_updates=15386, lr=9.98849e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=177664
2021-06-20 20:00:01 | INFO | train_inner | epoch 006: 467 / 3002 loss=2.45, ppl=5.46, wps=5738.2, ups=0.09, wpb=64821, bsz=128, num_updates=15387, lr=9.98849e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=177676
2021-06-20 20:00:12 | INFO | train_inner | epoch 006: 468 / 3002 loss=2.517, ppl=5.72, wps=5923.5, ups=0.09, wpb=64826, bsz=128, num_updates=15388, lr=9.98849e-05, gnorm=1.978, loss_scale=8, train_wall=10, gb_free=2.8, wall=177687
2021-06-20 20:00:23 | INFO | train_inner | epoch 006: 469 / 3002 loss=2.559, ppl=5.89, wps=5904.9, ups=0.09, wpb=64878, bsz=128, num_updates=15389, lr=9.98849e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=177698
2021-06-20 20:00:34 | INFO | train_inner | epoch 006: 470 / 3002 loss=2.27, ppl=4.82, wps=5880.9, ups=0.09, wpb=64868, bsz=128, num_updates=15390, lr=9.98849e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=177709
2021-06-20 20:00:45 | INFO | train_inner | epoch 006: 471 / 3002 loss=2.424, ppl=5.37, wps=5915.1, ups=0.09, wpb=64898, bsz=128, num_updates=15391, lr=9.98849e-05, gnorm=1.982, loss_scale=8, train_wall=10, gb_free=2.8, wall=177720
2021-06-20 20:00:56 | INFO | train_inner | epoch 006: 472 / 3002 loss=2.343, ppl=5.07, wps=5867.3, ups=0.09, wpb=64793, bsz=128, num_updates=15392, lr=9.98849e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=177731
2021-06-20 20:01:08 | INFO | train_inner | epoch 006: 473 / 3002 loss=2.538, ppl=5.81, wps=5799.3, ups=0.09, wpb=64835, bsz=128, num_updates=15393, lr=9.98848e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=177742
2021-06-20 20:01:19 | INFO | train_inner | epoch 006: 474 / 3002 loss=2.543, ppl=5.83, wps=5799.5, ups=0.09, wpb=64815, bsz=128, num_updates=15394, lr=9.98848e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=177753
2021-06-20 20:01:30 | INFO | train_inner | epoch 006: 475 / 3002 loss=2.364, ppl=5.15, wps=5830.7, ups=0.09, wpb=64865, bsz=128, num_updates=15395, lr=9.98848e-05, gnorm=1.886, loss_scale=8, train_wall=11, gb_free=2.8, wall=177764
2021-06-20 20:01:41 | INFO | train_inner | epoch 006: 476 / 3002 loss=2.453, ppl=5.48, wps=5855.9, ups=0.09, wpb=64876, bsz=128, num_updates=15396, lr=9.98848e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=177775
2021-06-20 20:01:52 | INFO | train_inner | epoch 006: 477 / 3002 loss=2.734, ppl=6.65, wps=5953.4, ups=0.09, wpb=64857, bsz=128, num_updates=15397, lr=9.98848e-05, gnorm=1.967, loss_scale=8, train_wall=10, gb_free=2.8, wall=177786
2021-06-20 20:02:03 | INFO | train_inner | epoch 006: 478 / 3002 loss=2.478, ppl=5.57, wps=5844.2, ups=0.09, wpb=64748, bsz=128, num_updates=15398, lr=9.98848e-05, gnorm=1.933, loss_scale=8, train_wall=11, gb_free=2.8, wall=177797
2021-06-20 20:02:14 | INFO | train_inner | epoch 006: 479 / 3002 loss=2.563, ppl=5.91, wps=5773.5, ups=0.09, wpb=64775, bsz=128, num_updates=15399, lr=9.98848e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=177808
2021-06-20 20:02:25 | INFO | train_inner | epoch 006: 480 / 3002 loss=2.391, ppl=5.25, wps=5982.7, ups=0.09, wpb=64779, bsz=128, num_updates=15400, lr=9.98848e-05, gnorm=1.967, loss_scale=8, train_wall=10, gb_free=2.8, wall=177819
2021-06-20 20:02:36 | INFO | train_inner | epoch 006: 481 / 3002 loss=2.434, ppl=5.4, wps=6004.3, ups=0.09, wpb=64860, bsz=128, num_updates=15401, lr=9.98848e-05, gnorm=1.992, loss_scale=8, train_wall=10, gb_free=2.8, wall=177830
2021-06-20 20:02:47 | INFO | train_inner | epoch 006: 482 / 3002 loss=2.543, ppl=5.83, wps=5874.7, ups=0.09, wpb=64848, bsz=128, num_updates=15402, lr=9.98848e-05, gnorm=1.971, loss_scale=8, train_wall=11, gb_free=2.8, wall=177841
2021-06-20 20:02:58 | INFO | train_inner | epoch 006: 483 / 3002 loss=2.382, ppl=5.21, wps=5987.2, ups=0.09, wpb=64833, bsz=128, num_updates=15403, lr=9.98848e-05, gnorm=1.959, loss_scale=8, train_wall=10, gb_free=2.8, wall=177852
2021-06-20 20:03:09 | INFO | train_inner | epoch 006: 484 / 3002 loss=2.504, ppl=5.67, wps=5955.8, ups=0.09, wpb=64957, bsz=128, num_updates=15404, lr=9.98848e-05, gnorm=1.976, loss_scale=8, train_wall=10, gb_free=2.8, wall=177863
2021-06-20 20:03:20 | INFO | train_inner | epoch 006: 485 / 3002 loss=2.416, ppl=5.34, wps=5807.4, ups=0.09, wpb=64839, bsz=128, num_updates=15405, lr=9.98848e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=177874
2021-06-20 20:03:31 | INFO | train_inner | epoch 006: 486 / 3002 loss=2.468, ppl=5.53, wps=5892.9, ups=0.09, wpb=64768, bsz=128, num_updates=15406, lr=9.98847e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=177885
2021-06-20 20:03:42 | INFO | train_inner | epoch 006: 487 / 3002 loss=2.39, ppl=5.24, wps=5908.8, ups=0.09, wpb=64847, bsz=128, num_updates=15407, lr=9.98847e-05, gnorm=1.835, loss_scale=8, train_wall=11, gb_free=2.8, wall=177896
2021-06-20 20:03:53 | INFO | train_inner | epoch 006: 488 / 3002 loss=2.42, ppl=5.35, wps=5832.4, ups=0.09, wpb=64821, bsz=128, num_updates=15408, lr=9.98847e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=177907
2021-06-20 20:04:04 | INFO | train_inner | epoch 006: 489 / 3002 loss=2.544, ppl=5.83, wps=5881.7, ups=0.09, wpb=64826, bsz=128, num_updates=15409, lr=9.98847e-05, gnorm=2.013, loss_scale=8, train_wall=11, gb_free=2.8, wall=177918
2021-06-20 20:04:15 | INFO | train_inner | epoch 006: 490 / 3002 loss=2.591, ppl=6.02, wps=5733.5, ups=0.09, wpb=64776, bsz=128, num_updates=15410, lr=9.98847e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=177929
2021-06-20 20:04:26 | INFO | train_inner | epoch 006: 491 / 3002 loss=2.354, ppl=5.11, wps=5910.6, ups=0.09, wpb=64862, bsz=128, num_updates=15411, lr=9.98847e-05, gnorm=1.878, loss_scale=8, train_wall=10, gb_free=2.8, wall=177940
2021-06-20 20:04:37 | INFO | train_inner | epoch 006: 492 / 3002 loss=2.375, ppl=5.19, wps=5931.7, ups=0.09, wpb=64869, bsz=128, num_updates=15412, lr=9.98847e-05, gnorm=1.928, loss_scale=8, train_wall=10, gb_free=2.8, wall=177951
2021-06-20 20:04:48 | INFO | train_inner | epoch 006: 493 / 3002 loss=2.394, ppl=5.26, wps=5858.1, ups=0.09, wpb=64793, bsz=128, num_updates=15413, lr=9.98847e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=177962
2021-06-20 20:04:59 | INFO | train_inner | epoch 006: 494 / 3002 loss=2.356, ppl=5.12, wps=5947, ups=0.09, wpb=64943, bsz=128, num_updates=15414, lr=9.98847e-05, gnorm=2.078, loss_scale=8, train_wall=10, gb_free=2.8, wall=177973
2021-06-20 20:05:10 | INFO | train_inner | epoch 006: 495 / 3002 loss=2.623, ppl=6.16, wps=5794.9, ups=0.09, wpb=64832, bsz=128, num_updates=15415, lr=9.98847e-05, gnorm=2.105, loss_scale=8, train_wall=11, gb_free=2.8, wall=177985
2021-06-20 20:05:21 | INFO | train_inner | epoch 006: 496 / 3002 loss=2.466, ppl=5.53, wps=5741.8, ups=0.09, wpb=64831, bsz=128, num_updates=15416, lr=9.98847e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=177996
2021-06-20 20:05:32 | INFO | train_inner | epoch 006: 497 / 3002 loss=2.519, ppl=5.73, wps=5887.5, ups=0.09, wpb=64819, bsz=128, num_updates=15417, lr=9.98847e-05, gnorm=2.008, loss_scale=8, train_wall=11, gb_free=2.8, wall=178007
2021-06-20 20:05:44 | INFO | train_inner | epoch 006: 498 / 3002 loss=2.361, ppl=5.14, wps=5853.9, ups=0.09, wpb=64885, bsz=128, num_updates=15418, lr=9.98846e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=178018
2021-06-20 20:05:55 | INFO | train_inner | epoch 006: 499 / 3002 loss=2.496, ppl=5.64, wps=5792.4, ups=0.09, wpb=64717, bsz=128, num_updates=15419, lr=9.98846e-05, gnorm=1.89, loss_scale=8, train_wall=11, gb_free=2.8, wall=178029
2021-06-20 20:06:06 | INFO | train_inner | epoch 006: 500 / 3002 loss=2.499, ppl=5.65, wps=5854.8, ups=0.09, wpb=64797, bsz=128, num_updates=15420, lr=9.98846e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=178040
2021-06-20 20:06:17 | INFO | train_inner | epoch 006: 501 / 3002 loss=2.429, ppl=5.39, wps=5793.2, ups=0.09, wpb=64831, bsz=128, num_updates=15421, lr=9.98846e-05, gnorm=1.845, loss_scale=8, train_wall=11, gb_free=2.8, wall=178051
2021-06-20 20:06:28 | INFO | train_inner | epoch 006: 502 / 3002 loss=2.553, ppl=5.87, wps=5879.1, ups=0.09, wpb=64884, bsz=128, num_updates=15422, lr=9.98846e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=178062
2021-06-20 20:06:39 | INFO | train_inner | epoch 006: 503 / 3002 loss=2.43, ppl=5.39, wps=5860.3, ups=0.09, wpb=64871, bsz=128, num_updates=15423, lr=9.98846e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=178073
2021-06-20 20:06:50 | INFO | train_inner | epoch 006: 504 / 3002 loss=2.504, ppl=5.67, wps=5962.4, ups=0.09, wpb=64782, bsz=128, num_updates=15424, lr=9.98846e-05, gnorm=1.915, loss_scale=16, train_wall=10, gb_free=2.8, wall=178084
2021-06-20 20:07:01 | INFO | train_inner | epoch 006: 505 / 3002 loss=2.469, ppl=5.54, wps=5821, ups=0.09, wpb=64817, bsz=128, num_updates=15425, lr=9.98846e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=178095
2021-06-20 20:07:12 | INFO | train_inner | epoch 006: 506 / 3002 loss=2.573, ppl=5.95, wps=5927.3, ups=0.09, wpb=64835, bsz=128, num_updates=15426, lr=9.98846e-05, gnorm=1.949, loss_scale=16, train_wall=10, gb_free=2.8, wall=178106
2021-06-20 20:07:23 | INFO | train_inner | epoch 006: 507 / 3002 loss=2.296, ppl=4.91, wps=5837.4, ups=0.09, wpb=64859, bsz=128, num_updates=15427, lr=9.98846e-05, gnorm=2.002, loss_scale=16, train_wall=11, gb_free=2.8, wall=178118
2021-06-20 20:07:34 | INFO | train_inner | epoch 006: 508 / 3002 loss=2.498, ppl=5.65, wps=5915.7, ups=0.09, wpb=64776, bsz=128, num_updates=15428, lr=9.98846e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=178128
2021-06-20 20:07:45 | INFO | train_inner | epoch 006: 509 / 3002 loss=2.556, ppl=5.88, wps=5885.5, ups=0.09, wpb=64908, bsz=128, num_updates=15429, lr=9.98846e-05, gnorm=1.919, loss_scale=16, train_wall=11, gb_free=2.8, wall=178139
2021-06-20 20:07:56 | INFO | train_inner | epoch 006: 510 / 3002 loss=2.411, ppl=5.32, wps=5853.6, ups=0.09, wpb=64747, bsz=128, num_updates=15430, lr=9.98846e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=178151
2021-06-20 20:08:07 | INFO | train_inner | epoch 006: 511 / 3002 loss=2.519, ppl=5.73, wps=5924.4, ups=0.09, wpb=64794, bsz=128, num_updates=15431, lr=9.98845e-05, gnorm=1.944, loss_scale=16, train_wall=10, gb_free=2.8, wall=178161
2021-06-20 20:08:18 | INFO | train_inner | epoch 006: 512 / 3002 loss=2.292, ppl=4.9, wps=5908.2, ups=0.09, wpb=64832, bsz=128, num_updates=15432, lr=9.98845e-05, gnorm=1.851, loss_scale=16, train_wall=11, gb_free=2.8, wall=178172
2021-06-20 20:08:29 | INFO | train_inner | epoch 006: 513 / 3002 loss=2.582, ppl=5.99, wps=5878.8, ups=0.09, wpb=64821, bsz=128, num_updates=15433, lr=9.98845e-05, gnorm=2.118, loss_scale=16, train_wall=11, gb_free=2.8, wall=178183
2021-06-20 20:08:40 | INFO | train_inner | epoch 006: 514 / 3002 loss=2.48, ppl=5.58, wps=5765.9, ups=0.09, wpb=64817, bsz=128, num_updates=15434, lr=9.98845e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=178195
2021-06-20 20:08:52 | INFO | train_inner | epoch 006: 515 / 3002 loss=2.518, ppl=5.73, wps=5786, ups=0.09, wpb=64841, bsz=128, num_updates=15435, lr=9.98845e-05, gnorm=1.978, loss_scale=16, train_wall=11, gb_free=2.8, wall=178206
2021-06-20 20:09:02 | INFO | train_inner | epoch 006: 516 / 3002 loss=2.454, ppl=5.48, wps=5947.6, ups=0.09, wpb=64784, bsz=128, num_updates=15436, lr=9.98845e-05, gnorm=1.887, loss_scale=16, train_wall=10, gb_free=2.8, wall=178217
2021-06-20 20:09:14 | INFO | train_inner | epoch 006: 517 / 3002 loss=2.375, ppl=5.19, wps=5861, ups=0.09, wpb=64819, bsz=128, num_updates=15437, lr=9.98845e-05, gnorm=1.851, loss_scale=16, train_wall=11, gb_free=2.8, wall=178228
2021-06-20 20:09:25 | INFO | train_inner | epoch 006: 518 / 3002 loss=2.402, ppl=5.29, wps=5890.7, ups=0.09, wpb=64824, bsz=128, num_updates=15438, lr=9.98845e-05, gnorm=1.892, loss_scale=16, train_wall=11, gb_free=2.8, wall=178239
2021-06-20 20:09:36 | INFO | train_inner | epoch 006: 519 / 3002 loss=2.549, ppl=5.85, wps=5845.5, ups=0.09, wpb=64835, bsz=128, num_updates=15439, lr=9.98845e-05, gnorm=1.994, loss_scale=16, train_wall=11, gb_free=2.8, wall=178250
2021-06-20 20:09:47 | INFO | train_inner | epoch 006: 520 / 3002 loss=2.509, ppl=5.69, wps=5944.3, ups=0.09, wpb=64802, bsz=128, num_updates=15440, lr=9.98845e-05, gnorm=1.871, loss_scale=16, train_wall=10, gb_free=2.8, wall=178261
2021-06-20 20:09:58 | INFO | train_inner | epoch 006: 521 / 3002 loss=2.576, ppl=5.96, wps=5831.8, ups=0.09, wpb=64839, bsz=128, num_updates=15441, lr=9.98845e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=178272
2021-06-20 20:10:09 | INFO | train_inner | epoch 006: 522 / 3002 loss=2.438, ppl=5.42, wps=5886.8, ups=0.09, wpb=64869, bsz=128, num_updates=15442, lr=9.98845e-05, gnorm=1.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=178283
2021-06-20 20:10:20 | INFO | train_inner | epoch 006: 523 / 3002 loss=2.368, ppl=5.16, wps=5894, ups=0.09, wpb=64897, bsz=128, num_updates=15443, lr=9.98844e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=178294
2021-06-20 20:10:31 | INFO | train_inner | epoch 006: 524 / 3002 loss=2.541, ppl=5.82, wps=5882.1, ups=0.09, wpb=64915, bsz=128, num_updates=15444, lr=9.98844e-05, gnorm=1.908, loss_scale=16, train_wall=11, gb_free=2.8, wall=178305
2021-06-20 20:10:41 | INFO | train_inner | epoch 006: 525 / 3002 loss=2.502, ppl=5.66, wps=6086.9, ups=0.09, wpb=64855, bsz=128, num_updates=15445, lr=9.98844e-05, gnorm=1.926, loss_scale=16, train_wall=10, gb_free=2.8, wall=178316
2021-06-20 20:10:52 | INFO | train_inner | epoch 006: 526 / 3002 loss=2.448, ppl=5.45, wps=5915.9, ups=0.09, wpb=64764, bsz=128, num_updates=15446, lr=9.98844e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=178327
2021-06-20 20:11:04 | INFO | train_inner | epoch 006: 527 / 3002 loss=2.411, ppl=5.32, wps=5698.6, ups=0.09, wpb=64788, bsz=128, num_updates=15447, lr=9.98844e-05, gnorm=2.018, loss_scale=16, train_wall=11, gb_free=2.8, wall=178338
2021-06-20 20:11:14 | INFO | train_inner | epoch 006: 528 / 3002 loss=2.427, ppl=5.38, wps=6032.4, ups=0.09, wpb=64858, bsz=128, num_updates=15448, lr=9.98844e-05, gnorm=2.01, loss_scale=16, train_wall=10, gb_free=2.8, wall=178349
2021-06-20 20:11:25 | INFO | train_inner | epoch 006: 529 / 3002 loss=2.508, ppl=5.69, wps=5927.7, ups=0.09, wpb=64895, bsz=128, num_updates=15449, lr=9.98844e-05, gnorm=1.969, loss_scale=16, train_wall=10, gb_free=2.8, wall=178360
2021-06-20 20:11:37 | INFO | train_inner | epoch 006: 530 / 3002 loss=2.502, ppl=5.67, wps=5810.3, ups=0.09, wpb=64868, bsz=128, num_updates=15450, lr=9.98844e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=178371
2021-06-20 20:11:48 | INFO | train_inner | epoch 006: 531 / 3002 loss=2.376, ppl=5.19, wps=5866.1, ups=0.09, wpb=64748, bsz=128, num_updates=15451, lr=9.98844e-05, gnorm=1.811, loss_scale=16, train_wall=11, gb_free=2.8, wall=178382
2021-06-20 20:11:59 | INFO | train_inner | epoch 006: 532 / 3002 loss=2.318, ppl=4.98, wps=5783.3, ups=0.09, wpb=64935, bsz=128, num_updates=15452, lr=9.98844e-05, gnorm=2.098, loss_scale=16, train_wall=11, gb_free=2.8, wall=178393
2021-06-20 20:12:10 | INFO | train_inner | epoch 006: 533 / 3002 loss=2.518, ppl=5.73, wps=5772.2, ups=0.09, wpb=64900, bsz=128, num_updates=15453, lr=9.98844e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=178404
2021-06-20 20:12:21 | INFO | train_inner | epoch 006: 534 / 3002 loss=2.423, ppl=5.36, wps=5777.7, ups=0.09, wpb=64822, bsz=128, num_updates=15454, lr=9.98844e-05, gnorm=1.905, loss_scale=16, train_wall=11, gb_free=2.8, wall=178416
2021-06-20 20:12:33 | INFO | train_inner | epoch 006: 535 / 3002 loss=2.537, ppl=5.8, wps=5735.9, ups=0.09, wpb=64801, bsz=128, num_updates=15455, lr=9.98844e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=178427
2021-06-20 20:12:44 | INFO | train_inner | epoch 006: 536 / 3002 loss=2.403, ppl=5.29, wps=5902.7, ups=0.09, wpb=64799, bsz=128, num_updates=15456, lr=9.98843e-05, gnorm=1.986, loss_scale=16, train_wall=10, gb_free=2.8, wall=178438
2021-06-20 20:12:55 | INFO | train_inner | epoch 006: 537 / 3002 loss=2.552, ppl=5.87, wps=5811.9, ups=0.09, wpb=64786, bsz=128, num_updates=15457, lr=9.98843e-05, gnorm=2.038, loss_scale=16, train_wall=11, gb_free=2.8, wall=178449
2021-06-20 20:13:06 | INFO | train_inner | epoch 006: 538 / 3002 loss=2.416, ppl=5.34, wps=5812.4, ups=0.09, wpb=64850, bsz=128, num_updates=15458, lr=9.98843e-05, gnorm=1.87, loss_scale=16, train_wall=11, gb_free=2.8, wall=178460
2021-06-20 20:13:17 | INFO | train_inner | epoch 006: 539 / 3002 loss=2.568, ppl=5.93, wps=5798.1, ups=0.09, wpb=64756, bsz=128, num_updates=15459, lr=9.98843e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=178471
2021-06-20 20:13:28 | INFO | train_inner | epoch 006: 540 / 3002 loss=2.477, ppl=5.57, wps=5807.6, ups=0.09, wpb=64776, bsz=128, num_updates=15460, lr=9.98843e-05, gnorm=1.993, loss_scale=16, train_wall=11, gb_free=2.8, wall=178483
2021-06-20 20:13:39 | INFO | train_inner | epoch 006: 541 / 3002 loss=2.527, ppl=5.76, wps=5822.1, ups=0.09, wpb=64831, bsz=128, num_updates=15461, lr=9.98843e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=178494
2021-06-20 20:13:51 | INFO | train_inner | epoch 006: 542 / 3002 loss=2.572, ppl=5.95, wps=5789.5, ups=0.09, wpb=64886, bsz=128, num_updates=15462, lr=9.98843e-05, gnorm=1.973, loss_scale=16, train_wall=11, gb_free=2.8, wall=178505
2021-06-20 20:14:02 | INFO | train_inner | epoch 006: 543 / 3002 loss=2.529, ppl=5.77, wps=5876.5, ups=0.09, wpb=64884, bsz=128, num_updates=15463, lr=9.98843e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=178516
2021-06-20 20:14:13 | INFO | train_inner | epoch 006: 544 / 3002 loss=2.64, ppl=6.23, wps=5863.3, ups=0.09, wpb=64814, bsz=128, num_updates=15464, lr=9.98843e-05, gnorm=1.988, loss_scale=16, train_wall=11, gb_free=2.8, wall=178527
2021-06-20 20:14:24 | INFO | train_inner | epoch 006: 545 / 3002 loss=2.409, ppl=5.31, wps=5863, ups=0.09, wpb=64799, bsz=128, num_updates=15465, lr=9.98843e-05, gnorm=1.945, loss_scale=16, train_wall=11, gb_free=2.8, wall=178538
2021-06-20 20:14:35 | INFO | train_inner | epoch 006: 546 / 3002 loss=2.621, ppl=6.15, wps=5810.9, ups=0.09, wpb=64895, bsz=128, num_updates=15466, lr=9.98843e-05, gnorm=2.021, loss_scale=16, train_wall=11, gb_free=2.8, wall=178549
2021-06-20 20:14:46 | INFO | train_inner | epoch 006: 547 / 3002 loss=2.534, ppl=5.79, wps=5928.7, ups=0.09, wpb=64834, bsz=128, num_updates=15467, lr=9.98843e-05, gnorm=2.271, loss_scale=16, train_wall=10, gb_free=2.8, wall=178560
2021-06-20 20:14:57 | INFO | train_inner | epoch 006: 548 / 3002 loss=2.516, ppl=5.72, wps=5798.6, ups=0.09, wpb=64828, bsz=128, num_updates=15468, lr=9.98842e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=178571
2021-06-20 20:15:08 | INFO | train_inner | epoch 006: 549 / 3002 loss=2.631, ppl=6.19, wps=5808.3, ups=0.09, wpb=64870, bsz=128, num_updates=15469, lr=9.98842e-05, gnorm=1.868, loss_scale=16, train_wall=11, gb_free=2.8, wall=178582
2021-06-20 20:15:19 | INFO | train_inner | epoch 006: 550 / 3002 loss=2.504, ppl=5.67, wps=5838.1, ups=0.09, wpb=64851, bsz=128, num_updates=15470, lr=9.98842e-05, gnorm=2.002, loss_scale=16, train_wall=11, gb_free=2.8, wall=178594
2021-06-20 20:15:30 | INFO | train_inner | epoch 006: 551 / 3002 loss=2.472, ppl=5.55, wps=5863.1, ups=0.09, wpb=64856, bsz=128, num_updates=15471, lr=9.98842e-05, gnorm=1.992, loss_scale=16, train_wall=11, gb_free=2.8, wall=178605
2021-06-20 20:15:41 | INFO | train_inner | epoch 006: 552 / 3002 loss=2.399, ppl=5.27, wps=5837.2, ups=0.09, wpb=64867, bsz=128, num_updates=15472, lr=9.98842e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=178616
2021-06-20 20:15:52 | INFO | train_inner | epoch 006: 553 / 3002 loss=2.328, ppl=5.02, wps=5893.5, ups=0.09, wpb=64908, bsz=128, num_updates=15473, lr=9.98842e-05, gnorm=1.879, loss_scale=16, train_wall=11, gb_free=2.8, wall=178627
2021-06-20 20:16:03 | INFO | train_inner | epoch 006: 554 / 3002 loss=2.319, ppl=4.99, wps=5867, ups=0.09, wpb=64833, bsz=128, num_updates=15474, lr=9.98842e-05, gnorm=1.922, loss_scale=16, train_wall=11, gb_free=2.8, wall=178638
2021-06-20 20:16:15 | INFO | train_inner | epoch 006: 555 / 3002 loss=2.332, ppl=5.04, wps=5785.9, ups=0.09, wpb=64834, bsz=128, num_updates=15475, lr=9.98842e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=178649
2021-06-20 20:16:26 | INFO | train_inner | epoch 006: 556 / 3002 loss=2.678, ppl=6.4, wps=5816.9, ups=0.09, wpb=64854, bsz=128, num_updates=15476, lr=9.98842e-05, gnorm=2.01, loss_scale=16, train_wall=11, gb_free=2.8, wall=178660
2021-06-20 20:16:37 | INFO | train_inner | epoch 006: 557 / 3002 loss=2.401, ppl=5.28, wps=5812.4, ups=0.09, wpb=64713, bsz=128, num_updates=15477, lr=9.98842e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=178671
2021-06-20 20:16:48 | INFO | train_inner | epoch 006: 558 / 3002 loss=2.364, ppl=5.15, wps=5896, ups=0.09, wpb=64773, bsz=128, num_updates=15478, lr=9.98842e-05, gnorm=1.942, loss_scale=16, train_wall=11, gb_free=2.8, wall=178682
2021-06-20 20:16:59 | INFO | train_inner | epoch 006: 559 / 3002 loss=2.613, ppl=6.12, wps=5918, ups=0.09, wpb=64873, bsz=128, num_updates=15479, lr=9.98842e-05, gnorm=1.994, loss_scale=16, train_wall=11, gb_free=2.8, wall=178693
2021-06-20 20:17:10 | INFO | train_inner | epoch 006: 560 / 3002 loss=2.439, ppl=5.42, wps=5918, ups=0.09, wpb=64798, bsz=128, num_updates=15480, lr=9.98842e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=178704
2021-06-20 20:17:21 | INFO | train_inner | epoch 006: 561 / 3002 loss=2.394, ppl=5.26, wps=5931.7, ups=0.09, wpb=64846, bsz=128, num_updates=15481, lr=9.98841e-05, gnorm=1.902, loss_scale=16, train_wall=10, gb_free=2.8, wall=178715
2021-06-20 20:17:32 | INFO | train_inner | epoch 006: 562 / 3002 loss=2.462, ppl=5.51, wps=5818.6, ups=0.09, wpb=64909, bsz=128, num_updates=15482, lr=9.98841e-05, gnorm=1.971, loss_scale=16, train_wall=11, gb_free=2.8, wall=178726
2021-06-20 20:17:43 | INFO | train_inner | epoch 006: 563 / 3002 loss=2.388, ppl=5.23, wps=5760.5, ups=0.09, wpb=64923, bsz=128, num_updates=15483, lr=9.98841e-05, gnorm=1.961, loss_scale=16, train_wall=11, gb_free=2.8, wall=178738
2021-06-20 20:17:54 | INFO | train_inner | epoch 006: 564 / 3002 loss=2.4, ppl=5.28, wps=5761.9, ups=0.09, wpb=64743, bsz=128, num_updates=15484, lr=9.98841e-05, gnorm=1.939, loss_scale=16, train_wall=11, gb_free=2.8, wall=178749
2021-06-20 20:18:05 | INFO | train_inner | epoch 006: 565 / 3002 loss=2.374, ppl=5.18, wps=5880.7, ups=0.09, wpb=64764, bsz=128, num_updates=15485, lr=9.98841e-05, gnorm=2.058, loss_scale=16, train_wall=11, gb_free=2.8, wall=178760
2021-06-20 20:18:17 | INFO | train_inner | epoch 006: 566 / 3002 loss=2.55, ppl=5.86, wps=5753, ups=0.09, wpb=64834, bsz=128, num_updates=15486, lr=9.98841e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=178771
2021-06-20 20:18:28 | INFO | train_inner | epoch 006: 567 / 3002 loss=2.538, ppl=5.81, wps=5767.2, ups=0.09, wpb=64768, bsz=128, num_updates=15487, lr=9.98841e-05, gnorm=2.025, loss_scale=16, train_wall=11, gb_free=2.8, wall=178782
2021-06-20 20:18:39 | INFO | train_inner | epoch 006: 568 / 3002 loss=2.564, ppl=5.91, wps=5871.8, ups=0.09, wpb=64817, bsz=128, num_updates=15488, lr=9.98841e-05, gnorm=1.962, loss_scale=16, train_wall=11, gb_free=2.8, wall=178793
2021-06-20 20:18:50 | INFO | train_inner | epoch 006: 569 / 3002 loss=2.567, ppl=5.92, wps=5865.4, ups=0.09, wpb=64819, bsz=128, num_updates=15489, lr=9.98841e-05, gnorm=1.942, loss_scale=16, train_wall=11, gb_free=2.8, wall=178804
2021-06-20 20:19:01 | INFO | train_inner | epoch 006: 570 / 3002 loss=2.291, ppl=4.89, wps=5895.9, ups=0.09, wpb=64893, bsz=128, num_updates=15490, lr=9.98841e-05, gnorm=1.88, loss_scale=16, train_wall=11, gb_free=2.8, wall=178815
2021-06-20 20:19:12 | INFO | train_inner | epoch 006: 571 / 3002 loss=2.432, ppl=5.4, wps=5840.9, ups=0.09, wpb=64815, bsz=128, num_updates=15491, lr=9.98841e-05, gnorm=1.879, loss_scale=16, train_wall=11, gb_free=2.8, wall=178827
2021-06-20 20:19:23 | INFO | train_inner | epoch 006: 572 / 3002 loss=2.528, ppl=5.77, wps=5861.1, ups=0.09, wpb=64774, bsz=128, num_updates=15492, lr=9.98841e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=178838
2021-06-20 20:19:34 | INFO | train_inner | epoch 006: 573 / 3002 loss=2.627, ppl=6.18, wps=5949.2, ups=0.09, wpb=64820, bsz=128, num_updates=15493, lr=9.9884e-05, gnorm=2.023, loss_scale=16, train_wall=10, gb_free=2.8, wall=178848
2021-06-20 20:19:45 | INFO | train_inner | epoch 006: 574 / 3002 loss=2.332, ppl=5.03, wps=5828.4, ups=0.09, wpb=64881, bsz=128, num_updates=15494, lr=9.9884e-05, gnorm=1.838, loss_scale=16, train_wall=11, gb_free=2.8, wall=178860
2021-06-20 20:19:56 | INFO | train_inner | epoch 006: 575 / 3002 loss=2.642, ppl=6.24, wps=5792.7, ups=0.09, wpb=64744, bsz=128, num_updates=15495, lr=9.9884e-05, gnorm=2.051, loss_scale=16, train_wall=11, gb_free=2.8, wall=178871
2021-06-20 20:20:07 | INFO | train_inner | epoch 006: 576 / 3002 loss=2.388, ppl=5.23, wps=5881.5, ups=0.09, wpb=64795, bsz=128, num_updates=15496, lr=9.9884e-05, gnorm=1.978, loss_scale=16, train_wall=11, gb_free=2.8, wall=178882
2021-06-20 20:20:19 | INFO | train_inner | epoch 006: 577 / 3002 loss=2.442, ppl=5.44, wps=5803.5, ups=0.09, wpb=64840, bsz=128, num_updates=15497, lr=9.9884e-05, gnorm=2.012, loss_scale=16, train_wall=11, gb_free=2.8, wall=178893
2021-06-20 20:20:30 | INFO | train_inner | epoch 006: 578 / 3002 loss=2.563, ppl=5.91, wps=5849.9, ups=0.09, wpb=64882, bsz=128, num_updates=15498, lr=9.9884e-05, gnorm=2.032, loss_scale=16, train_wall=11, gb_free=2.8, wall=178904
2021-06-20 20:20:41 | INFO | train_inner | epoch 006: 579 / 3002 loss=2.455, ppl=5.48, wps=5811.7, ups=0.09, wpb=64799, bsz=128, num_updates=15499, lr=9.9884e-05, gnorm=2.038, loss_scale=16, train_wall=11, gb_free=2.8, wall=178915
2021-06-20 20:20:52 | INFO | train_inner | epoch 006: 580 / 3002 loss=2.346, ppl=5.08, wps=5719.4, ups=0.09, wpb=64800, bsz=128, num_updates=15500, lr=9.9884e-05, gnorm=2.018, loss_scale=16, train_wall=11, gb_free=2.8, wall=178927
2021-06-20 20:21:03 | INFO | train_inner | epoch 006: 581 / 3002 loss=2.501, ppl=5.66, wps=5801.5, ups=0.09, wpb=64779, bsz=128, num_updates=15501, lr=9.9884e-05, gnorm=1.86, loss_scale=16, train_wall=11, gb_free=2.8, wall=178938
2021-06-20 20:21:15 | INFO | train_inner | epoch 006: 582 / 3002 loss=2.436, ppl=5.41, wps=5752.4, ups=0.09, wpb=64851, bsz=128, num_updates=15502, lr=9.9884e-05, gnorm=1.874, loss_scale=16, train_wall=11, gb_free=2.8, wall=178949
2021-06-20 20:21:26 | INFO | train_inner | epoch 006: 583 / 3002 loss=2.481, ppl=5.58, wps=5806.9, ups=0.09, wpb=64826, bsz=128, num_updates=15503, lr=9.9884e-05, gnorm=1.881, loss_scale=16, train_wall=11, gb_free=2.8, wall=178960
2021-06-20 20:21:37 | INFO | train_inner | epoch 006: 584 / 3002 loss=2.386, ppl=5.23, wps=5943.7, ups=0.09, wpb=64814, bsz=128, num_updates=15504, lr=9.9884e-05, gnorm=2.017, loss_scale=16, train_wall=10, gb_free=2.8, wall=178971
2021-06-20 20:21:48 | INFO | train_inner | epoch 006: 585 / 3002 loss=2.434, ppl=5.4, wps=5749.4, ups=0.09, wpb=64828, bsz=128, num_updates=15505, lr=9.9884e-05, gnorm=1.859, loss_scale=16, train_wall=11, gb_free=2.8, wall=178982
2021-06-20 20:21:59 | INFO | train_inner | epoch 006: 586 / 3002 loss=2.518, ppl=5.73, wps=5791.4, ups=0.09, wpb=64737, bsz=128, num_updates=15506, lr=9.98839e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=178994
2021-06-20 20:22:10 | INFO | train_inner | epoch 006: 587 / 3002 loss=2.301, ppl=4.93, wps=5916.2, ups=0.09, wpb=64845, bsz=128, num_updates=15507, lr=9.98839e-05, gnorm=2.037, loss_scale=16, train_wall=11, gb_free=2.8, wall=179004
2021-06-20 20:22:21 | INFO | train_inner | epoch 006: 588 / 3002 loss=2.412, ppl=5.32, wps=5815.3, ups=0.09, wpb=64770, bsz=128, num_updates=15508, lr=9.98839e-05, gnorm=2.017, loss_scale=16, train_wall=11, gb_free=2.8, wall=179016
2021-06-20 20:22:32 | INFO | train_inner | epoch 006: 589 / 3002 loss=2.514, ppl=5.71, wps=5786.4, ups=0.09, wpb=64768, bsz=128, num_updates=15509, lr=9.98839e-05, gnorm=2.08, loss_scale=16, train_wall=11, gb_free=2.8, wall=179027
2021-06-20 20:22:43 | INFO | train_inner | epoch 006: 590 / 3002 loss=2.448, ppl=5.46, wps=5983.7, ups=0.09, wpb=64905, bsz=128, num_updates=15510, lr=9.98839e-05, gnorm=2.041, loss_scale=16, train_wall=10, gb_free=2.8, wall=179038
2021-06-20 20:22:54 | INFO | train_inner | epoch 006: 591 / 3002 loss=2.589, ppl=6.02, wps=5807.3, ups=0.09, wpb=64755, bsz=128, num_updates=15511, lr=9.98839e-05, gnorm=2.007, loss_scale=16, train_wall=11, gb_free=2.8, wall=179049
2021-06-20 20:23:05 | INFO | train_inner | epoch 006: 592 / 3002 loss=2.379, ppl=5.2, wps=5925.8, ups=0.09, wpb=64826, bsz=128, num_updates=15512, lr=9.98839e-05, gnorm=1.942, loss_scale=16, train_wall=10, gb_free=2.8, wall=179060
2021-06-20 20:23:16 | INFO | train_inner | epoch 006: 593 / 3002 loss=2.41, ppl=5.32, wps=5853.2, ups=0.09, wpb=64789, bsz=128, num_updates=15513, lr=9.98839e-05, gnorm=1.847, loss_scale=16, train_wall=11, gb_free=2.8, wall=179071
2021-06-20 20:23:28 | INFO | train_inner | epoch 006: 594 / 3002 loss=2.481, ppl=5.58, wps=5830.7, ups=0.09, wpb=64809, bsz=128, num_updates=15514, lr=9.98839e-05, gnorm=2.236, loss_scale=16, train_wall=11, gb_free=2.8, wall=179082
2021-06-20 20:23:39 | INFO | train_inner | epoch 006: 595 / 3002 loss=2.526, ppl=5.76, wps=5826.5, ups=0.09, wpb=64829, bsz=128, num_updates=15515, lr=9.98839e-05, gnorm=2.011, loss_scale=16, train_wall=11, gb_free=2.8, wall=179093
2021-06-20 20:23:50 | INFO | train_inner | epoch 006: 596 / 3002 loss=2.383, ppl=5.22, wps=5973.7, ups=0.09, wpb=64883, bsz=128, num_updates=15516, lr=9.98839e-05, gnorm=1.905, loss_scale=16, train_wall=10, gb_free=2.8, wall=179104
2021-06-20 20:24:01 | INFO | train_inner | epoch 006: 597 / 3002 loss=2.484, ppl=5.59, wps=5872, ups=0.09, wpb=64883, bsz=128, num_updates=15517, lr=9.98839e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=179115
2021-06-20 20:24:12 | INFO | train_inner | epoch 006: 598 / 3002 loss=2.49, ppl=5.62, wps=5793.2, ups=0.09, wpb=64824, bsz=128, num_updates=15518, lr=9.98838e-05, gnorm=1.859, loss_scale=16, train_wall=11, gb_free=2.8, wall=179126
2021-06-20 20:24:23 | INFO | train_inner | epoch 006: 599 / 3002 loss=2.438, ppl=5.42, wps=5869.3, ups=0.09, wpb=64791, bsz=128, num_updates=15519, lr=9.98838e-05, gnorm=1.996, loss_scale=16, train_wall=11, gb_free=2.8, wall=179137
2021-06-20 20:24:34 | INFO | train_inner | epoch 006: 600 / 3002 loss=2.552, ppl=5.87, wps=5903.3, ups=0.09, wpb=64781, bsz=128, num_updates=15520, lr=9.98838e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=179148
2021-06-20 20:24:45 | INFO | train_inner | epoch 006: 601 / 3002 loss=2.413, ppl=5.33, wps=5781.3, ups=0.09, wpb=64768, bsz=128, num_updates=15521, lr=9.98838e-05, gnorm=1.953, loss_scale=16, train_wall=11, gb_free=2.8, wall=179159
2021-06-20 20:24:56 | INFO | train_inner | epoch 006: 602 / 3002 loss=2.469, ppl=5.54, wps=5858.5, ups=0.09, wpb=64872, bsz=128, num_updates=15522, lr=9.98838e-05, gnorm=1.889, loss_scale=16, train_wall=11, gb_free=2.8, wall=179170
2021-06-20 20:25:07 | INFO | train_inner | epoch 006: 603 / 3002 loss=2.526, ppl=5.76, wps=5807, ups=0.09, wpb=64742, bsz=128, num_updates=15523, lr=9.98838e-05, gnorm=3.059, loss_scale=16, train_wall=11, gb_free=2.8, wall=179182
2021-06-20 20:25:18 | INFO | train_inner | epoch 006: 604 / 3002 loss=2.564, ppl=5.91, wps=5809.2, ups=0.09, wpb=64816, bsz=128, num_updates=15524, lr=9.98838e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=179193
2021-06-20 20:25:30 | INFO | train_inner | epoch 006: 605 / 3002 loss=2.558, ppl=5.89, wps=5836.8, ups=0.09, wpb=64840, bsz=128, num_updates=15525, lr=9.98838e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=179204
2021-06-20 20:25:41 | INFO | train_inner | epoch 006: 606 / 3002 loss=2.605, ppl=6.08, wps=5795.6, ups=0.09, wpb=64878, bsz=128, num_updates=15526, lr=9.98838e-05, gnorm=1.953, loss_scale=16, train_wall=11, gb_free=2.8, wall=179215
2021-06-20 20:25:52 | INFO | train_inner | epoch 006: 607 / 3002 loss=2.345, ppl=5.08, wps=5736.3, ups=0.09, wpb=64837, bsz=128, num_updates=15527, lr=9.98838e-05, gnorm=1.948, loss_scale=16, train_wall=11, gb_free=2.8, wall=179226
2021-06-20 20:26:03 | INFO | train_inner | epoch 006: 608 / 3002 loss=2.397, ppl=5.27, wps=5846.7, ups=0.09, wpb=64826, bsz=128, num_updates=15528, lr=9.98838e-05, gnorm=1.934, loss_scale=16, train_wall=11, gb_free=2.8, wall=179237
2021-06-20 20:26:14 | INFO | train_inner | epoch 006: 609 / 3002 loss=2.47, ppl=5.54, wps=5900.4, ups=0.09, wpb=64855, bsz=128, num_updates=15529, lr=9.98838e-05, gnorm=1.883, loss_scale=16, train_wall=11, gb_free=2.8, wall=179248
2021-06-20 20:26:25 | INFO | train_inner | epoch 006: 610 / 3002 loss=2.449, ppl=5.46, wps=5805.2, ups=0.09, wpb=64815, bsz=128, num_updates=15530, lr=9.98838e-05, gnorm=2.062, loss_scale=16, train_wall=11, gb_free=2.8, wall=179260
2021-06-20 20:26:36 | INFO | train_inner | epoch 006: 611 / 3002 loss=2.409, ppl=5.31, wps=5833.3, ups=0.09, wpb=64884, bsz=128, num_updates=15531, lr=9.98837e-05, gnorm=1.875, loss_scale=16, train_wall=11, gb_free=2.8, wall=179271
2021-06-20 20:26:48 | INFO | train_inner | epoch 006: 612 / 3002 loss=2.454, ppl=5.48, wps=5806.8, ups=0.09, wpb=64964, bsz=128, num_updates=15532, lr=9.98837e-05, gnorm=1.913, loss_scale=16, train_wall=11, gb_free=2.8, wall=179282
2021-06-20 20:26:59 | INFO | train_inner | epoch 006: 613 / 3002 loss=2.383, ppl=5.22, wps=5842.1, ups=0.09, wpb=64788, bsz=128, num_updates=15533, lr=9.98837e-05, gnorm=1.831, loss_scale=16, train_wall=11, gb_free=2.8, wall=179293
2021-06-20 20:27:10 | INFO | train_inner | epoch 006: 614 / 3002 loss=2.356, ppl=5.12, wps=5712.6, ups=0.09, wpb=64773, bsz=128, num_updates=15534, lr=9.98837e-05, gnorm=2.068, loss_scale=16, train_wall=11, gb_free=2.8, wall=179304
2021-06-20 20:27:21 | INFO | train_inner | epoch 006: 615 / 3002 loss=2.696, ppl=6.48, wps=5856.5, ups=0.09, wpb=64809, bsz=128, num_updates=15535, lr=9.98837e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=179315
2021-06-20 20:27:32 | INFO | train_inner | epoch 006: 616 / 3002 loss=2.427, ppl=5.38, wps=5832.4, ups=0.09, wpb=64807, bsz=128, num_updates=15536, lr=9.98837e-05, gnorm=2.134, loss_scale=16, train_wall=11, gb_free=2.8, wall=179327
2021-06-20 20:27:43 | INFO | train_inner | epoch 006: 617 / 3002 loss=2.432, ppl=5.4, wps=6014.7, ups=0.09, wpb=64840, bsz=128, num_updates=15537, lr=9.98837e-05, gnorm=2.045, loss_scale=16, train_wall=10, gb_free=2.8, wall=179337
2021-06-20 20:27:54 | INFO | train_inner | epoch 006: 618 / 3002 loss=2.344, ppl=5.08, wps=5862.7, ups=0.09, wpb=64873, bsz=128, num_updates=15538, lr=9.98837e-05, gnorm=1.876, loss_scale=16, train_wall=11, gb_free=2.8, wall=179348
2021-06-20 20:28:05 | INFO | train_inner | epoch 006: 619 / 3002 loss=2.444, ppl=5.44, wps=5873.3, ups=0.09, wpb=64874, bsz=128, num_updates=15539, lr=9.98837e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=179359
2021-06-20 20:28:16 | INFO | train_inner | epoch 006: 620 / 3002 loss=2.52, ppl=5.74, wps=5856.4, ups=0.09, wpb=64850, bsz=128, num_updates=15540, lr=9.98837e-05, gnorm=1.856, loss_scale=16, train_wall=11, gb_free=2.8, wall=179370
2021-06-20 20:28:27 | INFO | train_inner | epoch 006: 621 / 3002 loss=2.619, ppl=6.14, wps=5875.2, ups=0.09, wpb=64768, bsz=128, num_updates=15541, lr=9.98837e-05, gnorm=1.867, loss_scale=16, train_wall=11, gb_free=2.8, wall=179382
2021-06-20 20:28:38 | INFO | train_inner | epoch 006: 622 / 3002 loss=2.422, ppl=5.36, wps=5827, ups=0.09, wpb=64813, bsz=128, num_updates=15542, lr=9.98837e-05, gnorm=1.844, loss_scale=16, train_wall=11, gb_free=2.8, wall=179393
2021-06-20 20:28:49 | INFO | train_inner | epoch 006: 623 / 3002 loss=2.447, ppl=5.45, wps=5876.1, ups=0.09, wpb=64835, bsz=128, num_updates=15543, lr=9.98836e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=179404
2021-06-20 20:29:00 | INFO | train_inner | epoch 006: 624 / 3002 loss=2.592, ppl=6.03, wps=5838.2, ups=0.09, wpb=64821, bsz=128, num_updates=15544, lr=9.98836e-05, gnorm=1.875, loss_scale=16, train_wall=11, gb_free=2.8, wall=179415
2021-06-20 20:29:11 | INFO | train_inner | epoch 006: 625 / 3002 loss=2.439, ppl=5.42, wps=5914.1, ups=0.09, wpb=64868, bsz=128, num_updates=15545, lr=9.98836e-05, gnorm=1.92, loss_scale=16, train_wall=10, gb_free=2.8, wall=179426
2021-06-20 20:29:22 | INFO | train_inner | epoch 006: 626 / 3002 loss=2.524, ppl=5.75, wps=5934.9, ups=0.09, wpb=64845, bsz=128, num_updates=15546, lr=9.98836e-05, gnorm=1.949, loss_scale=16, train_wall=10, gb_free=2.8, wall=179437
2021-06-20 20:29:33 | INFO | train_inner | epoch 006: 627 / 3002 loss=2.389, ppl=5.24, wps=5813.5, ups=0.09, wpb=64835, bsz=128, num_updates=15547, lr=9.98836e-05, gnorm=1.906, loss_scale=16, train_wall=11, gb_free=2.8, wall=179448
2021-06-20 20:29:44 | INFO | train_inner | epoch 006: 628 / 3002 loss=2.571, ppl=5.94, wps=5885.4, ups=0.09, wpb=64800, bsz=128, num_updates=15548, lr=9.98836e-05, gnorm=2.006, loss_scale=16, train_wall=11, gb_free=2.8, wall=179459
2021-06-20 20:29:56 | INFO | train_inner | epoch 006: 629 / 3002 loss=2.406, ppl=5.3, wps=5797.2, ups=0.09, wpb=64817, bsz=128, num_updates=15549, lr=9.98836e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=179470
2021-06-20 20:30:07 | INFO | train_inner | epoch 006: 630 / 3002 loss=2.386, ppl=5.23, wps=5821.9, ups=0.09, wpb=64863, bsz=128, num_updates=15550, lr=9.98836e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=179481
2021-06-20 20:30:18 | INFO | train_inner | epoch 006: 631 / 3002 loss=2.336, ppl=5.05, wps=5939.3, ups=0.09, wpb=64864, bsz=128, num_updates=15551, lr=9.98836e-05, gnorm=1.874, loss_scale=32, train_wall=10, gb_free=2.8, wall=179492
2021-06-20 20:30:29 | INFO | train_inner | epoch 006: 632 / 3002 loss=2.489, ppl=5.61, wps=5885.9, ups=0.09, wpb=64821, bsz=128, num_updates=15552, lr=9.98836e-05, gnorm=1.974, loss_scale=32, train_wall=11, gb_free=2.8, wall=179503
2021-06-20 20:30:40 | INFO | train_inner | epoch 006: 633 / 3002 loss=2.493, ppl=5.63, wps=5895.7, ups=0.09, wpb=64777, bsz=128, num_updates=15553, lr=9.98836e-05, gnorm=1.96, loss_scale=32, train_wall=11, gb_free=2.8, wall=179514
2021-06-20 20:30:51 | INFO | train_inner | epoch 006: 634 / 3002 loss=2.43, ppl=5.39, wps=5894.7, ups=0.09, wpb=64780, bsz=128, num_updates=15554, lr=9.98836e-05, gnorm=1.926, loss_scale=32, train_wall=10, gb_free=2.8, wall=179525
2021-06-20 20:31:02 | INFO | train_inner | epoch 006: 635 / 3002 loss=2.489, ppl=5.61, wps=5940.4, ups=0.09, wpb=64776, bsz=128, num_updates=15555, lr=9.98836e-05, gnorm=1.968, loss_scale=32, train_wall=10, gb_free=2.8, wall=179536
2021-06-20 20:31:13 | INFO | train_inner | epoch 006: 636 / 3002 loss=2.381, ppl=5.21, wps=5841.7, ups=0.09, wpb=64835, bsz=128, num_updates=15556, lr=9.98835e-05, gnorm=1.918, loss_scale=32, train_wall=11, gb_free=2.8, wall=179547
2021-06-20 20:31:24 | INFO | train_inner | epoch 006: 637 / 3002 loss=2.587, ppl=6.01, wps=5874.1, ups=0.09, wpb=64829, bsz=128, num_updates=15557, lr=9.98835e-05, gnorm=1.904, loss_scale=32, train_wall=11, gb_free=2.8, wall=179558
2021-06-20 20:31:35 | INFO | train_inner | epoch 006: 638 / 3002 loss=2.517, ppl=5.72, wps=5815.7, ups=0.09, wpb=64871, bsz=128, num_updates=15558, lr=9.98835e-05, gnorm=1.972, loss_scale=32, train_wall=11, gb_free=2.8, wall=179569
2021-06-20 20:31:46 | INFO | train_inner | epoch 006: 639 / 3002 loss=2.439, ppl=5.42, wps=5854, ups=0.09, wpb=64872, bsz=128, num_updates=15559, lr=9.98835e-05, gnorm=1.95, loss_scale=32, train_wall=11, gb_free=2.8, wall=179580
2021-06-20 20:31:57 | INFO | train_inner | epoch 006: 640 / 3002 loss=2.56, ppl=5.9, wps=5846.1, ups=0.09, wpb=64813, bsz=128, num_updates=15560, lr=9.98835e-05, gnorm=1.917, loss_scale=32, train_wall=11, gb_free=2.8, wall=179591
2021-06-20 20:32:08 | INFO | train_inner | epoch 006: 641 / 3002 loss=2.579, ppl=5.98, wps=5898.6, ups=0.09, wpb=64777, bsz=128, num_updates=15561, lr=9.98835e-05, gnorm=1.869, loss_scale=32, train_wall=11, gb_free=2.8, wall=179602
2021-06-20 20:32:19 | INFO | train_inner | epoch 006: 642 / 3002 loss=2.441, ppl=5.43, wps=5909.4, ups=0.09, wpb=64889, bsz=128, num_updates=15562, lr=9.98835e-05, gnorm=1.849, loss_scale=32, train_wall=11, gb_free=2.8, wall=179613
2021-06-20 20:32:30 | INFO | train_inner | epoch 006: 643 / 3002 loss=2.511, ppl=5.7, wps=5779, ups=0.09, wpb=64836, bsz=128, num_updates=15563, lr=9.98835e-05, gnorm=1.941, loss_scale=32, train_wall=11, gb_free=2.8, wall=179625
2021-06-20 20:32:41 | INFO | train_inner | epoch 006: 644 / 3002 loss=2.461, ppl=5.5, wps=5805.4, ups=0.09, wpb=64884, bsz=128, num_updates=15564, lr=9.98835e-05, gnorm=1.999, loss_scale=32, train_wall=11, gb_free=2.8, wall=179636
2021-06-20 20:32:53 | INFO | train_inner | epoch 006: 645 / 3002 loss=2.403, ppl=5.29, wps=5850.2, ups=0.09, wpb=64840, bsz=128, num_updates=15565, lr=9.98835e-05, gnorm=1.975, loss_scale=32, train_wall=11, gb_free=2.8, wall=179647
2021-06-20 20:33:03 | INFO | train_inner | epoch 006: 646 / 3002 loss=2.451, ppl=5.47, wps=5981.8, ups=0.09, wpb=64955, bsz=128, num_updates=15566, lr=9.98835e-05, gnorm=1.953, loss_scale=32, train_wall=10, gb_free=2.8, wall=179658
2021-06-20 20:33:14 | INFO | train_inner | epoch 006: 647 / 3002 loss=2.537, ppl=5.8, wps=6009.9, ups=0.09, wpb=64827, bsz=128, num_updates=15567, lr=9.98835e-05, gnorm=1.963, loss_scale=32, train_wall=10, gb_free=2.8, wall=179669
2021-06-20 20:33:25 | INFO | train_inner | epoch 006: 648 / 3002 loss=2.577, ppl=5.97, wps=5789.9, ups=0.09, wpb=64844, bsz=128, num_updates=15568, lr=9.98834e-05, gnorm=1.962, loss_scale=32, train_wall=11, gb_free=2.8, wall=179680
2021-06-20 20:33:36 | INFO | train_inner | epoch 006: 649 / 3002 loss=2.485, ppl=5.6, wps=5867.7, ups=0.09, wpb=64802, bsz=128, num_updates=15569, lr=9.98834e-05, gnorm=1.994, loss_scale=32, train_wall=11, gb_free=2.8, wall=179691
2021-06-20 20:33:48 | INFO | train_inner | epoch 006: 650 / 3002 loss=2.577, ppl=5.97, wps=5766.6, ups=0.09, wpb=64794, bsz=128, num_updates=15570, lr=9.98834e-05, gnorm=1.909, loss_scale=32, train_wall=11, gb_free=2.8, wall=179702
2021-06-20 20:33:59 | INFO | train_inner | epoch 006: 651 / 3002 loss=2.5, ppl=5.66, wps=5777, ups=0.09, wpb=64874, bsz=128, num_updates=15571, lr=9.98834e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=179713
2021-06-20 20:34:10 | INFO | train_inner | epoch 006: 652 / 3002 loss=2.582, ppl=5.99, wps=5898.7, ups=0.09, wpb=64820, bsz=128, num_updates=15572, lr=9.98834e-05, gnorm=2.03, loss_scale=32, train_wall=11, gb_free=2.8, wall=179724
2021-06-20 20:34:21 | INFO | train_inner | epoch 006: 653 / 3002 loss=2.553, ppl=5.87, wps=5835.5, ups=0.09, wpb=64788, bsz=128, num_updates=15573, lr=9.98834e-05, gnorm=1.898, loss_scale=32, train_wall=11, gb_free=2.8, wall=179735
2021-06-20 20:34:32 | INFO | train_inner | epoch 006: 654 / 3002 loss=2.53, ppl=5.78, wps=5901.5, ups=0.09, wpb=64809, bsz=128, num_updates=15574, lr=9.98834e-05, gnorm=1.917, loss_scale=32, train_wall=11, gb_free=2.8, wall=179746
2021-06-20 20:34:43 | INFO | train_inner | epoch 006: 655 / 3002 loss=2.523, ppl=5.75, wps=5856.4, ups=0.09, wpb=64727, bsz=128, num_updates=15575, lr=9.98834e-05, gnorm=1.912, loss_scale=32, train_wall=11, gb_free=2.8, wall=179757
2021-06-20 20:34:54 | INFO | train_inner | epoch 006: 656 / 3002 loss=2.293, ppl=4.9, wps=5742.2, ups=0.09, wpb=64806, bsz=128, num_updates=15576, lr=9.98834e-05, gnorm=1.874, loss_scale=32, train_wall=11, gb_free=2.8, wall=179769
2021-06-20 20:35:05 | INFO | train_inner | epoch 006: 657 / 3002 loss=2.464, ppl=5.52, wps=5829.7, ups=0.09, wpb=64837, bsz=128, num_updates=15577, lr=9.98834e-05, gnorm=1.95, loss_scale=32, train_wall=11, gb_free=2.8, wall=179780
2021-06-20 20:35:16 | INFO | train_inner | epoch 006: 658 / 3002 loss=2.462, ppl=5.51, wps=5894.7, ups=0.09, wpb=64846, bsz=128, num_updates=15578, lr=9.98834e-05, gnorm=1.933, loss_scale=32, train_wall=11, gb_free=2.8, wall=179791
2021-06-20 20:35:28 | INFO | train_inner | epoch 006: 659 / 3002 loss=2.601, ppl=6.07, wps=5805.4, ups=0.09, wpb=64669, bsz=128, num_updates=15579, lr=9.98834e-05, gnorm=1.908, loss_scale=32, train_wall=11, gb_free=2.8, wall=179802
2021-06-20 20:35:39 | INFO | train_inner | epoch 006: 660 / 3002 loss=2.613, ppl=6.12, wps=5809.8, ups=0.09, wpb=64781, bsz=128, num_updates=15580, lr=9.98834e-05, gnorm=1.868, loss_scale=32, train_wall=11, gb_free=2.8, wall=179813
2021-06-20 20:35:50 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0
2021-06-20 20:36:01 | INFO | train_inner | epoch 006: 662 / 3002 loss=2.477, ppl=5.57, wps=2920.4, ups=0.05, wpb=64779, bsz=128, num_updates=15581, lr=9.98833e-05, gnorm=1.921, loss_scale=16, train_wall=21, gb_free=2.8, wall=179835
2021-06-20 20:36:12 | INFO | train_inner | epoch 006: 663 / 3002 loss=2.409, ppl=5.31, wps=5821.3, ups=0.09, wpb=64829, bsz=128, num_updates=15582, lr=9.98833e-05, gnorm=2.042, loss_scale=16, train_wall=11, gb_free=2.8, wall=179846
2021-06-20 20:36:23 | INFO | train_inner | epoch 006: 664 / 3002 loss=2.508, ppl=5.69, wps=5806.5, ups=0.09, wpb=64840, bsz=128, num_updates=15583, lr=9.98833e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=179858
2021-06-20 20:36:34 | INFO | train_inner | epoch 006: 665 / 3002 loss=2.482, ppl=5.59, wps=5940, ups=0.09, wpb=64849, bsz=128, num_updates=15584, lr=9.98833e-05, gnorm=2.169, loss_scale=16, train_wall=10, gb_free=2.8, wall=179868
2021-06-20 20:36:45 | INFO | train_inner | epoch 006: 666 / 3002 loss=2.487, ppl=5.61, wps=5832, ups=0.09, wpb=64888, bsz=128, num_updates=15585, lr=9.98833e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=179880
2021-06-20 20:36:56 | INFO | train_inner | epoch 006: 667 / 3002 loss=2.479, ppl=5.58, wps=5986.4, ups=0.09, wpb=64854, bsz=128, num_updates=15586, lr=9.98833e-05, gnorm=1.822, loss_scale=16, train_wall=10, gb_free=2.8, wall=179890
2021-06-20 20:37:07 | INFO | train_inner | epoch 006: 668 / 3002 loss=2.515, ppl=5.72, wps=5980, ups=0.09, wpb=64931, bsz=128, num_updates=15587, lr=9.98833e-05, gnorm=2.058, loss_scale=16, train_wall=10, gb_free=2.8, wall=179901
2021-06-20 20:37:18 | INFO | train_inner | epoch 006: 669 / 3002 loss=2.37, ppl=5.17, wps=5932, ups=0.09, wpb=64880, bsz=128, num_updates=15588, lr=9.98833e-05, gnorm=1.885, loss_scale=16, train_wall=10, gb_free=2.8, wall=179912
2021-06-20 20:37:29 | INFO | train_inner | epoch 006: 670 / 3002 loss=2.375, ppl=5.19, wps=5866.3, ups=0.09, wpb=64867, bsz=128, num_updates=15589, lr=9.98833e-05, gnorm=1.939, loss_scale=16, train_wall=11, gb_free=2.8, wall=179923
2021-06-20 20:37:40 | INFO | train_inner | epoch 006: 671 / 3002 loss=2.447, ppl=5.45, wps=5848.7, ups=0.09, wpb=64753, bsz=128, num_updates=15590, lr=9.98833e-05, gnorm=1.877, loss_scale=16, train_wall=11, gb_free=2.8, wall=179934
2021-06-20 20:37:51 | INFO | train_inner | epoch 006: 672 / 3002 loss=2.373, ppl=5.18, wps=5787.1, ups=0.09, wpb=64727, bsz=128, num_updates=15591, lr=9.98833e-05, gnorm=1.871, loss_scale=16, train_wall=11, gb_free=2.8, wall=179946
2021-06-20 20:38:02 | INFO | train_inner | epoch 006: 673 / 3002 loss=2.632, ppl=6.2, wps=5768.6, ups=0.09, wpb=64837, bsz=128, num_updates=15592, lr=9.98833e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=179957
2021-06-20 20:38:14 | INFO | train_inner | epoch 006: 674 / 3002 loss=2.507, ppl=5.68, wps=5805.4, ups=0.09, wpb=64849, bsz=128, num_updates=15593, lr=9.98832e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=179968
2021-06-20 20:38:25 | INFO | train_inner | epoch 006: 675 / 3002 loss=2.431, ppl=5.39, wps=5842, ups=0.09, wpb=64752, bsz=128, num_updates=15594, lr=9.98832e-05, gnorm=1.892, loss_scale=16, train_wall=11, gb_free=2.8, wall=179979
2021-06-20 20:38:36 | INFO | train_inner | epoch 006: 676 / 3002 loss=2.491, ppl=5.62, wps=5943.7, ups=0.09, wpb=64871, bsz=128, num_updates=15595, lr=9.98832e-05, gnorm=2.038, loss_scale=16, train_wall=10, gb_free=2.8, wall=179990
2021-06-20 20:38:47 | INFO | train_inner | epoch 006: 677 / 3002 loss=2.445, ppl=5.44, wps=5830.7, ups=0.09, wpb=64810, bsz=128, num_updates=15596, lr=9.98832e-05, gnorm=1.852, loss_scale=16, train_wall=11, gb_free=2.8, wall=180001
2021-06-20 20:38:58 | INFO | train_inner | epoch 006: 678 / 3002 loss=2.569, ppl=5.93, wps=5786.1, ups=0.09, wpb=64743, bsz=128, num_updates=15597, lr=9.98832e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=180012
2021-06-20 20:39:09 | INFO | train_inner | epoch 006: 679 / 3002 loss=2.478, ppl=5.57, wps=5866.5, ups=0.09, wpb=64735, bsz=128, num_updates=15598, lr=9.98832e-05, gnorm=1.834, loss_scale=16, train_wall=11, gb_free=2.8, wall=180023
2021-06-20 20:39:20 | INFO | train_inner | epoch 006: 680 / 3002 loss=2.456, ppl=5.49, wps=5880.6, ups=0.09, wpb=64754, bsz=128, num_updates=15599, lr=9.98832e-05, gnorm=1.893, loss_scale=16, train_wall=11, gb_free=2.8, wall=180034
2021-06-20 20:39:31 | INFO | train_inner | epoch 006: 681 / 3002 loss=2.432, ppl=5.4, wps=5775.3, ups=0.09, wpb=64842, bsz=128, num_updates=15600, lr=9.98832e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=180046
2021-06-20 20:39:42 | INFO | train_inner | epoch 006: 682 / 3002 loss=2.423, ppl=5.36, wps=5982.6, ups=0.09, wpb=64858, bsz=128, num_updates=15601, lr=9.98832e-05, gnorm=2.023, loss_scale=16, train_wall=10, gb_free=2.8, wall=180056
2021-06-20 20:39:53 | INFO | train_inner | epoch 006: 683 / 3002 loss=2.455, ppl=5.48, wps=5804.2, ups=0.09, wpb=64837, bsz=128, num_updates=15602, lr=9.98832e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=180068
2021-06-20 20:40:04 | INFO | train_inner | epoch 006: 684 / 3002 loss=2.465, ppl=5.52, wps=5846.3, ups=0.09, wpb=64824, bsz=128, num_updates=15603, lr=9.98832e-05, gnorm=1.971, loss_scale=16, train_wall=11, gb_free=2.8, wall=180079
2021-06-20 20:40:15 | INFO | train_inner | epoch 006: 685 / 3002 loss=2.472, ppl=5.55, wps=5828.7, ups=0.09, wpb=64890, bsz=128, num_updates=15604, lr=9.98832e-05, gnorm=1.956, loss_scale=16, train_wall=11, gb_free=2.8, wall=180090
2021-06-20 20:40:27 | INFO | train_inner | epoch 006: 686 / 3002 loss=2.424, ppl=5.37, wps=5803.7, ups=0.09, wpb=64838, bsz=128, num_updates=15605, lr=9.98832e-05, gnorm=1.964, loss_scale=16, train_wall=11, gb_free=2.8, wall=180101
2021-06-20 20:40:37 | INFO | train_inner | epoch 006: 687 / 3002 loss=2.567, ppl=5.93, wps=5940.4, ups=0.09, wpb=64819, bsz=128, num_updates=15606, lr=9.98831e-05, gnorm=2.006, loss_scale=16, train_wall=10, gb_free=2.8, wall=180112
2021-06-20 20:40:49 | INFO | train_inner | epoch 006: 688 / 3002 loss=2.552, ppl=5.86, wps=5876.5, ups=0.09, wpb=64804, bsz=128, num_updates=15607, lr=9.98831e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=180123
2021-06-20 20:41:00 | INFO | train_inner | epoch 006: 689 / 3002 loss=2.472, ppl=5.55, wps=5799, ups=0.09, wpb=64848, bsz=128, num_updates=15608, lr=9.98831e-05, gnorm=1.849, loss_scale=16, train_wall=11, gb_free=2.8, wall=180134
2021-06-20 20:41:11 | INFO | train_inner | epoch 006: 690 / 3002 loss=2.444, ppl=5.44, wps=5721.4, ups=0.09, wpb=64762, bsz=128, num_updates=15609, lr=9.98831e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=180145
2021-06-20 20:41:22 | INFO | train_inner | epoch 006: 691 / 3002 loss=2.466, ppl=5.53, wps=5773.4, ups=0.09, wpb=64753, bsz=128, num_updates=15610, lr=9.98831e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=180157
2021-06-20 20:41:33 | INFO | train_inner | epoch 006: 692 / 3002 loss=2.546, ppl=5.84, wps=5881.8, ups=0.09, wpb=64861, bsz=128, num_updates=15611, lr=9.98831e-05, gnorm=1.97, loss_scale=16, train_wall=11, gb_free=2.8, wall=180168
2021-06-20 20:41:44 | INFO | train_inner | epoch 006: 693 / 3002 loss=2.533, ppl=5.79, wps=5819.2, ups=0.09, wpb=64898, bsz=128, num_updates=15612, lr=9.98831e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=180179
2021-06-20 20:41:56 | INFO | train_inner | epoch 006: 694 / 3002 loss=2.482, ppl=5.59, wps=5767.2, ups=0.09, wpb=64807, bsz=128, num_updates=15613, lr=9.98831e-05, gnorm=1.861, loss_scale=16, train_wall=11, gb_free=2.8, wall=180190
2021-06-20 20:42:07 | INFO | train_inner | epoch 006: 695 / 3002 loss=2.449, ppl=5.46, wps=5875.4, ups=0.09, wpb=64905, bsz=128, num_updates=15614, lr=9.98831e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=180201
2021-06-20 20:42:18 | INFO | train_inner | epoch 006: 696 / 3002 loss=2.596, ppl=6.05, wps=5784, ups=0.09, wpb=64804, bsz=128, num_updates=15615, lr=9.98831e-05, gnorm=2.094, loss_scale=16, train_wall=11, gb_free=2.8, wall=180212
2021-06-20 20:42:29 | INFO | train_inner | epoch 006: 697 / 3002 loss=2.526, ppl=5.76, wps=5937.6, ups=0.09, wpb=64836, bsz=128, num_updates=15616, lr=9.98831e-05, gnorm=2.05, loss_scale=16, train_wall=10, gb_free=2.8, wall=180223
2021-06-20 20:42:40 | INFO | train_inner | epoch 006: 698 / 3002 loss=2.345, ppl=5.08, wps=5781.1, ups=0.09, wpb=64763, bsz=128, num_updates=15617, lr=9.98831e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=180234
2021-06-20 20:42:51 | INFO | train_inner | epoch 006: 699 / 3002 loss=2.475, ppl=5.56, wps=5845.4, ups=0.09, wpb=64793, bsz=128, num_updates=15618, lr=9.9883e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=180245
2021-06-20 20:43:02 | INFO | train_inner | epoch 006: 700 / 3002 loss=2.554, ppl=5.87, wps=5883.7, ups=0.09, wpb=64849, bsz=128, num_updates=15619, lr=9.9883e-05, gnorm=2.051, loss_scale=16, train_wall=11, gb_free=2.8, wall=180256
2021-06-20 20:43:13 | INFO | train_inner | epoch 006: 701 / 3002 loss=2.463, ppl=5.51, wps=5886, ups=0.09, wpb=64870, bsz=128, num_updates=15620, lr=9.9883e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=180267
2021-06-20 20:43:24 | INFO | train_inner | epoch 006: 702 / 3002 loss=2.498, ppl=5.65, wps=5789.4, ups=0.09, wpb=64858, bsz=128, num_updates=15621, lr=9.9883e-05, gnorm=1.861, loss_scale=16, train_wall=11, gb_free=2.8, wall=180279
2021-06-20 20:43:36 | INFO | train_inner | epoch 006: 703 / 3002 loss=2.514, ppl=5.71, wps=5704.2, ups=0.09, wpb=64796, bsz=128, num_updates=15622, lr=9.9883e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=180290
2021-06-20 20:43:47 | INFO | train_inner | epoch 006: 704 / 3002 loss=2.383, ppl=5.21, wps=5814.7, ups=0.09, wpb=64827, bsz=128, num_updates=15623, lr=9.9883e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=180301
2021-06-20 20:43:58 | INFO | train_inner | epoch 006: 705 / 3002 loss=2.736, ppl=6.66, wps=5812.9, ups=0.09, wpb=64739, bsz=128, num_updates=15624, lr=9.9883e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=180312
2021-06-20 20:44:09 | INFO | train_inner | epoch 006: 706 / 3002 loss=2.442, ppl=5.44, wps=5945.6, ups=0.09, wpb=64887, bsz=128, num_updates=15625, lr=9.9883e-05, gnorm=1.975, loss_scale=16, train_wall=10, gb_free=2.8, wall=180323
2021-06-20 20:44:20 | INFO | train_inner | epoch 006: 707 / 3002 loss=2.456, ppl=5.49, wps=5885.8, ups=0.09, wpb=64863, bsz=128, num_updates=15626, lr=9.9883e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=180334
2021-06-20 20:44:31 | INFO | train_inner | epoch 006: 708 / 3002 loss=2.501, ppl=5.66, wps=5856.3, ups=0.09, wpb=64803, bsz=128, num_updates=15627, lr=9.9883e-05, gnorm=2.023, loss_scale=16, train_wall=11, gb_free=2.8, wall=180345
2021-06-20 20:44:42 | INFO | train_inner | epoch 006: 709 / 3002 loss=2.519, ppl=5.73, wps=5868.4, ups=0.09, wpb=64881, bsz=128, num_updates=15628, lr=9.9883e-05, gnorm=1.877, loss_scale=16, train_wall=11, gb_free=2.8, wall=180356
2021-06-20 20:44:53 | INFO | train_inner | epoch 006: 710 / 3002 loss=2.597, ppl=6.05, wps=5944.8, ups=0.09, wpb=64875, bsz=128, num_updates=15629, lr=9.9883e-05, gnorm=1.895, loss_scale=16, train_wall=10, gb_free=2.8, wall=180367
2021-06-20 20:45:04 | INFO | train_inner | epoch 006: 711 / 3002 loss=2.586, ppl=6.01, wps=5806.8, ups=0.09, wpb=64869, bsz=128, num_updates=15630, lr=9.9883e-05, gnorm=1.952, loss_scale=16, train_wall=11, gb_free=2.8, wall=180378
2021-06-20 20:45:15 | INFO | train_inner | epoch 006: 712 / 3002 loss=2.57, ppl=5.94, wps=5896, ups=0.09, wpb=64875, bsz=128, num_updates=15631, lr=9.98829e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=180389
2021-06-20 20:45:26 | INFO | train_inner | epoch 006: 713 / 3002 loss=2.555, ppl=5.88, wps=5886.5, ups=0.09, wpb=64809, bsz=128, num_updates=15632, lr=9.98829e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=180401
2021-06-20 20:45:37 | INFO | train_inner | epoch 006: 714 / 3002 loss=2.422, ppl=5.36, wps=5900, ups=0.09, wpb=64819, bsz=128, num_updates=15633, lr=9.98829e-05, gnorm=1.848, loss_scale=16, train_wall=11, gb_free=2.8, wall=180411
2021-06-20 20:45:48 | INFO | train_inner | epoch 006: 715 / 3002 loss=2.554, ppl=5.87, wps=5922.4, ups=0.09, wpb=64738, bsz=128, num_updates=15634, lr=9.98829e-05, gnorm=1.856, loss_scale=16, train_wall=10, gb_free=2.8, wall=180422
2021-06-20 20:45:59 | INFO | train_inner | epoch 006: 716 / 3002 loss=2.58, ppl=5.98, wps=5775.8, ups=0.09, wpb=64840, bsz=128, num_updates=15635, lr=9.98829e-05, gnorm=1.964, loss_scale=16, train_wall=11, gb_free=2.8, wall=180434
2021-06-20 20:46:11 | INFO | train_inner | epoch 006: 717 / 3002 loss=2.524, ppl=5.75, wps=5769.5, ups=0.09, wpb=64864, bsz=128, num_updates=15636, lr=9.98829e-05, gnorm=1.981, loss_scale=16, train_wall=11, gb_free=2.8, wall=180445
2021-06-20 20:46:22 | INFO | train_inner | epoch 006: 718 / 3002 loss=2.537, ppl=5.8, wps=5846, ups=0.09, wpb=64832, bsz=128, num_updates=15637, lr=9.98829e-05, gnorm=1.93, loss_scale=16, train_wall=11, gb_free=2.8, wall=180456
2021-06-20 20:46:33 | INFO | train_inner | epoch 006: 719 / 3002 loss=2.458, ppl=5.5, wps=5885.2, ups=0.09, wpb=64815, bsz=128, num_updates=15638, lr=9.98829e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=180467
2021-06-20 20:46:44 | INFO | train_inner | epoch 006: 720 / 3002 loss=2.379, ppl=5.2, wps=5812.2, ups=0.09, wpb=64864, bsz=128, num_updates=15639, lr=9.98829e-05, gnorm=1.914, loss_scale=16, train_wall=11, gb_free=2.8, wall=180478
2021-06-20 20:46:55 | INFO | train_inner | epoch 006: 721 / 3002 loss=2.348, ppl=5.09, wps=5830.6, ups=0.09, wpb=64828, bsz=128, num_updates=15640, lr=9.98829e-05, gnorm=1.948, loss_scale=16, train_wall=11, gb_free=2.8, wall=180489
2021-06-20 20:47:06 | INFO | train_inner | epoch 006: 722 / 3002 loss=2.336, ppl=5.05, wps=5966.1, ups=0.09, wpb=64813, bsz=128, num_updates=15641, lr=9.98829e-05, gnorm=1.885, loss_scale=16, train_wall=10, gb_free=2.8, wall=180500
2021-06-20 20:47:17 | INFO | train_inner | epoch 006: 723 / 3002 loss=2.45, ppl=5.47, wps=5749.7, ups=0.09, wpb=64823, bsz=128, num_updates=15642, lr=9.98829e-05, gnorm=2.13, loss_scale=16, train_wall=11, gb_free=2.8, wall=180511
2021-06-20 20:47:28 | INFO | train_inner | epoch 006: 724 / 3002 loss=2.698, ppl=6.49, wps=5847.6, ups=0.09, wpb=64846, bsz=128, num_updates=15643, lr=9.98828e-05, gnorm=2.078, loss_scale=16, train_wall=11, gb_free=2.8, wall=180522
2021-06-20 20:47:39 | INFO | train_inner | epoch 006: 725 / 3002 loss=2.415, ppl=5.33, wps=5863.5, ups=0.09, wpb=64852, bsz=128, num_updates=15644, lr=9.98828e-05, gnorm=2.078, loss_scale=16, train_wall=11, gb_free=2.8, wall=180534
2021-06-20 20:47:50 | INFO | train_inner | epoch 006: 726 / 3002 loss=2.505, ppl=5.68, wps=5837, ups=0.09, wpb=64859, bsz=128, num_updates=15645, lr=9.98828e-05, gnorm=1.87, loss_scale=16, train_wall=11, gb_free=2.8, wall=180545
2021-06-20 20:48:02 | INFO | train_inner | epoch 006: 727 / 3002 loss=2.635, ppl=6.21, wps=5780.2, ups=0.09, wpb=64853, bsz=128, num_updates=15646, lr=9.98828e-05, gnorm=1.924, loss_scale=16, train_wall=11, gb_free=2.8, wall=180556
2021-06-20 20:48:13 | INFO | train_inner | epoch 006: 728 / 3002 loss=2.558, ppl=5.89, wps=5797.6, ups=0.09, wpb=64805, bsz=128, num_updates=15647, lr=9.98828e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=180567
2021-06-20 20:48:24 | INFO | train_inner | epoch 006: 729 / 3002 loss=2.316, ppl=4.98, wps=5871.4, ups=0.09, wpb=64856, bsz=128, num_updates=15648, lr=9.98828e-05, gnorm=1.936, loss_scale=16, train_wall=11, gb_free=2.8, wall=180578
2021-06-20 20:48:35 | INFO | train_inner | epoch 006: 730 / 3002 loss=2.476, ppl=5.57, wps=5851.9, ups=0.09, wpb=64884, bsz=128, num_updates=15649, lr=9.98828e-05, gnorm=1.918, loss_scale=16, train_wall=11, gb_free=2.8, wall=180589
2021-06-20 20:48:46 | INFO | train_inner | epoch 006: 731 / 3002 loss=2.353, ppl=5.11, wps=5892.3, ups=0.09, wpb=64934, bsz=128, num_updates=15650, lr=9.98828e-05, gnorm=1.948, loss_scale=16, train_wall=11, gb_free=2.8, wall=180600
2021-06-20 20:48:57 | INFO | train_inner | epoch 006: 732 / 3002 loss=2.509, ppl=5.69, wps=5836.3, ups=0.09, wpb=64820, bsz=128, num_updates=15651, lr=9.98828e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=180611
2021-06-20 20:49:08 | INFO | train_inner | epoch 006: 733 / 3002 loss=2.562, ppl=5.9, wps=5845.8, ups=0.09, wpb=64826, bsz=128, num_updates=15652, lr=9.98828e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=180622
2021-06-20 20:49:19 | INFO | train_inner | epoch 006: 734 / 3002 loss=2.442, ppl=5.43, wps=5850.4, ups=0.09, wpb=64834, bsz=128, num_updates=15653, lr=9.98828e-05, gnorm=1.915, loss_scale=16, train_wall=11, gb_free=2.8, wall=180634
2021-06-20 20:49:30 | INFO | train_inner | epoch 006: 735 / 3002 loss=2.483, ppl=5.59, wps=5964.2, ups=0.09, wpb=64817, bsz=128, num_updates=15654, lr=9.98828e-05, gnorm=1.935, loss_scale=16, train_wall=10, gb_free=2.8, wall=180644
2021-06-20 20:49:41 | INFO | train_inner | epoch 006: 736 / 3002 loss=2.527, ppl=5.77, wps=5854.3, ups=0.09, wpb=64828, bsz=128, num_updates=15655, lr=9.98828e-05, gnorm=1.867, loss_scale=16, train_wall=11, gb_free=2.8, wall=180655
2021-06-20 20:49:52 | INFO | train_inner | epoch 006: 737 / 3002 loss=2.368, ppl=5.16, wps=5803.2, ups=0.09, wpb=64811, bsz=128, num_updates=15656, lr=9.98827e-05, gnorm=1.945, loss_scale=16, train_wall=11, gb_free=2.8, wall=180667
2021-06-20 20:50:03 | INFO | train_inner | epoch 006: 738 / 3002 loss=2.545, ppl=5.83, wps=5802.9, ups=0.09, wpb=64782, bsz=128, num_updates=15657, lr=9.98827e-05, gnorm=2.222, loss_scale=16, train_wall=11, gb_free=2.8, wall=180678
2021-06-20 20:50:15 | INFO | train_inner | epoch 006: 739 / 3002 loss=2.344, ppl=5.08, wps=5835, ups=0.09, wpb=64816, bsz=128, num_updates=15658, lr=9.98827e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=180689
2021-06-20 20:50:26 | INFO | train_inner | epoch 006: 740 / 3002 loss=2.577, ppl=5.97, wps=5846.4, ups=0.09, wpb=64823, bsz=128, num_updates=15659, lr=9.98827e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=180700
2021-06-20 20:50:37 | INFO | train_inner | epoch 006: 741 / 3002 loss=2.517, ppl=5.73, wps=5728.9, ups=0.09, wpb=64790, bsz=128, num_updates=15660, lr=9.98827e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=180711
2021-06-20 20:50:48 | INFO | train_inner | epoch 006: 742 / 3002 loss=2.434, ppl=5.4, wps=5774.8, ups=0.09, wpb=64811, bsz=128, num_updates=15661, lr=9.98827e-05, gnorm=1.975, loss_scale=16, train_wall=11, gb_free=2.8, wall=180723
2021-06-20 20:50:59 | INFO | train_inner | epoch 006: 743 / 3002 loss=2.38, ppl=5.2, wps=5806.5, ups=0.09, wpb=64853, bsz=128, num_updates=15662, lr=9.98827e-05, gnorm=1.935, loss_scale=16, train_wall=11, gb_free=2.8, wall=180734
2021-06-20 20:51:10 | INFO | train_inner | epoch 006: 744 / 3002 loss=2.566, ppl=5.92, wps=5897.3, ups=0.09, wpb=64864, bsz=128, num_updates=15663, lr=9.98827e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=180745
2021-06-20 20:51:21 | INFO | train_inner | epoch 006: 745 / 3002 loss=2.33, ppl=5.03, wps=5816.3, ups=0.09, wpb=64839, bsz=128, num_updates=15664, lr=9.98827e-05, gnorm=1.901, loss_scale=16, train_wall=11, gb_free=2.8, wall=180756
2021-06-20 20:51:33 | INFO | train_inner | epoch 006: 746 / 3002 loss=2.532, ppl=5.79, wps=5766.1, ups=0.09, wpb=64812, bsz=128, num_updates=15665, lr=9.98827e-05, gnorm=1.954, loss_scale=16, train_wall=11, gb_free=2.8, wall=180767
2021-06-20 20:51:44 | INFO | train_inner | epoch 006: 747 / 3002 loss=2.462, ppl=5.51, wps=5789.8, ups=0.09, wpb=64800, bsz=128, num_updates=15666, lr=9.98827e-05, gnorm=1.901, loss_scale=16, train_wall=11, gb_free=2.8, wall=180778
2021-06-20 20:51:55 | INFO | train_inner | epoch 006: 748 / 3002 loss=2.624, ppl=6.16, wps=5807.6, ups=0.09, wpb=64758, bsz=128, num_updates=15667, lr=9.98827e-05, gnorm=1.861, loss_scale=16, train_wall=11, gb_free=2.8, wall=180789
2021-06-20 20:52:06 | INFO | train_inner | epoch 006: 749 / 3002 loss=2.601, ppl=6.07, wps=5879.1, ups=0.09, wpb=64850, bsz=128, num_updates=15668, lr=9.98826e-05, gnorm=1.912, loss_scale=16, train_wall=11, gb_free=2.8, wall=180800
2021-06-20 20:52:17 | INFO | train_inner | epoch 006: 750 / 3002 loss=2.507, ppl=5.69, wps=5809.1, ups=0.09, wpb=64730, bsz=128, num_updates=15669, lr=9.98826e-05, gnorm=1.912, loss_scale=16, train_wall=11, gb_free=2.8, wall=180812
2021-06-20 20:52:28 | INFO | train_inner | epoch 006: 751 / 3002 loss=2.469, ppl=5.54, wps=5951.2, ups=0.09, wpb=64867, bsz=128, num_updates=15670, lr=9.98826e-05, gnorm=1.912, loss_scale=16, train_wall=10, gb_free=2.8, wall=180822
2021-06-20 20:52:39 | INFO | train_inner | epoch 006: 752 / 3002 loss=2.375, ppl=5.19, wps=5750.5, ups=0.09, wpb=64834, bsz=128, num_updates=15671, lr=9.98826e-05, gnorm=1.832, loss_scale=16, train_wall=11, gb_free=2.8, wall=180834
2021-06-20 20:52:51 | INFO | train_inner | epoch 006: 753 / 3002 loss=2.5, ppl=5.66, wps=5835.7, ups=0.09, wpb=64827, bsz=128, num_updates=15672, lr=9.98826e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=180845
2021-06-20 20:53:02 | INFO | train_inner | epoch 006: 754 / 3002 loss=2.471, ppl=5.54, wps=5804.9, ups=0.09, wpb=64831, bsz=128, num_updates=15673, lr=9.98826e-05, gnorm=2.008, loss_scale=16, train_wall=11, gb_free=2.8, wall=180856
2021-06-20 20:53:13 | INFO | train_inner | epoch 006: 755 / 3002 loss=2.506, ppl=5.68, wps=5924.8, ups=0.09, wpb=64843, bsz=128, num_updates=15674, lr=9.98826e-05, gnorm=1.838, loss_scale=16, train_wall=11, gb_free=2.8, wall=180867
2021-06-20 20:53:24 | INFO | train_inner | epoch 006: 756 / 3002 loss=2.551, ppl=5.86, wps=5800.8, ups=0.09, wpb=64835, bsz=128, num_updates=15675, lr=9.98826e-05, gnorm=1.921, loss_scale=16, train_wall=11, gb_free=2.8, wall=180878
2021-06-20 20:53:35 | INFO | train_inner | epoch 006: 757 / 3002 loss=2.36, ppl=5.13, wps=5924.9, ups=0.09, wpb=64812, bsz=128, num_updates=15676, lr=9.98826e-05, gnorm=1.851, loss_scale=16, train_wall=10, gb_free=2.8, wall=180889
2021-06-20 20:53:46 | INFO | train_inner | epoch 006: 758 / 3002 loss=2.393, ppl=5.25, wps=5785.9, ups=0.09, wpb=64854, bsz=128, num_updates=15677, lr=9.98826e-05, gnorm=1.816, loss_scale=16, train_wall=11, gb_free=2.8, wall=180900
2021-06-20 20:53:57 | INFO | train_inner | epoch 006: 759 / 3002 loss=2.614, ppl=6.12, wps=5980.8, ups=0.09, wpb=64840, bsz=128, num_updates=15678, lr=9.98826e-05, gnorm=2.029, loss_scale=16, train_wall=10, gb_free=2.8, wall=180911
2021-06-20 20:54:08 | INFO | train_inner | epoch 006: 760 / 3002 loss=2.427, ppl=5.38, wps=5895.4, ups=0.09, wpb=64896, bsz=128, num_updates=15679, lr=9.98826e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=180922
2021-06-20 20:54:19 | INFO | train_inner | epoch 006: 761 / 3002 loss=2.494, ppl=5.64, wps=5825.8, ups=0.09, wpb=64777, bsz=128, num_updates=15680, lr=9.98826e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=180933
2021-06-20 20:54:30 | INFO | train_inner | epoch 006: 762 / 3002 loss=2.449, ppl=5.46, wps=5948.9, ups=0.09, wpb=64736, bsz=128, num_updates=15681, lr=9.98825e-05, gnorm=1.892, loss_scale=16, train_wall=10, gb_free=2.8, wall=180944
2021-06-20 20:54:41 | INFO | train_inner | epoch 006: 763 / 3002 loss=2.495, ppl=5.64, wps=5908.7, ups=0.09, wpb=64787, bsz=128, num_updates=15682, lr=9.98825e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=180955
2021-06-20 20:54:52 | INFO | train_inner | epoch 006: 764 / 3002 loss=2.55, ppl=5.86, wps=5827, ups=0.09, wpb=64910, bsz=128, num_updates=15683, lr=9.98825e-05, gnorm=1.921, loss_scale=16, train_wall=11, gb_free=2.8, wall=180966
2021-06-20 20:55:03 | INFO | train_inner | epoch 006: 765 / 3002 loss=2.34, ppl=5.06, wps=5813.5, ups=0.09, wpb=64863, bsz=128, num_updates=15684, lr=9.98825e-05, gnorm=1.977, loss_scale=16, train_wall=11, gb_free=2.8, wall=180977
2021-06-20 20:55:14 | INFO | train_inner | epoch 006: 766 / 3002 loss=2.551, ppl=5.86, wps=5834.4, ups=0.09, wpb=64912, bsz=128, num_updates=15685, lr=9.98825e-05, gnorm=2.741, loss_scale=16, train_wall=11, gb_free=2.8, wall=180989
2021-06-20 20:55:25 | INFO | train_inner | epoch 006: 767 / 3002 loss=2.521, ppl=5.74, wps=5801.2, ups=0.09, wpb=64891, bsz=128, num_updates=15686, lr=9.98825e-05, gnorm=1.969, loss_scale=16, train_wall=11, gb_free=2.8, wall=181000
2021-06-20 20:55:37 | INFO | train_inner | epoch 006: 768 / 3002 loss=2.496, ppl=5.64, wps=5822.7, ups=0.09, wpb=64793, bsz=128, num_updates=15687, lr=9.98825e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=181011
2021-06-20 20:55:48 | INFO | train_inner | epoch 006: 769 / 3002 loss=2.476, ppl=5.56, wps=5735.5, ups=0.09, wpb=64810, bsz=128, num_updates=15688, lr=9.98825e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=181022
2021-06-20 20:55:59 | INFO | train_inner | epoch 006: 770 / 3002 loss=2.6, ppl=6.06, wps=5937.7, ups=0.09, wpb=64835, bsz=128, num_updates=15689, lr=9.98825e-05, gnorm=2.025, loss_scale=16, train_wall=10, gb_free=2.8, wall=181033
2021-06-20 20:56:10 | INFO | train_inner | epoch 006: 771 / 3002 loss=2.526, ppl=5.76, wps=5897, ups=0.09, wpb=64804, bsz=128, num_updates=15690, lr=9.98825e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=181044
2021-06-20 20:56:21 | INFO | train_inner | epoch 006: 772 / 3002 loss=2.438, ppl=5.42, wps=5853.5, ups=0.09, wpb=64896, bsz=128, num_updates=15691, lr=9.98825e-05, gnorm=1.859, loss_scale=16, train_wall=11, gb_free=2.8, wall=181055
2021-06-20 20:56:32 | INFO | train_inner | epoch 006: 773 / 3002 loss=2.467, ppl=5.53, wps=5817.9, ups=0.09, wpb=64821, bsz=128, num_updates=15692, lr=9.98825e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=181066
2021-06-20 20:56:43 | INFO | train_inner | epoch 006: 774 / 3002 loss=2.628, ppl=6.18, wps=5780.7, ups=0.09, wpb=64835, bsz=128, num_updates=15693, lr=9.98824e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=181078
2021-06-20 20:56:54 | INFO | train_inner | epoch 006: 775 / 3002 loss=2.416, ppl=5.34, wps=5808.8, ups=0.09, wpb=64755, bsz=128, num_updates=15694, lr=9.98824e-05, gnorm=1.871, loss_scale=16, train_wall=11, gb_free=2.8, wall=181089
2021-06-20 20:57:06 | INFO | train_inner | epoch 006: 776 / 3002 loss=2.664, ppl=6.34, wps=5781.9, ups=0.09, wpb=64745, bsz=128, num_updates=15695, lr=9.98824e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=181100
2021-06-20 20:57:17 | INFO | train_inner | epoch 006: 777 / 3002 loss=2.604, ppl=6.08, wps=5832.4, ups=0.09, wpb=64791, bsz=128, num_updates=15696, lr=9.98824e-05, gnorm=2.026, loss_scale=16, train_wall=11, gb_free=2.8, wall=181111
2021-06-20 20:57:28 | INFO | train_inner | epoch 006: 778 / 3002 loss=2.515, ppl=5.71, wps=5742.3, ups=0.09, wpb=64725, bsz=128, num_updates=15697, lr=9.98824e-05, gnorm=1.909, loss_scale=16, train_wall=11, gb_free=2.8, wall=181122
2021-06-20 20:57:39 | INFO | train_inner | epoch 006: 779 / 3002 loss=2.491, ppl=5.62, wps=5890.4, ups=0.09, wpb=64805, bsz=128, num_updates=15698, lr=9.98824e-05, gnorm=1.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=181133
2021-06-20 20:57:50 | INFO | train_inner | epoch 006: 780 / 3002 loss=2.686, ppl=6.43, wps=5878.9, ups=0.09, wpb=64864, bsz=128, num_updates=15699, lr=9.98824e-05, gnorm=1.923, loss_scale=16, train_wall=11, gb_free=2.8, wall=181144
2021-06-20 20:58:01 | INFO | train_inner | epoch 006: 781 / 3002 loss=2.415, ppl=5.33, wps=5758.5, ups=0.09, wpb=64762, bsz=128, num_updates=15700, lr=9.98824e-05, gnorm=1.884, loss_scale=16, train_wall=11, gb_free=2.8, wall=181156
2021-06-20 20:58:12 | INFO | train_inner | epoch 006: 782 / 3002 loss=2.411, ppl=5.32, wps=5879.1, ups=0.09, wpb=64849, bsz=128, num_updates=15701, lr=9.98824e-05, gnorm=1.985, loss_scale=16, train_wall=11, gb_free=2.8, wall=181167
2021-06-20 20:58:23 | INFO | train_inner | epoch 006: 783 / 3002 loss=2.559, ppl=5.89, wps=5879.7, ups=0.09, wpb=64838, bsz=128, num_updates=15702, lr=9.98824e-05, gnorm=1.867, loss_scale=16, train_wall=11, gb_free=2.8, wall=181178
2021-06-20 20:58:34 | INFO | train_inner | epoch 006: 784 / 3002 loss=2.415, ppl=5.33, wps=5854.5, ups=0.09, wpb=64817, bsz=128, num_updates=15703, lr=9.98824e-05, gnorm=1.95, loss_scale=16, train_wall=11, gb_free=2.8, wall=181189
2021-06-20 20:58:45 | INFO | train_inner | epoch 006: 785 / 3002 loss=2.589, ppl=6.02, wps=5845.4, ups=0.09, wpb=64830, bsz=128, num_updates=15704, lr=9.98824e-05, gnorm=2.167, loss_scale=16, train_wall=11, gb_free=2.8, wall=181200
2021-06-20 20:58:56 | INFO | train_inner | epoch 006: 786 / 3002 loss=2.361, ppl=5.14, wps=5942.4, ups=0.09, wpb=64863, bsz=128, num_updates=15705, lr=9.98824e-05, gnorm=1.956, loss_scale=16, train_wall=10, gb_free=2.8, wall=181211
2021-06-20 20:59:07 | INFO | train_inner | epoch 006: 787 / 3002 loss=2.495, ppl=5.64, wps=5993.2, ups=0.09, wpb=64826, bsz=128, num_updates=15706, lr=9.98823e-05, gnorm=1.951, loss_scale=16, train_wall=10, gb_free=2.8, wall=181221
2021-06-20 20:59:18 | INFO | train_inner | epoch 006: 788 / 3002 loss=2.47, ppl=5.54, wps=5883.5, ups=0.09, wpb=64881, bsz=128, num_updates=15707, lr=9.98823e-05, gnorm=1.956, loss_scale=16, train_wall=11, gb_free=2.8, wall=181233
2021-06-20 20:59:29 | INFO | train_inner | epoch 006: 789 / 3002 loss=2.501, ppl=5.66, wps=5938.9, ups=0.09, wpb=64873, bsz=128, num_updates=15708, lr=9.98823e-05, gnorm=2.097, loss_scale=32, train_wall=10, gb_free=2.8, wall=181243
2021-06-20 20:59:40 | INFO | train_inner | epoch 006: 790 / 3002 loss=2.519, ppl=5.73, wps=5966.2, ups=0.09, wpb=64792, bsz=128, num_updates=15709, lr=9.98823e-05, gnorm=1.879, loss_scale=32, train_wall=10, gb_free=2.8, wall=181254
2021-06-20 20:59:51 | INFO | train_inner | epoch 006: 791 / 3002 loss=2.29, ppl=4.89, wps=5961.3, ups=0.09, wpb=64878, bsz=128, num_updates=15710, lr=9.98823e-05, gnorm=1.895, loss_scale=32, train_wall=10, gb_free=2.8, wall=181265
2021-06-20 21:00:02 | INFO | train_inner | epoch 006: 792 / 3002 loss=2.615, ppl=6.13, wps=5884.2, ups=0.09, wpb=64758, bsz=128, num_updates=15711, lr=9.98823e-05, gnorm=1.939, loss_scale=32, train_wall=11, gb_free=2.8, wall=181276
2021-06-20 21:00:13 | INFO | train_inner | epoch 006: 793 / 3002 loss=2.412, ppl=5.32, wps=5856.8, ups=0.09, wpb=64859, bsz=128, num_updates=15712, lr=9.98823e-05, gnorm=1.927, loss_scale=32, train_wall=11, gb_free=2.8, wall=181287
2021-06-20 21:00:24 | INFO | train_inner | epoch 006: 794 / 3002 loss=2.467, ppl=5.53, wps=5876.2, ups=0.09, wpb=64874, bsz=128, num_updates=15713, lr=9.98823e-05, gnorm=1.896, loss_scale=32, train_wall=11, gb_free=2.8, wall=181298
2021-06-20 21:00:35 | INFO | train_inner | epoch 006: 795 / 3002 loss=2.409, ppl=5.31, wps=5878.3, ups=0.09, wpb=64779, bsz=128, num_updates=15714, lr=9.98823e-05, gnorm=1.996, loss_scale=32, train_wall=11, gb_free=2.8, wall=181309
2021-06-20 21:00:46 | INFO | train_inner | epoch 006: 796 / 3002 loss=2.325, ppl=5.01, wps=5869.4, ups=0.09, wpb=64882, bsz=128, num_updates=15715, lr=9.98823e-05, gnorm=1.844, loss_scale=32, train_wall=11, gb_free=2.8, wall=181320
2021-06-20 21:00:57 | INFO | train_inner | epoch 006: 797 / 3002 loss=2.437, ppl=5.42, wps=5785.9, ups=0.09, wpb=64880, bsz=128, num_updates=15716, lr=9.98823e-05, gnorm=1.921, loss_scale=32, train_wall=11, gb_free=2.8, wall=181332
2021-06-20 21:01:08 | INFO | train_inner | epoch 006: 798 / 3002 loss=2.45, ppl=5.46, wps=5790.2, ups=0.09, wpb=64738, bsz=128, num_updates=15717, lr=9.98823e-05, gnorm=1.883, loss_scale=32, train_wall=11, gb_free=2.8, wall=181343
2021-06-20 21:01:20 | INFO | train_inner | epoch 006: 799 / 3002 loss=2.501, ppl=5.66, wps=5791.9, ups=0.09, wpb=64823, bsz=128, num_updates=15718, lr=9.98822e-05, gnorm=1.954, loss_scale=32, train_wall=11, gb_free=2.8, wall=181354
2021-06-20 21:01:31 | INFO | train_inner | epoch 006: 800 / 3002 loss=2.405, ppl=5.29, wps=5924.1, ups=0.09, wpb=64868, bsz=128, num_updates=15719, lr=9.98822e-05, gnorm=1.89, loss_scale=32, train_wall=10, gb_free=2.8, wall=181365
2021-06-20 21:01:42 | INFO | train_inner | epoch 006: 801 / 3002 loss=2.584, ppl=5.99, wps=5890.8, ups=0.09, wpb=64882, bsz=128, num_updates=15720, lr=9.98822e-05, gnorm=1.981, loss_scale=32, train_wall=11, gb_free=2.8, wall=181376
2021-06-20 21:01:53 | INFO | train_inner | epoch 006: 802 / 3002 loss=2.445, ppl=5.44, wps=5792.6, ups=0.09, wpb=64860, bsz=128, num_updates=15721, lr=9.98822e-05, gnorm=1.985, loss_scale=32, train_wall=11, gb_free=2.8, wall=181387
2021-06-20 21:02:04 | INFO | train_inner | epoch 006: 803 / 3002 loss=2.444, ppl=5.44, wps=5796.1, ups=0.09, wpb=64869, bsz=128, num_updates=15722, lr=9.98822e-05, gnorm=1.868, loss_scale=32, train_wall=11, gb_free=2.8, wall=181398
2021-06-20 21:02:15 | INFO | train_inner | epoch 006: 804 / 3002 loss=2.494, ppl=5.63, wps=5801.3, ups=0.09, wpb=64856, bsz=128, num_updates=15723, lr=9.98822e-05, gnorm=1.928, loss_scale=32, train_wall=11, gb_free=2.8, wall=181409
2021-06-20 21:02:26 | INFO | train_inner | epoch 006: 805 / 3002 loss=2.677, ppl=6.4, wps=5883.7, ups=0.09, wpb=64747, bsz=128, num_updates=15724, lr=9.98822e-05, gnorm=2.025, loss_scale=32, train_wall=11, gb_free=2.8, wall=181420
2021-06-20 21:02:37 | INFO | train_inner | epoch 006: 806 / 3002 loss=2.611, ppl=6.11, wps=5819.6, ups=0.09, wpb=64840, bsz=128, num_updates=15725, lr=9.98822e-05, gnorm=1.939, loss_scale=32, train_wall=11, gb_free=2.8, wall=181432
2021-06-20 21:02:48 | INFO | train_inner | epoch 006: 807 / 3002 loss=2.577, ppl=5.97, wps=5883.9, ups=0.09, wpb=64851, bsz=128, num_updates=15726, lr=9.98822e-05, gnorm=1.912, loss_scale=32, train_wall=11, gb_free=2.8, wall=181443
2021-06-20 21:02:59 | INFO | train_inner | epoch 006: 808 / 3002 loss=2.312, ppl=4.97, wps=5877.3, ups=0.09, wpb=64895, bsz=128, num_updates=15727, lr=9.98822e-05, gnorm=1.973, loss_scale=32, train_wall=11, gb_free=2.8, wall=181454
2021-06-20 21:03:11 | INFO | train_inner | epoch 006: 809 / 3002 loss=2.486, ppl=5.6, wps=5784.6, ups=0.09, wpb=64788, bsz=128, num_updates=15728, lr=9.98822e-05, gnorm=1.877, loss_scale=32, train_wall=11, gb_free=2.8, wall=181465
2021-06-20 21:03:22 | INFO | train_inner | epoch 006: 810 / 3002 loss=2.496, ppl=5.64, wps=5856.8, ups=0.09, wpb=64845, bsz=128, num_updates=15729, lr=9.98822e-05, gnorm=1.858, loss_scale=32, train_wall=11, gb_free=2.8, wall=181476
2021-06-20 21:03:33 | INFO | train_inner | epoch 006: 811 / 3002 loss=2.51, ppl=5.69, wps=5786.1, ups=0.09, wpb=64773, bsz=128, num_updates=15730, lr=9.98822e-05, gnorm=1.885, loss_scale=32, train_wall=11, gb_free=2.8, wall=181487
2021-06-20 21:03:44 | INFO | train_inner | epoch 006: 812 / 3002 loss=2.534, ppl=5.79, wps=5867.5, ups=0.09, wpb=64767, bsz=128, num_updates=15731, lr=9.98821e-05, gnorm=1.976, loss_scale=32, train_wall=11, gb_free=2.8, wall=181498
2021-06-20 21:03:55 | INFO | train_inner | epoch 006: 813 / 3002 loss=2.511, ppl=5.7, wps=5894.2, ups=0.09, wpb=64742, bsz=128, num_updates=15732, lr=9.98821e-05, gnorm=1.855, loss_scale=32, train_wall=10, gb_free=2.8, wall=181509
2021-06-20 21:04:06 | INFO | train_inner | epoch 006: 814 / 3002 loss=2.437, ppl=5.42, wps=5860.6, ups=0.09, wpb=64907, bsz=128, num_updates=15733, lr=9.98821e-05, gnorm=1.957, loss_scale=32, train_wall=11, gb_free=2.8, wall=181520
2021-06-20 21:04:17 | INFO | train_inner | epoch 006: 815 / 3002 loss=2.435, ppl=5.41, wps=5947.2, ups=0.09, wpb=64827, bsz=128, num_updates=15734, lr=9.98821e-05, gnorm=1.863, loss_scale=32, train_wall=10, gb_free=2.8, wall=181531
2021-06-20 21:04:28 | INFO | train_inner | epoch 006: 816 / 3002 loss=2.343, ppl=5.07, wps=5876.5, ups=0.09, wpb=64789, bsz=128, num_updates=15735, lr=9.98821e-05, gnorm=1.837, loss_scale=32, train_wall=11, gb_free=2.8, wall=181542
2021-06-20 21:04:39 | INFO | train_inner | epoch 006: 817 / 3002 loss=2.527, ppl=5.76, wps=5746.2, ups=0.09, wpb=64816, bsz=128, num_updates=15736, lr=9.98821e-05, gnorm=1.926, loss_scale=32, train_wall=11, gb_free=2.8, wall=181553
2021-06-20 21:04:50 | INFO | train_inner | epoch 006: 818 / 3002 loss=2.516, ppl=5.72, wps=5799.1, ups=0.09, wpb=64792, bsz=128, num_updates=15737, lr=9.98821e-05, gnorm=1.95, loss_scale=32, train_wall=11, gb_free=2.8, wall=181565
2021-06-20 21:05:01 | INFO | train_inner | epoch 006: 819 / 3002 loss=2.483, ppl=5.59, wps=6001.9, ups=0.09, wpb=64867, bsz=128, num_updates=15738, lr=9.98821e-05, gnorm=1.899, loss_scale=32, train_wall=10, gb_free=2.8, wall=181575
2021-06-20 21:05:12 | INFO | train_inner | epoch 006: 820 / 3002 loss=2.673, ppl=6.38, wps=5750.8, ups=0.09, wpb=64771, bsz=128, num_updates=15739, lr=9.98821e-05, gnorm=2.126, loss_scale=32, train_wall=11, gb_free=2.8, wall=181587
2021-06-20 21:05:24 | INFO | train_inner | epoch 006: 821 / 3002 loss=2.505, ppl=5.68, wps=5776.3, ups=0.09, wpb=64827, bsz=128, num_updates=15740, lr=9.98821e-05, gnorm=1.934, loss_scale=32, train_wall=11, gb_free=2.8, wall=181598
2021-06-20 21:05:35 | INFO | train_inner | epoch 006: 822 / 3002 loss=2.323, ppl=5, wps=5797, ups=0.09, wpb=64828, bsz=128, num_updates=15741, lr=9.98821e-05, gnorm=1.962, loss_scale=32, train_wall=11, gb_free=2.8, wall=181609
2021-06-20 21:05:46 | INFO | train_inner | epoch 006: 823 / 3002 loss=2.401, ppl=5.28, wps=5740.9, ups=0.09, wpb=64850, bsz=128, num_updates=15742, lr=9.98821e-05, gnorm=2.053, loss_scale=32, train_wall=11, gb_free=2.8, wall=181620
2021-06-20 21:05:57 | INFO | train_inner | epoch 006: 824 / 3002 loss=2.292, ppl=4.9, wps=5853.8, ups=0.09, wpb=64823, bsz=128, num_updates=15743, lr=9.9882e-05, gnorm=1.908, loss_scale=32, train_wall=11, gb_free=2.8, wall=181631
2021-06-20 21:06:08 | INFO | train_inner | epoch 006: 825 / 3002 loss=2.492, ppl=5.63, wps=5892.3, ups=0.09, wpb=64867, bsz=128, num_updates=15744, lr=9.9882e-05, gnorm=2.019, loss_scale=32, train_wall=11, gb_free=2.8, wall=181643
2021-06-20 21:06:19 | INFO | train_inner | epoch 006: 826 / 3002 loss=2.356, ppl=5.12, wps=5771.2, ups=0.09, wpb=64881, bsz=128, num_updates=15745, lr=9.9882e-05, gnorm=1.823, loss_scale=32, train_wall=11, gb_free=2.8, wall=181654
2021-06-20 21:06:31 | INFO | train_inner | epoch 006: 827 / 3002 loss=2.508, ppl=5.69, wps=5840.6, ups=0.09, wpb=64802, bsz=128, num_updates=15746, lr=9.9882e-05, gnorm=1.921, loss_scale=32, train_wall=11, gb_free=2.8, wall=181665
2021-06-20 21:06:42 | INFO | train_inner | epoch 006: 828 / 3002 loss=2.358, ppl=5.13, wps=5819, ups=0.09, wpb=64804, bsz=128, num_updates=15747, lr=9.9882e-05, gnorm=1.854, loss_scale=32, train_wall=11, gb_free=2.8, wall=181676
2021-06-20 21:06:53 | INFO | train_inner | epoch 006: 829 / 3002 loss=2.419, ppl=5.35, wps=5764, ups=0.09, wpb=64856, bsz=128, num_updates=15748, lr=9.9882e-05, gnorm=1.934, loss_scale=32, train_wall=11, gb_free=2.8, wall=181687
2021-06-20 21:07:04 | INFO | train_inner | epoch 006: 830 / 3002 loss=2.504, ppl=5.67, wps=5725, ups=0.09, wpb=64808, bsz=128, num_updates=15749, lr=9.9882e-05, gnorm=1.875, loss_scale=32, train_wall=11, gb_free=2.8, wall=181699
2021-06-20 21:07:15 | INFO | train_inner | epoch 006: 831 / 3002 loss=2.5, ppl=5.66, wps=5969, ups=0.09, wpb=64760, bsz=128, num_updates=15750, lr=9.9882e-05, gnorm=1.871, loss_scale=32, train_wall=10, gb_free=2.8, wall=181709
2021-06-20 21:07:26 | INFO | train_inner | epoch 006: 832 / 3002 loss=2.501, ppl=5.66, wps=5966, ups=0.09, wpb=64799, bsz=128, num_updates=15751, lr=9.9882e-05, gnorm=1.958, loss_scale=32, train_wall=10, gb_free=2.8, wall=181720
2021-06-20 21:07:37 | INFO | train_inner | epoch 006: 833 / 3002 loss=2.535, ppl=5.79, wps=5962, ups=0.09, wpb=64886, bsz=128, num_updates=15752, lr=9.9882e-05, gnorm=1.903, loss_scale=32, train_wall=10, gb_free=2.8, wall=181731
2021-06-20 21:07:48 | INFO | train_inner | epoch 006: 834 / 3002 loss=2.487, ppl=5.61, wps=5878.8, ups=0.09, wpb=64843, bsz=128, num_updates=15753, lr=9.9882e-05, gnorm=1.997, loss_scale=32, train_wall=11, gb_free=2.8, wall=181742
2021-06-20 21:07:59 | INFO | train_inner | epoch 006: 835 / 3002 loss=2.339, ppl=5.06, wps=5836.8, ups=0.09, wpb=64762, bsz=128, num_updates=15754, lr=9.9882e-05, gnorm=1.934, loss_scale=32, train_wall=11, gb_free=2.8, wall=181753
2021-06-20 21:08:10 | INFO | train_inner | epoch 006: 836 / 3002 loss=2.559, ppl=5.89, wps=5879.9, ups=0.09, wpb=64898, bsz=128, num_updates=15755, lr=9.9882e-05, gnorm=1.958, loss_scale=32, train_wall=11, gb_free=2.8, wall=181764
2021-06-20 21:08:21 | INFO | train_inner | epoch 006: 837 / 3002 loss=2.527, ppl=5.76, wps=5777, ups=0.09, wpb=64828, bsz=128, num_updates=15756, lr=9.98819e-05, gnorm=1.933, loss_scale=32, train_wall=11, gb_free=2.8, wall=181776
2021-06-20 21:08:32 | INFO | train_inner | epoch 006: 838 / 3002 loss=2.362, ppl=5.14, wps=5787.2, ups=0.09, wpb=64835, bsz=128, num_updates=15757, lr=9.98819e-05, gnorm=1.923, loss_scale=32, train_wall=11, gb_free=2.8, wall=181787
2021-06-20 21:08:43 | INFO | train_inner | epoch 006: 839 / 3002 loss=2.494, ppl=5.63, wps=5911.1, ups=0.09, wpb=64804, bsz=128, num_updates=15758, lr=9.98819e-05, gnorm=1.877, loss_scale=32, train_wall=11, gb_free=2.8, wall=181798
2021-06-20 21:08:54 | INFO | train_inner | epoch 006: 840 / 3002 loss=2.484, ppl=5.6, wps=5894.9, ups=0.09, wpb=64843, bsz=128, num_updates=15759, lr=9.98819e-05, gnorm=1.938, loss_scale=32, train_wall=11, gb_free=2.8, wall=181809
2021-06-20 21:09:05 | INFO | train_inner | epoch 006: 841 / 3002 loss=2.459, ppl=5.5, wps=5868.7, ups=0.09, wpb=64811, bsz=128, num_updates=15760, lr=9.98819e-05, gnorm=1.896, loss_scale=32, train_wall=11, gb_free=2.8, wall=181820
2021-06-20 21:09:16 | INFO | train_inner | epoch 006: 842 / 3002 loss=2.468, ppl=5.53, wps=5905.3, ups=0.09, wpb=64843, bsz=128, num_updates=15761, lr=9.98819e-05, gnorm=1.889, loss_scale=32, train_wall=10, gb_free=2.8, wall=181831
2021-06-20 21:09:27 | INFO | train_inner | epoch 006: 843 / 3002 loss=2.446, ppl=5.45, wps=5978.1, ups=0.09, wpb=64929, bsz=128, num_updates=15762, lr=9.98819e-05, gnorm=1.976, loss_scale=32, train_wall=10, gb_free=2.8, wall=181842
2021-06-20 21:09:38 | INFO | train_inner | epoch 006: 844 / 3002 loss=2.448, ppl=5.46, wps=5818.1, ups=0.09, wpb=64775, bsz=128, num_updates=15763, lr=9.98819e-05, gnorm=1.93, loss_scale=32, train_wall=11, gb_free=2.8, wall=181853
2021-06-20 21:09:50 | INFO | train_inner | epoch 006: 845 / 3002 loss=2.593, ppl=6.03, wps=5736.3, ups=0.09, wpb=64848, bsz=128, num_updates=15764, lr=9.98819e-05, gnorm=1.933, loss_scale=32, train_wall=11, gb_free=2.8, wall=181864
2021-06-20 21:10:01 | INFO | train_inner | epoch 006: 846 / 3002 loss=2.459, ppl=5.5, wps=5852, ups=0.09, wpb=64831, bsz=128, num_updates=15765, lr=9.98819e-05, gnorm=1.91, loss_scale=32, train_wall=11, gb_free=2.8, wall=181875
2021-06-20 21:10:12 | INFO | train_inner | epoch 006: 847 / 3002 loss=2.397, ppl=5.27, wps=5856.9, ups=0.09, wpb=64827, bsz=128, num_updates=15766, lr=9.98819e-05, gnorm=2.044, loss_scale=32, train_wall=11, gb_free=2.8, wall=181886
2021-06-20 21:10:23 | INFO | train_inner | epoch 006: 848 / 3002 loss=2.538, ppl=5.81, wps=5853.2, ups=0.09, wpb=64812, bsz=128, num_updates=15767, lr=9.98819e-05, gnorm=1.92, loss_scale=32, train_wall=11, gb_free=2.8, wall=181897
2021-06-20 21:10:34 | INFO | train_inner | epoch 006: 849 / 3002 loss=2.566, ppl=5.92, wps=5823.8, ups=0.09, wpb=64877, bsz=128, num_updates=15768, lr=9.98818e-05, gnorm=1.992, loss_scale=32, train_wall=11, gb_free=2.8, wall=181908
2021-06-20 21:10:45 | INFO | train_inner | epoch 006: 850 / 3002 loss=2.499, ppl=5.65, wps=5849.4, ups=0.09, wpb=64730, bsz=128, num_updates=15769, lr=9.98818e-05, gnorm=1.929, loss_scale=32, train_wall=11, gb_free=2.8, wall=181919
2021-06-20 21:10:56 | INFO | train_inner | epoch 006: 851 / 3002 loss=2.369, ppl=5.17, wps=5855.5, ups=0.09, wpb=64826, bsz=128, num_updates=15770, lr=9.98818e-05, gnorm=2.03, loss_scale=32, train_wall=11, gb_free=2.8, wall=181931
2021-06-20 21:11:07 | INFO | train_inner | epoch 006: 852 / 3002 loss=2.602, ppl=6.07, wps=5968.9, ups=0.09, wpb=64896, bsz=128, num_updates=15771, lr=9.98818e-05, gnorm=1.91, loss_scale=32, train_wall=10, gb_free=2.8, wall=181941
2021-06-20 21:11:18 | INFO | train_inner | epoch 006: 853 / 3002 loss=2.485, ppl=5.6, wps=5752.3, ups=0.09, wpb=64841, bsz=128, num_updates=15772, lr=9.98818e-05, gnorm=1.986, loss_scale=32, train_wall=11, gb_free=2.8, wall=181953
2021-06-20 21:11:29 | INFO | train_inner | epoch 006: 854 / 3002 loss=2.424, ppl=5.37, wps=5901.1, ups=0.09, wpb=64857, bsz=128, num_updates=15773, lr=9.98818e-05, gnorm=1.852, loss_scale=32, train_wall=11, gb_free=2.8, wall=181964
2021-06-20 21:11:40 | INFO | train_inner | epoch 006: 855 / 3002 loss=2.386, ppl=5.23, wps=5858.1, ups=0.09, wpb=64826, bsz=128, num_updates=15774, lr=9.98818e-05, gnorm=1.879, loss_scale=32, train_wall=11, gb_free=2.8, wall=181975
2021-06-20 21:11:51 | INFO | train_inner | epoch 006: 856 / 3002 loss=2.446, ppl=5.45, wps=5915.7, ups=0.09, wpb=64802, bsz=128, num_updates=15775, lr=9.98818e-05, gnorm=1.917, loss_scale=32, train_wall=10, gb_free=2.8, wall=181986
2021-06-20 21:12:02 | INFO | train_inner | epoch 006: 857 / 3002 loss=2.626, ppl=6.17, wps=5818.7, ups=0.09, wpb=64833, bsz=128, num_updates=15776, lr=9.98818e-05, gnorm=1.869, loss_scale=32, train_wall=11, gb_free=2.8, wall=181997
2021-06-20 21:12:14 | INFO | train_inner | epoch 006: 858 / 3002 loss=2.339, ppl=5.06, wps=5838.3, ups=0.09, wpb=64854, bsz=128, num_updates=15777, lr=9.98818e-05, gnorm=1.813, loss_scale=32, train_wall=11, gb_free=2.8, wall=182008
2021-06-20 21:12:25 | INFO | train_inner | epoch 006: 859 / 3002 loss=2.342, ppl=5.07, wps=5899.9, ups=0.09, wpb=64920, bsz=128, num_updates=15778, lr=9.98818e-05, gnorm=1.859, loss_scale=32, train_wall=11, gb_free=2.8, wall=182019
2021-06-20 21:12:36 | INFO | train_inner | epoch 006: 860 / 3002 loss=2.623, ppl=6.16, wps=5926.3, ups=0.09, wpb=64875, bsz=128, num_updates=15779, lr=9.98818e-05, gnorm=1.931, loss_scale=32, train_wall=10, gb_free=2.8, wall=182030
2021-06-20 21:12:47 | INFO | train_inner | epoch 006: 861 / 3002 loss=2.603, ppl=6.08, wps=5858.3, ups=0.09, wpb=64786, bsz=128, num_updates=15780, lr=9.98818e-05, gnorm=1.985, loss_scale=32, train_wall=11, gb_free=2.8, wall=182041
2021-06-20 21:12:58 | INFO | train_inner | epoch 006: 862 / 3002 loss=2.53, ppl=5.77, wps=5810.7, ups=0.09, wpb=64756, bsz=128, num_updates=15781, lr=9.98817e-05, gnorm=1.89, loss_scale=32, train_wall=11, gb_free=2.8, wall=182052
2021-06-20 21:13:09 | INFO | train_inner | epoch 006: 863 / 3002 loss=2.376, ppl=5.19, wps=5716.9, ups=0.09, wpb=64839, bsz=128, num_updates=15782, lr=9.98817e-05, gnorm=2.16, loss_scale=32, train_wall=11, gb_free=2.8, wall=182063
2021-06-20 21:13:20 | INFO | train_inner | epoch 006: 864 / 3002 loss=2.404, ppl=5.29, wps=5907, ups=0.09, wpb=64888, bsz=128, num_updates=15783, lr=9.98817e-05, gnorm=1.86, loss_scale=32, train_wall=11, gb_free=2.8, wall=182074
2021-06-20 21:13:31 | INFO | train_inner | epoch 006: 865 / 3002 loss=2.36, ppl=5.13, wps=5852.9, ups=0.09, wpb=64765, bsz=128, num_updates=15784, lr=9.98817e-05, gnorm=1.867, loss_scale=32, train_wall=11, gb_free=2.8, wall=182085
2021-06-20 21:13:42 | INFO | train_inner | epoch 006: 866 / 3002 loss=2.504, ppl=5.67, wps=5862.1, ups=0.09, wpb=64877, bsz=128, num_updates=15785, lr=9.98817e-05, gnorm=1.97, loss_scale=32, train_wall=11, gb_free=2.8, wall=182097
2021-06-20 21:13:53 | INFO | train_inner | epoch 006: 867 / 3002 loss=2.561, ppl=5.9, wps=5846.1, ups=0.09, wpb=64812, bsz=128, num_updates=15786, lr=9.98817e-05, gnorm=1.919, loss_scale=32, train_wall=11, gb_free=2.8, wall=182108
2021-06-20 21:14:04 | INFO | train_inner | epoch 006: 868 / 3002 loss=2.193, ppl=4.57, wps=5885.6, ups=0.09, wpb=64801, bsz=128, num_updates=15787, lr=9.98817e-05, gnorm=1.912, loss_scale=32, train_wall=11, gb_free=2.8, wall=182119
2021-06-20 21:14:15 | INFO | train_inner | epoch 006: 869 / 3002 loss=2.4, ppl=5.28, wps=5948, ups=0.09, wpb=64795, bsz=128, num_updates=15788, lr=9.98817e-05, gnorm=1.852, loss_scale=32, train_wall=10, gb_free=2.8, wall=182130
2021-06-20 21:14:26 | INFO | train_inner | epoch 006: 870 / 3002 loss=2.439, ppl=5.42, wps=5842.5, ups=0.09, wpb=64840, bsz=128, num_updates=15789, lr=9.98817e-05, gnorm=1.967, loss_scale=32, train_wall=11, gb_free=2.8, wall=182141
2021-06-20 21:14:37 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0
2021-06-20 21:14:48 | INFO | train_inner | epoch 006: 872 / 3002 loss=2.549, ppl=5.85, wps=2940.5, ups=0.05, wpb=64829, bsz=128, num_updates=15790, lr=9.98817e-05, gnorm=1.973, loss_scale=16, train_wall=21, gb_free=2.8, wall=182163
2021-06-20 21:14:59 | INFO | train_inner | epoch 006: 873 / 3002 loss=2.371, ppl=5.17, wps=5842.4, ups=0.09, wpb=64836, bsz=128, num_updates=15791, lr=9.98817e-05, gnorm=1.872, loss_scale=16, train_wall=11, gb_free=2.8, wall=182174
2021-06-20 21:15:11 | INFO | train_inner | epoch 006: 874 / 3002 loss=2.469, ppl=5.54, wps=5869, ups=0.09, wpb=64919, bsz=128, num_updates=15792, lr=9.98817e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=182185
2021-06-20 21:15:21 | INFO | train_inner | epoch 006: 875 / 3002 loss=2.421, ppl=5.36, wps=5910.9, ups=0.09, wpb=64814, bsz=128, num_updates=15793, lr=9.98816e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=182196
2021-06-20 21:15:32 | INFO | train_inner | epoch 006: 876 / 3002 loss=2.468, ppl=5.53, wps=5959.3, ups=0.09, wpb=64807, bsz=128, num_updates=15794, lr=9.98816e-05, gnorm=1.938, loss_scale=16, train_wall=10, gb_free=2.8, wall=182207
2021-06-20 21:15:44 | INFO | train_inner | epoch 006: 877 / 3002 loss=2.399, ppl=5.27, wps=5757, ups=0.09, wpb=64896, bsz=128, num_updates=15795, lr=9.98816e-05, gnorm=1.946, loss_scale=16, train_wall=11, gb_free=2.8, wall=182218
2021-06-20 21:15:55 | INFO | train_inner | epoch 006: 878 / 3002 loss=2.643, ppl=6.25, wps=5840, ups=0.09, wpb=64819, bsz=128, num_updates=15796, lr=9.98816e-05, gnorm=1.994, loss_scale=16, train_wall=11, gb_free=2.8, wall=182229
2021-06-20 21:16:06 | INFO | train_inner | epoch 006: 879 / 3002 loss=2.355, ppl=5.12, wps=5989.2, ups=0.09, wpb=64843, bsz=128, num_updates=15797, lr=9.98816e-05, gnorm=1.914, loss_scale=16, train_wall=10, gb_free=2.8, wall=182240
2021-06-20 21:16:17 | INFO | train_inner | epoch 006: 880 / 3002 loss=2.574, ppl=5.96, wps=5719, ups=0.09, wpb=64801, bsz=128, num_updates=15798, lr=9.98816e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=182251
2021-06-20 21:16:28 | INFO | train_inner | epoch 006: 881 / 3002 loss=2.529, ppl=5.77, wps=5893.6, ups=0.09, wpb=64802, bsz=128, num_updates=15799, lr=9.98816e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=182262
2021-06-20 21:16:39 | INFO | train_inner | epoch 006: 882 / 3002 loss=2.432, ppl=5.4, wps=5822.8, ups=0.09, wpb=64925, bsz=128, num_updates=15800, lr=9.98816e-05, gnorm=1.917, loss_scale=16, train_wall=11, gb_free=2.8, wall=182273
2021-06-20 21:16:50 | INFO | train_inner | epoch 006: 883 / 3002 loss=2.296, ppl=4.91, wps=5755.6, ups=0.09, wpb=64831, bsz=128, num_updates=15801, lr=9.98816e-05, gnorm=1.873, loss_scale=16, train_wall=11, gb_free=2.8, wall=182285
2021-06-20 21:17:01 | INFO | train_inner | epoch 006: 884 / 3002 loss=2.456, ppl=5.49, wps=5877.8, ups=0.09, wpb=64793, bsz=128, num_updates=15802, lr=9.98816e-05, gnorm=1.913, loss_scale=16, train_wall=11, gb_free=2.8, wall=182296
2021-06-20 21:17:13 | INFO | train_inner | epoch 006: 885 / 3002 loss=2.366, ppl=5.16, wps=5768.5, ups=0.09, wpb=64860, bsz=128, num_updates=15803, lr=9.98816e-05, gnorm=1.957, loss_scale=16, train_wall=11, gb_free=2.8, wall=182307
2021-06-20 21:17:24 | INFO | train_inner | epoch 006: 886 / 3002 loss=2.56, ppl=5.9, wps=5695.2, ups=0.09, wpb=64738, bsz=128, num_updates=15804, lr=9.98816e-05, gnorm=1.975, loss_scale=16, train_wall=11, gb_free=2.8, wall=182318
2021-06-20 21:17:35 | INFO | train_inner | epoch 006: 887 / 3002 loss=2.464, ppl=5.52, wps=5930.9, ups=0.09, wpb=64828, bsz=128, num_updates=15805, lr=9.98816e-05, gnorm=1.935, loss_scale=16, train_wall=10, gb_free=2.8, wall=182329
2021-06-20 21:17:46 | INFO | train_inner | epoch 006: 888 / 3002 loss=2.464, ppl=5.52, wps=5865.2, ups=0.09, wpb=64804, bsz=128, num_updates=15806, lr=9.98815e-05, gnorm=1.83, loss_scale=16, train_wall=11, gb_free=2.8, wall=182340
2021-06-20 21:17:57 | INFO | train_inner | epoch 006: 889 / 3002 loss=2.438, ppl=5.42, wps=5815.6, ups=0.09, wpb=64915, bsz=128, num_updates=15807, lr=9.98815e-05, gnorm=1.929, loss_scale=16, train_wall=11, gb_free=2.8, wall=182351
2021-06-20 21:18:08 | INFO | train_inner | epoch 006: 890 / 3002 loss=2.546, ppl=5.84, wps=5913.3, ups=0.09, wpb=64896, bsz=128, num_updates=15808, lr=9.98815e-05, gnorm=1.891, loss_scale=16, train_wall=11, gb_free=2.8, wall=182362
2021-06-20 21:18:19 | INFO | train_inner | epoch 006: 891 / 3002 loss=2.551, ppl=5.86, wps=5968.3, ups=0.09, wpb=64844, bsz=128, num_updates=15809, lr=9.98815e-05, gnorm=1.926, loss_scale=16, train_wall=10, gb_free=2.8, wall=182373
2021-06-20 21:18:30 | INFO | train_inner | epoch 006: 892 / 3002 loss=2.575, ppl=5.96, wps=5897.5, ups=0.09, wpb=64853, bsz=128, num_updates=15810, lr=9.98815e-05, gnorm=1.955, loss_scale=16, train_wall=11, gb_free=2.8, wall=182384
2021-06-20 21:18:41 | INFO | train_inner | epoch 006: 893 / 3002 loss=2.402, ppl=5.28, wps=5764.3, ups=0.09, wpb=64743, bsz=128, num_updates=15811, lr=9.98815e-05, gnorm=1.982, loss_scale=16, train_wall=11, gb_free=2.8, wall=182395
2021-06-20 21:18:52 | INFO | train_inner | epoch 006: 894 / 3002 loss=2.411, ppl=5.32, wps=5840.6, ups=0.09, wpb=64876, bsz=128, num_updates=15812, lr=9.98815e-05, gnorm=1.899, loss_scale=16, train_wall=11, gb_free=2.8, wall=182407
2021-06-20 21:19:03 | INFO | train_inner | epoch 006: 895 / 3002 loss=2.409, ppl=5.31, wps=5797.6, ups=0.09, wpb=64802, bsz=128, num_updates=15813, lr=9.98815e-05, gnorm=2.007, loss_scale=16, train_wall=11, gb_free=2.8, wall=182418
2021-06-20 21:19:14 | INFO | train_inner | epoch 006: 896 / 3002 loss=2.429, ppl=5.38, wps=5950.3, ups=0.09, wpb=64792, bsz=128, num_updates=15814, lr=9.98815e-05, gnorm=1.957, loss_scale=16, train_wall=10, gb_free=2.8, wall=182429
2021-06-20 21:19:25 | INFO | train_inner | epoch 006: 897 / 3002 loss=2.565, ppl=5.92, wps=5870.1, ups=0.09, wpb=64792, bsz=128, num_updates=15815, lr=9.98815e-05, gnorm=1.87, loss_scale=16, train_wall=11, gb_free=2.8, wall=182440
2021-06-20 21:19:37 | INFO | train_inner | epoch 006: 898 / 3002 loss=2.572, ppl=5.94, wps=5809.8, ups=0.09, wpb=64852, bsz=128, num_updates=15816, lr=9.98815e-05, gnorm=2.011, loss_scale=16, train_wall=11, gb_free=2.8, wall=182451
2021-06-20 21:19:48 | INFO | train_inner | epoch 006: 899 / 3002 loss=2.612, ppl=6.11, wps=5759.7, ups=0.09, wpb=64789, bsz=128, num_updates=15817, lr=9.98815e-05, gnorm=2.022, loss_scale=16, train_wall=11, gb_free=2.8, wall=182462
2021-06-20 21:19:59 | INFO | train_inner | epoch 006: 900 / 3002 loss=2.252, ppl=4.76, wps=5874.8, ups=0.09, wpb=64860, bsz=128, num_updates=15818, lr=9.98814e-05, gnorm=2.006, loss_scale=16, train_wall=11, gb_free=2.8, wall=182473
2021-06-20 21:20:10 | INFO | train_inner | epoch 006: 901 / 3002 loss=2.442, ppl=5.43, wps=5895.1, ups=0.09, wpb=64852, bsz=128, num_updates=15819, lr=9.98814e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=182484
2021-06-20 21:20:21 | INFO | train_inner | epoch 006: 902 / 3002 loss=2.557, ppl=5.89, wps=5866.4, ups=0.09, wpb=64825, bsz=128, num_updates=15820, lr=9.98814e-05, gnorm=1.892, loss_scale=16, train_wall=11, gb_free=2.8, wall=182495
2021-06-20 21:20:32 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 21:20:43 | INFO | train_inner | epoch 006: 904 / 3002 loss=2.495, ppl=5.64, wps=2970.2, ups=0.05, wpb=64845, bsz=128, num_updates=15821, lr=9.98814e-05, gnorm=1.844, loss_scale=8, train_wall=21, gb_free=2.8, wall=182517
2021-06-20 21:20:54 | INFO | train_inner | epoch 006: 905 / 3002 loss=2.585, ppl=6, wps=5866.7, ups=0.09, wpb=64746, bsz=128, num_updates=15822, lr=9.98814e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=182528
2021-06-20 21:21:05 | INFO | train_inner | epoch 006: 906 / 3002 loss=2.398, ppl=5.27, wps=5844.4, ups=0.09, wpb=64836, bsz=128, num_updates=15823, lr=9.98814e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=182539
2021-06-20 21:21:16 | INFO | train_inner | epoch 006: 907 / 3002 loss=2.427, ppl=5.38, wps=5775.5, ups=0.09, wpb=64784, bsz=128, num_updates=15824, lr=9.98814e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=182550
2021-06-20 21:21:27 | INFO | train_inner | epoch 006: 908 / 3002 loss=2.635, ppl=6.21, wps=5787.3, ups=0.09, wpb=64791, bsz=128, num_updates=15825, lr=9.98814e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=182562
2021-06-20 21:21:39 | INFO | train_inner | epoch 006: 909 / 3002 loss=2.329, ppl=5.03, wps=5715.8, ups=0.09, wpb=64751, bsz=128, num_updates=15826, lr=9.98814e-05, gnorm=1.882, loss_scale=8, train_wall=11, gb_free=2.8, wall=182573
2021-06-20 21:21:50 | INFO | train_inner | epoch 006: 910 / 3002 loss=2.468, ppl=5.53, wps=5840.6, ups=0.09, wpb=64819, bsz=128, num_updates=15827, lr=9.98814e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=182584
2021-06-20 21:22:01 | INFO | train_inner | epoch 006: 911 / 3002 loss=2.395, ppl=5.26, wps=5847.6, ups=0.09, wpb=64812, bsz=128, num_updates=15828, lr=9.98814e-05, gnorm=1.869, loss_scale=8, train_wall=11, gb_free=2.8, wall=182595
2021-06-20 21:22:12 | INFO | train_inner | epoch 006: 912 / 3002 loss=2.438, ppl=5.42, wps=5812, ups=0.09, wpb=64737, bsz=128, num_updates=15829, lr=9.98814e-05, gnorm=1.857, loss_scale=8, train_wall=11, gb_free=2.8, wall=182606
2021-06-20 21:22:23 | INFO | train_inner | epoch 006: 913 / 3002 loss=2.538, ppl=5.81, wps=5917.5, ups=0.09, wpb=64829, bsz=128, num_updates=15830, lr=9.98814e-05, gnorm=2.025, loss_scale=8, train_wall=11, gb_free=2.8, wall=182617
2021-06-20 21:22:34 | INFO | train_inner | epoch 006: 914 / 3002 loss=2.43, ppl=5.39, wps=5823.1, ups=0.09, wpb=64842, bsz=128, num_updates=15831, lr=9.98813e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=182628
2021-06-20 21:22:45 | INFO | train_inner | epoch 006: 915 / 3002 loss=2.47, ppl=5.54, wps=5842.5, ups=0.09, wpb=64824, bsz=128, num_updates=15832, lr=9.98813e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=182639
2021-06-20 21:22:56 | INFO | train_inner | epoch 006: 916 / 3002 loss=2.249, ppl=4.75, wps=5863.1, ups=0.09, wpb=64893, bsz=128, num_updates=15833, lr=9.98813e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=182650
2021-06-20 21:23:07 | INFO | train_inner | epoch 006: 917 / 3002 loss=2.487, ppl=5.61, wps=5745.1, ups=0.09, wpb=64801, bsz=128, num_updates=15834, lr=9.98813e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=182662
2021-06-20 21:23:18 | INFO | train_inner | epoch 006: 918 / 3002 loss=2.443, ppl=5.44, wps=5886.9, ups=0.09, wpb=64881, bsz=128, num_updates=15835, lr=9.98813e-05, gnorm=1.833, loss_scale=8, train_wall=11, gb_free=2.8, wall=182673
2021-06-20 21:23:29 | INFO | train_inner | epoch 006: 919 / 3002 loss=2.485, ppl=5.6, wps=5887.2, ups=0.09, wpb=64768, bsz=128, num_updates=15836, lr=9.98813e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=182684
2021-06-20 21:23:41 | INFO | train_inner | epoch 006: 920 / 3002 loss=2.466, ppl=5.53, wps=5837.9, ups=0.09, wpb=64844, bsz=128, num_updates=15837, lr=9.98813e-05, gnorm=2.212, loss_scale=8, train_wall=11, gb_free=2.8, wall=182695
2021-06-20 21:23:51 | INFO | train_inner | epoch 006: 921 / 3002 loss=2.362, ppl=5.14, wps=5920.7, ups=0.09, wpb=64808, bsz=128, num_updates=15838, lr=9.98813e-05, gnorm=1.905, loss_scale=8, train_wall=11, gb_free=2.8, wall=182706
2021-06-20 21:24:03 | INFO | train_inner | epoch 006: 922 / 3002 loss=2.429, ppl=5.39, wps=5752.9, ups=0.09, wpb=64820, bsz=128, num_updates=15839, lr=9.98813e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=182717
2021-06-20 21:24:14 | INFO | train_inner | epoch 006: 923 / 3002 loss=2.306, ppl=4.95, wps=5854.8, ups=0.09, wpb=64857, bsz=128, num_updates=15840, lr=9.98813e-05, gnorm=1.831, loss_scale=8, train_wall=11, gb_free=2.8, wall=182728
2021-06-20 21:24:25 | INFO | train_inner | epoch 006: 924 / 3002 loss=2.476, ppl=5.56, wps=5785.5, ups=0.09, wpb=64791, bsz=128, num_updates=15841, lr=9.98813e-05, gnorm=2.691, loss_scale=8, train_wall=11, gb_free=2.8, wall=182739
2021-06-20 21:24:36 | INFO | train_inner | epoch 006: 925 / 3002 loss=2.442, ppl=5.43, wps=5831.9, ups=0.09, wpb=64862, bsz=128, num_updates=15842, lr=9.98813e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=182750
2021-06-20 21:24:47 | INFO | train_inner | epoch 006: 926 / 3002 loss=2.4, ppl=5.28, wps=5745.8, ups=0.09, wpb=64767, bsz=128, num_updates=15843, lr=9.98812e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=182762
2021-06-20 21:24:58 | INFO | train_inner | epoch 006: 927 / 3002 loss=2.444, ppl=5.44, wps=5863.5, ups=0.09, wpb=64843, bsz=128, num_updates=15844, lr=9.98812e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=182773
2021-06-20 21:25:10 | INFO | train_inner | epoch 006: 928 / 3002 loss=2.483, ppl=5.59, wps=5878.6, ups=0.09, wpb=64884, bsz=128, num_updates=15845, lr=9.98812e-05, gnorm=2.006, loss_scale=8, train_wall=11, gb_free=2.8, wall=182784
2021-06-20 21:25:21 | INFO | train_inner | epoch 006: 929 / 3002 loss=2.388, ppl=5.24, wps=5902.4, ups=0.09, wpb=64830, bsz=128, num_updates=15846, lr=9.98812e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=182795
2021-06-20 21:25:32 | INFO | train_inner | epoch 006: 930 / 3002 loss=2.394, ppl=5.26, wps=5846.4, ups=0.09, wpb=64816, bsz=128, num_updates=15847, lr=9.98812e-05, gnorm=1.981, loss_scale=8, train_wall=11, gb_free=2.8, wall=182806
2021-06-20 21:25:42 | INFO | train_inner | epoch 006: 931 / 3002 loss=2.39, ppl=5.24, wps=5961.7, ups=0.09, wpb=64873, bsz=128, num_updates=15848, lr=9.98812e-05, gnorm=1.935, loss_scale=8, train_wall=10, gb_free=2.8, wall=182817
2021-06-20 21:25:53 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-20 21:26:04 | INFO | train_inner | epoch 006: 933 / 3002 loss=2.544, ppl=5.83, wps=2958.5, ups=0.05, wpb=64810, bsz=128, num_updates=15849, lr=9.98812e-05, gnorm=1.937, loss_scale=4, train_wall=21, gb_free=2.8, wall=182839
2021-06-20 21:26:16 | INFO | train_inner | epoch 006: 934 / 3002 loss=2.374, ppl=5.18, wps=5748.3, ups=0.09, wpb=64771, bsz=128, num_updates=15850, lr=9.98812e-05, gnorm=1.888, loss_scale=4, train_wall=11, gb_free=2.8, wall=182850
2021-06-20 21:26:27 | INFO | train_inner | epoch 006: 935 / 3002 loss=2.452, ppl=5.47, wps=5886.2, ups=0.09, wpb=64940, bsz=128, num_updates=15851, lr=9.98812e-05, gnorm=1.875, loss_scale=4, train_wall=11, gb_free=2.8, wall=182861
2021-06-20 21:26:38 | INFO | train_inner | epoch 006: 936 / 3002 loss=2.554, ppl=5.87, wps=5876, ups=0.09, wpb=64843, bsz=128, num_updates=15852, lr=9.98812e-05, gnorm=1.959, loss_scale=4, train_wall=11, gb_free=2.8, wall=182872
2021-06-20 21:26:49 | INFO | train_inner | epoch 006: 937 / 3002 loss=2.419, ppl=5.35, wps=5769.1, ups=0.09, wpb=64838, bsz=128, num_updates=15853, lr=9.98812e-05, gnorm=1.87, loss_scale=4, train_wall=11, gb_free=2.8, wall=182883
2021-06-20 21:27:00 | INFO | train_inner | epoch 006: 938 / 3002 loss=2.462, ppl=5.51, wps=5933.6, ups=0.09, wpb=64860, bsz=128, num_updates=15854, lr=9.98812e-05, gnorm=1.901, loss_scale=4, train_wall=10, gb_free=2.8, wall=182894
2021-06-20 21:27:11 | INFO | train_inner | epoch 006: 939 / 3002 loss=2.315, ppl=4.98, wps=5870.5, ups=0.09, wpb=64857, bsz=128, num_updates=15855, lr=9.98812e-05, gnorm=1.914, loss_scale=4, train_wall=11, gb_free=2.8, wall=182905
2021-06-20 21:27:22 | INFO | train_inner | epoch 006: 940 / 3002 loss=2.401, ppl=5.28, wps=5953.3, ups=0.09, wpb=64814, bsz=128, num_updates=15856, lr=9.98811e-05, gnorm=1.945, loss_scale=4, train_wall=10, gb_free=2.8, wall=182916
2021-06-20 21:27:33 | INFO | train_inner | epoch 006: 941 / 3002 loss=2.308, ppl=4.95, wps=5995.6, ups=0.09, wpb=64850, bsz=128, num_updates=15857, lr=9.98811e-05, gnorm=1.902, loss_scale=4, train_wall=10, gb_free=2.8, wall=182927
2021-06-20 21:27:44 | INFO | train_inner | epoch 006: 942 / 3002 loss=2.613, ppl=6.12, wps=5817.1, ups=0.09, wpb=64846, bsz=128, num_updates=15858, lr=9.98811e-05, gnorm=1.914, loss_scale=4, train_wall=11, gb_free=2.8, wall=182938
2021-06-20 21:27:55 | INFO | train_inner | epoch 006: 943 / 3002 loss=2.632, ppl=6.2, wps=5913.1, ups=0.09, wpb=64857, bsz=128, num_updates=15859, lr=9.98811e-05, gnorm=1.927, loss_scale=4, train_wall=11, gb_free=2.8, wall=182949
2021-06-20 21:28:06 | INFO | train_inner | epoch 006: 944 / 3002 loss=2.53, ppl=5.78, wps=5748.9, ups=0.09, wpb=64801, bsz=128, num_updates=15860, lr=9.98811e-05, gnorm=2.115, loss_scale=4, train_wall=11, gb_free=2.8, wall=182960
2021-06-20 21:28:17 | INFO | train_inner | epoch 006: 945 / 3002 loss=2.508, ppl=5.69, wps=5910.6, ups=0.09, wpb=64836, bsz=128, num_updates=15861, lr=9.98811e-05, gnorm=1.936, loss_scale=4, train_wall=10, gb_free=2.8, wall=182971
2021-06-20 21:28:28 | INFO | train_inner | epoch 006: 946 / 3002 loss=2.581, ppl=5.98, wps=5824.1, ups=0.09, wpb=64829, bsz=128, num_updates=15862, lr=9.98811e-05, gnorm=1.872, loss_scale=4, train_wall=11, gb_free=2.8, wall=182982
2021-06-20 21:28:39 | INFO | train_inner | epoch 006: 947 / 3002 loss=2.458, ppl=5.49, wps=5796.3, ups=0.09, wpb=64855, bsz=128, num_updates=15863, lr=9.98811e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=182994
2021-06-20 21:28:50 | INFO | train_inner | epoch 006: 948 / 3002 loss=2.525, ppl=5.76, wps=5812.2, ups=0.09, wpb=64784, bsz=128, num_updates=15864, lr=9.98811e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=183005
2021-06-20 21:29:01 | INFO | train_inner | epoch 006: 949 / 3002 loss=2.452, ppl=5.47, wps=5927, ups=0.09, wpb=64816, bsz=128, num_updates=15865, lr=9.98811e-05, gnorm=1.951, loss_scale=4, train_wall=11, gb_free=2.8, wall=183016
2021-06-20 21:29:12 | INFO | train_inner | epoch 006: 950 / 3002 loss=2.536, ppl=5.8, wps=5953.9, ups=0.09, wpb=64884, bsz=128, num_updates=15866, lr=9.98811e-05, gnorm=1.982, loss_scale=4, train_wall=10, gb_free=2.8, wall=183027
2021-06-20 21:29:23 | INFO | train_inner | epoch 006: 951 / 3002 loss=2.575, ppl=5.96, wps=5912.6, ups=0.09, wpb=64780, bsz=128, num_updates=15867, lr=9.98811e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=183038
2021-06-20 21:29:34 | INFO | train_inner | epoch 006: 952 / 3002 loss=2.59, ppl=6.02, wps=5786.7, ups=0.09, wpb=64815, bsz=128, num_updates=15868, lr=9.9881e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=183049
2021-06-20 21:29:46 | INFO | train_inner | epoch 006: 953 / 3002 loss=2.411, ppl=5.32, wps=5837.8, ups=0.09, wpb=64718, bsz=128, num_updates=15869, lr=9.9881e-05, gnorm=1.839, loss_scale=4, train_wall=11, gb_free=2.8, wall=183060
2021-06-20 21:29:57 | INFO | train_inner | epoch 006: 954 / 3002 loss=2.473, ppl=5.55, wps=5832.6, ups=0.09, wpb=64820, bsz=128, num_updates=15870, lr=9.9881e-05, gnorm=1.968, loss_scale=4, train_wall=11, gb_free=2.8, wall=183071
2021-06-20 21:30:08 | INFO | train_inner | epoch 006: 955 / 3002 loss=2.523, ppl=5.75, wps=5716.2, ups=0.09, wpb=64831, bsz=128, num_updates=15871, lr=9.9881e-05, gnorm=1.922, loss_scale=4, train_wall=11, gb_free=2.8, wall=183082
2021-06-20 21:30:19 | INFO | train_inner | epoch 006: 956 / 3002 loss=2.48, ppl=5.58, wps=5897.4, ups=0.09, wpb=64829, bsz=128, num_updates=15872, lr=9.9881e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=183093
2021-06-20 21:30:30 | INFO | train_inner | epoch 006: 957 / 3002 loss=2.604, ppl=6.08, wps=5919, ups=0.09, wpb=64841, bsz=128, num_updates=15873, lr=9.9881e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=183104
2021-06-20 21:30:41 | INFO | train_inner | epoch 006: 958 / 3002 loss=2.509, ppl=5.69, wps=5831.9, ups=0.09, wpb=64787, bsz=128, num_updates=15874, lr=9.9881e-05, gnorm=1.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=183115
2021-06-20 21:30:52 | INFO | train_inner | epoch 006: 959 / 3002 loss=2.558, ppl=5.89, wps=5859.5, ups=0.09, wpb=64824, bsz=128, num_updates=15875, lr=9.9881e-05, gnorm=1.938, loss_scale=4, train_wall=11, gb_free=2.8, wall=183126
2021-06-20 21:31:03 | INFO | train_inner | epoch 006: 960 / 3002 loss=2.557, ppl=5.88, wps=5883.8, ups=0.09, wpb=64816, bsz=128, num_updates=15876, lr=9.9881e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=183137
2021-06-20 21:31:14 | INFO | train_inner | epoch 006: 961 / 3002 loss=2.65, ppl=6.28, wps=5803.9, ups=0.09, wpb=64800, bsz=128, num_updates=15877, lr=9.9881e-05, gnorm=1.887, loss_scale=4, train_wall=11, gb_free=2.8, wall=183149
2021-06-20 21:31:25 | INFO | train_inner | epoch 006: 962 / 3002 loss=2.556, ppl=5.88, wps=5832.3, ups=0.09, wpb=64683, bsz=128, num_updates=15878, lr=9.9881e-05, gnorm=1.966, loss_scale=4, train_wall=11, gb_free=2.8, wall=183160
2021-06-20 21:31:36 | INFO | train_inner | epoch 006: 963 / 3002 loss=2.346, ppl=5.08, wps=5904.1, ups=0.09, wpb=64897, bsz=128, num_updates=15879, lr=9.9881e-05, gnorm=1.862, loss_scale=4, train_wall=11, gb_free=2.8, wall=183171
2021-06-20 21:31:47 | INFO | train_inner | epoch 006: 964 / 3002 loss=2.49, ppl=5.62, wps=5873.3, ups=0.09, wpb=64819, bsz=128, num_updates=15880, lr=9.9881e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=183182
2021-06-20 21:31:58 | INFO | train_inner | epoch 006: 965 / 3002 loss=2.479, ppl=5.58, wps=5875.6, ups=0.09, wpb=64774, bsz=128, num_updates=15881, lr=9.98809e-05, gnorm=1.82, loss_scale=4, train_wall=11, gb_free=2.8, wall=183193
2021-06-20 21:32:09 | INFO | train_inner | epoch 006: 966 / 3002 loss=2.491, ppl=5.62, wps=5871.9, ups=0.09, wpb=64800, bsz=128, num_updates=15882, lr=9.98809e-05, gnorm=1.949, loss_scale=4, train_wall=11, gb_free=2.8, wall=183204
2021-06-20 21:32:21 | INFO | train_inner | epoch 006: 967 / 3002 loss=2.387, ppl=5.23, wps=5827, ups=0.09, wpb=64840, bsz=128, num_updates=15883, lr=9.98809e-05, gnorm=1.875, loss_scale=4, train_wall=11, gb_free=2.8, wall=183215
2021-06-20 21:32:32 | INFO | train_inner | epoch 006: 968 / 3002 loss=2.517, ppl=5.72, wps=5922.2, ups=0.09, wpb=64910, bsz=128, num_updates=15884, lr=9.98809e-05, gnorm=1.975, loss_scale=4, train_wall=10, gb_free=2.8, wall=183226
2021-06-20 21:32:43 | INFO | train_inner | epoch 006: 969 / 3002 loss=2.57, ppl=5.94, wps=5899.5, ups=0.09, wpb=64815, bsz=128, num_updates=15885, lr=9.98809e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=183237
2021-06-20 21:32:54 | INFO | train_inner | epoch 006: 970 / 3002 loss=2.637, ppl=6.22, wps=5921.8, ups=0.09, wpb=64878, bsz=128, num_updates=15886, lr=9.98809e-05, gnorm=1.945, loss_scale=4, train_wall=10, gb_free=2.8, wall=183248
2021-06-20 21:33:05 | INFO | train_inner | epoch 006: 971 / 3002 loss=2.476, ppl=5.56, wps=5736.9, ups=0.09, wpb=64735, bsz=128, num_updates=15887, lr=9.98809e-05, gnorm=2.544, loss_scale=4, train_wall=11, gb_free=2.8, wall=183259
2021-06-20 21:33:16 | INFO | train_inner | epoch 006: 972 / 3002 loss=2.567, ppl=5.93, wps=5898, ups=0.09, wpb=64820, bsz=128, num_updates=15888, lr=9.98809e-05, gnorm=1.892, loss_scale=4, train_wall=11, gb_free=2.8, wall=183270
2021-06-20 21:33:27 | INFO | train_inner | epoch 006: 973 / 3002 loss=2.305, ppl=4.94, wps=5890.7, ups=0.09, wpb=64856, bsz=128, num_updates=15889, lr=9.98809e-05, gnorm=1.875, loss_scale=4, train_wall=11, gb_free=2.8, wall=183281
2021-06-20 21:33:38 | INFO | train_inner | epoch 006: 974 / 3002 loss=2.365, ppl=5.15, wps=5901.4, ups=0.09, wpb=64856, bsz=128, num_updates=15890, lr=9.98809e-05, gnorm=1.878, loss_scale=4, train_wall=11, gb_free=2.8, wall=183292
2021-06-20 21:33:49 | INFO | train_inner | epoch 006: 975 / 3002 loss=2.479, ppl=5.57, wps=5869.4, ups=0.09, wpb=64844, bsz=128, num_updates=15891, lr=9.98809e-05, gnorm=1.949, loss_scale=4, train_wall=11, gb_free=2.8, wall=183303
2021-06-20 21:34:00 | INFO | train_inner | epoch 006: 976 / 3002 loss=2.481, ppl=5.58, wps=5802.2, ups=0.09, wpb=64893, bsz=128, num_updates=15892, lr=9.98809e-05, gnorm=1.926, loss_scale=4, train_wall=11, gb_free=2.8, wall=183314
2021-06-20 21:34:11 | INFO | train_inner | epoch 006: 977 / 3002 loss=2.432, ppl=5.4, wps=5887.1, ups=0.09, wpb=64812, bsz=128, num_updates=15893, lr=9.98808e-05, gnorm=1.882, loss_scale=4, train_wall=11, gb_free=2.8, wall=183325
2021-06-20 21:34:22 | INFO | train_inner | epoch 006: 978 / 3002 loss=2.462, ppl=5.51, wps=5684.3, ups=0.09, wpb=64847, bsz=128, num_updates=15894, lr=9.98808e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=183337
2021-06-20 21:34:34 | INFO | train_inner | epoch 006: 979 / 3002 loss=2.524, ppl=5.75, wps=5801.2, ups=0.09, wpb=64852, bsz=128, num_updates=15895, lr=9.98808e-05, gnorm=1.975, loss_scale=4, train_wall=11, gb_free=2.8, wall=183348
2021-06-20 21:34:45 | INFO | train_inner | epoch 006: 980 / 3002 loss=2.523, ppl=5.75, wps=5871.6, ups=0.09, wpb=64836, bsz=128, num_updates=15896, lr=9.98808e-05, gnorm=3.627, loss_scale=4, train_wall=11, gb_free=2.8, wall=183359
2021-06-20 21:34:56 | INFO | train_inner | epoch 006: 981 / 3002 loss=2.407, ppl=5.3, wps=5880.9, ups=0.09, wpb=64868, bsz=128, num_updates=15897, lr=9.98808e-05, gnorm=1.851, loss_scale=4, train_wall=11, gb_free=2.8, wall=183370
2021-06-20 21:35:07 | INFO | train_inner | epoch 006: 982 / 3002 loss=2.423, ppl=5.36, wps=5928, ups=0.09, wpb=64881, bsz=128, num_updates=15898, lr=9.98808e-05, gnorm=1.883, loss_scale=4, train_wall=10, gb_free=2.8, wall=183381
2021-06-20 21:35:18 | INFO | train_inner | epoch 006: 983 / 3002 loss=2.551, ppl=5.86, wps=5898.3, ups=0.09, wpb=64915, bsz=128, num_updates=15899, lr=9.98808e-05, gnorm=2.115, loss_scale=4, train_wall=11, gb_free=2.8, wall=183392
2021-06-20 21:35:29 | INFO | train_inner | epoch 006: 984 / 3002 loss=2.435, ppl=5.41, wps=5824, ups=0.09, wpb=64787, bsz=128, num_updates=15900, lr=9.98808e-05, gnorm=1.883, loss_scale=4, train_wall=11, gb_free=2.8, wall=183403
2021-06-20 21:35:40 | INFO | train_inner | epoch 006: 985 / 3002 loss=2.455, ppl=5.48, wps=5975.4, ups=0.09, wpb=64881, bsz=128, num_updates=15901, lr=9.98808e-05, gnorm=1.932, loss_scale=4, train_wall=10, gb_free=2.8, wall=183414
2021-06-20 21:35:51 | INFO | train_inner | epoch 006: 986 / 3002 loss=2.455, ppl=5.48, wps=5857, ups=0.09, wpb=64790, bsz=128, num_updates=15902, lr=9.98808e-05, gnorm=2.857, loss_scale=4, train_wall=11, gb_free=2.8, wall=183425
2021-06-20 21:36:01 | INFO | train_inner | epoch 006: 987 / 3002 loss=2.542, ppl=5.82, wps=5998.8, ups=0.09, wpb=64825, bsz=128, num_updates=15903, lr=9.98808e-05, gnorm=1.965, loss_scale=4, train_wall=10, gb_free=2.8, wall=183436
2021-06-20 21:36:13 | INFO | train_inner | epoch 006: 988 / 3002 loss=2.579, ppl=5.98, wps=5769, ups=0.09, wpb=64782, bsz=128, num_updates=15904, lr=9.98808e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=183447
2021-06-20 21:36:24 | INFO | train_inner | epoch 006: 989 / 3002 loss=2.412, ppl=5.32, wps=5766.7, ups=0.09, wpb=64823, bsz=128, num_updates=15905, lr=9.98808e-05, gnorm=1.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=183458
2021-06-20 21:36:35 | INFO | train_inner | epoch 006: 990 / 3002 loss=2.412, ppl=5.32, wps=5903.1, ups=0.09, wpb=64869, bsz=128, num_updates=15906, lr=9.98807e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=183469
2021-06-20 21:36:46 | INFO | train_inner | epoch 006: 991 / 3002 loss=2.524, ppl=5.75, wps=5749.8, ups=0.09, wpb=64760, bsz=128, num_updates=15907, lr=9.98807e-05, gnorm=2.042, loss_scale=4, train_wall=11, gb_free=2.8, wall=183481
2021-06-20 21:36:57 | INFO | train_inner | epoch 006: 992 / 3002 loss=2.479, ppl=5.58, wps=5935.4, ups=0.09, wpb=64892, bsz=128, num_updates=15908, lr=9.98807e-05, gnorm=1.906, loss_scale=4, train_wall=10, gb_free=2.8, wall=183491
2021-06-20 21:37:08 | INFO | train_inner | epoch 006: 993 / 3002 loss=2.473, ppl=5.55, wps=5918.3, ups=0.09, wpb=64848, bsz=128, num_updates=15909, lr=9.98807e-05, gnorm=1.944, loss_scale=4, train_wall=10, gb_free=2.8, wall=183502
2021-06-20 21:37:19 | INFO | train_inner | epoch 006: 994 / 3002 loss=2.506, ppl=5.68, wps=5917.5, ups=0.09, wpb=64821, bsz=128, num_updates=15910, lr=9.98807e-05, gnorm=1.97, loss_scale=4, train_wall=10, gb_free=2.8, wall=183513
2021-06-20 21:37:30 | INFO | train_inner | epoch 006: 995 / 3002 loss=2.415, ppl=5.33, wps=5839.4, ups=0.09, wpb=64835, bsz=128, num_updates=15911, lr=9.98807e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=183525
2021-06-20 21:37:41 | INFO | train_inner | epoch 006: 996 / 3002 loss=2.5, ppl=5.66, wps=5764.5, ups=0.09, wpb=64840, bsz=128, num_updates=15912, lr=9.98807e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=183536
2021-06-20 21:37:53 | INFO | train_inner | epoch 006: 997 / 3002 loss=2.262, ppl=4.8, wps=5812.3, ups=0.09, wpb=64879, bsz=128, num_updates=15913, lr=9.98807e-05, gnorm=1.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=183547
2021-06-20 21:38:04 | INFO | train_inner | epoch 006: 998 / 3002 loss=2.387, ppl=5.23, wps=5800.7, ups=0.09, wpb=64799, bsz=128, num_updates=15914, lr=9.98807e-05, gnorm=2.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=183558
2021-06-20 21:38:15 | INFO | train_inner | epoch 006: 999 / 3002 loss=2.399, ppl=5.28, wps=5800.6, ups=0.09, wpb=64820, bsz=128, num_updates=15915, lr=9.98807e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=183569
2021-06-20 21:38:26 | INFO | train_inner | epoch 006: 1000 / 3002 loss=2.337, ppl=5.05, wps=5786.2, ups=0.09, wpb=64834, bsz=128, num_updates=15916, lr=9.98807e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=183580
2021-06-20 21:38:37 | INFO | train_inner | epoch 006: 1001 / 3002 loss=2.549, ppl=5.85, wps=5858.1, ups=0.09, wpb=64788, bsz=128, num_updates=15917, lr=9.98807e-05, gnorm=2.073, loss_scale=4, train_wall=11, gb_free=2.8, wall=183592
2021-06-20 21:38:48 | INFO | train_inner | epoch 006: 1002 / 3002 loss=2.41, ppl=5.31, wps=5886.3, ups=0.09, wpb=64834, bsz=128, num_updates=15918, lr=9.98806e-05, gnorm=1.869, loss_scale=4, train_wall=11, gb_free=2.8, wall=183603
2021-06-20 21:38:59 | INFO | train_inner | epoch 006: 1003 / 3002 loss=2.562, ppl=5.91, wps=5921.6, ups=0.09, wpb=64779, bsz=128, num_updates=15919, lr=9.98806e-05, gnorm=1.908, loss_scale=4, train_wall=10, gb_free=2.8, wall=183613
2021-06-20 21:39:10 | INFO | train_inner | epoch 006: 1004 / 3002 loss=2.318, ppl=4.99, wps=5803.4, ups=0.09, wpb=64766, bsz=128, num_updates=15920, lr=9.98806e-05, gnorm=1.881, loss_scale=4, train_wall=11, gb_free=2.8, wall=183625
2021-06-20 21:39:21 | INFO | train_inner | epoch 006: 1005 / 3002 loss=2.463, ppl=5.51, wps=5848.5, ups=0.09, wpb=64798, bsz=128, num_updates=15921, lr=9.98806e-05, gnorm=1.875, loss_scale=4, train_wall=11, gb_free=2.8, wall=183636
2021-06-20 21:39:33 | INFO | train_inner | epoch 006: 1006 / 3002 loss=2.308, ppl=4.95, wps=5750.3, ups=0.09, wpb=64887, bsz=128, num_updates=15922, lr=9.98806e-05, gnorm=1.848, loss_scale=4, train_wall=11, gb_free=2.8, wall=183647
2021-06-20 21:39:44 | INFO | train_inner | epoch 006: 1007 / 3002 loss=2.425, ppl=5.37, wps=5946.2, ups=0.09, wpb=64854, bsz=128, num_updates=15923, lr=9.98806e-05, gnorm=1.923, loss_scale=4, train_wall=10, gb_free=2.8, wall=183658
2021-06-20 21:39:55 | INFO | train_inner | epoch 006: 1008 / 3002 loss=2.484, ppl=5.59, wps=5913.4, ups=0.09, wpb=64830, bsz=128, num_updates=15924, lr=9.98806e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=183669
2021-06-20 21:40:06 | INFO | train_inner | epoch 006: 1009 / 3002 loss=2.482, ppl=5.59, wps=5827.2, ups=0.09, wpb=64823, bsz=128, num_updates=15925, lr=9.98806e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=183680
2021-06-20 21:40:17 | INFO | train_inner | epoch 006: 1010 / 3002 loss=2.462, ppl=5.51, wps=5945.5, ups=0.09, wpb=64768, bsz=128, num_updates=15926, lr=9.98806e-05, gnorm=1.887, loss_scale=4, train_wall=10, gb_free=2.8, wall=183691
2021-06-20 21:40:27 | INFO | train_inner | epoch 006: 1011 / 3002 loss=2.506, ppl=5.68, wps=5991.3, ups=0.09, wpb=64863, bsz=128, num_updates=15927, lr=9.98806e-05, gnorm=1.97, loss_scale=4, train_wall=10, gb_free=2.8, wall=183702
2021-06-20 21:40:38 | INFO | train_inner | epoch 006: 1012 / 3002 loss=2.586, ppl=6, wps=5900.1, ups=0.09, wpb=64891, bsz=128, num_updates=15928, lr=9.98806e-05, gnorm=1.835, loss_scale=4, train_wall=11, gb_free=2.8, wall=183713
2021-06-20 21:40:49 | INFO | train_inner | epoch 006: 1013 / 3002 loss=2.304, ppl=4.94, wps=5852.9, ups=0.09, wpb=64889, bsz=128, num_updates=15929, lr=9.98806e-05, gnorm=1.853, loss_scale=4, train_wall=11, gb_free=2.8, wall=183724
2021-06-20 21:41:00 | INFO | train_inner | epoch 006: 1014 / 3002 loss=2.453, ppl=5.48, wps=5887.7, ups=0.09, wpb=64823, bsz=128, num_updates=15930, lr=9.98806e-05, gnorm=1.972, loss_scale=4, train_wall=11, gb_free=2.8, wall=183735
2021-06-20 21:41:11 | INFO | train_inner | epoch 006: 1015 / 3002 loss=2.287, ppl=4.88, wps=6032, ups=0.09, wpb=64896, bsz=128, num_updates=15931, lr=9.98805e-05, gnorm=1.925, loss_scale=4, train_wall=10, gb_free=2.8, wall=183746
2021-06-20 21:41:23 | INFO | train_inner | epoch 006: 1016 / 3002 loss=2.525, ppl=5.76, wps=5758.2, ups=0.09, wpb=64856, bsz=128, num_updates=15932, lr=9.98805e-05, gnorm=1.966, loss_scale=4, train_wall=11, gb_free=2.8, wall=183757
2021-06-20 21:41:33 | INFO | train_inner | epoch 006: 1017 / 3002 loss=2.45, ppl=5.46, wps=6026.6, ups=0.09, wpb=64799, bsz=128, num_updates=15933, lr=9.98805e-05, gnorm=1.938, loss_scale=4, train_wall=10, gb_free=2.8, wall=183768
2021-06-20 21:41:44 | INFO | train_inner | epoch 006: 1018 / 3002 loss=2.556, ppl=5.88, wps=5853.8, ups=0.09, wpb=64763, bsz=128, num_updates=15934, lr=9.98805e-05, gnorm=1.898, loss_scale=4, train_wall=11, gb_free=2.8, wall=183779
2021-06-20 21:41:55 | INFO | train_inner | epoch 006: 1019 / 3002 loss=2.516, ppl=5.72, wps=5843.4, ups=0.09, wpb=64814, bsz=128, num_updates=15935, lr=9.98805e-05, gnorm=1.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=183790
2021-06-20 21:42:06 | INFO | train_inner | epoch 006: 1020 / 3002 loss=2.542, ppl=5.83, wps=5948.7, ups=0.09, wpb=64799, bsz=128, num_updates=15936, lr=9.98805e-05, gnorm=1.989, loss_scale=4, train_wall=10, gb_free=2.8, wall=183801
2021-06-20 21:42:17 | INFO | train_inner | epoch 006: 1021 / 3002 loss=2.435, ppl=5.41, wps=5870.6, ups=0.09, wpb=64855, bsz=128, num_updates=15937, lr=9.98805e-05, gnorm=1.881, loss_scale=4, train_wall=11, gb_free=2.8, wall=183812
2021-06-20 21:42:28 | INFO | train_inner | epoch 006: 1022 / 3002 loss=2.58, ppl=5.98, wps=5985.4, ups=0.09, wpb=64821, bsz=128, num_updates=15938, lr=9.98805e-05, gnorm=1.864, loss_scale=4, train_wall=10, gb_free=2.8, wall=183823
2021-06-20 21:42:39 | INFO | train_inner | epoch 006: 1023 / 3002 loss=2.55, ppl=5.86, wps=5945.3, ups=0.09, wpb=64793, bsz=128, num_updates=15939, lr=9.98805e-05, gnorm=1.881, loss_scale=4, train_wall=10, gb_free=2.8, wall=183833
2021-06-20 21:42:50 | INFO | train_inner | epoch 006: 1024 / 3002 loss=2.584, ppl=6, wps=5917, ups=0.09, wpb=64899, bsz=128, num_updates=15940, lr=9.98805e-05, gnorm=1.915, loss_scale=4, train_wall=10, gb_free=2.8, wall=183844
2021-06-20 21:43:01 | INFO | train_inner | epoch 006: 1025 / 3002 loss=2.377, ppl=5.2, wps=5754.4, ups=0.09, wpb=64821, bsz=128, num_updates=15941, lr=9.98805e-05, gnorm=1.862, loss_scale=4, train_wall=11, gb_free=2.8, wall=183856
2021-06-20 21:43:12 | INFO | train_inner | epoch 006: 1026 / 3002 loss=2.585, ppl=6, wps=5854.6, ups=0.09, wpb=64809, bsz=128, num_updates=15942, lr=9.98805e-05, gnorm=1.929, loss_scale=4, train_wall=11, gb_free=2.8, wall=183867
2021-06-20 21:43:24 | INFO | train_inner | epoch 006: 1027 / 3002 loss=2.518, ppl=5.73, wps=5723.5, ups=0.09, wpb=64787, bsz=128, num_updates=15943, lr=9.98804e-05, gnorm=1.913, loss_scale=4, train_wall=11, gb_free=2.8, wall=183878
2021-06-20 21:43:35 | INFO | train_inner | epoch 006: 1028 / 3002 loss=2.457, ppl=5.49, wps=5864.8, ups=0.09, wpb=64799, bsz=128, num_updates=15944, lr=9.98804e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=183889
2021-06-20 21:43:46 | INFO | train_inner | epoch 006: 1029 / 3002 loss=2.311, ppl=4.96, wps=5892.4, ups=0.09, wpb=64797, bsz=128, num_updates=15945, lr=9.98804e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=183900
2021-06-20 21:43:57 | INFO | train_inner | epoch 006: 1030 / 3002 loss=2.53, ppl=5.78, wps=5803.2, ups=0.09, wpb=64833, bsz=128, num_updates=15946, lr=9.98804e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=183911
2021-06-20 21:44:08 | INFO | train_inner | epoch 006: 1031 / 3002 loss=2.6, ppl=6.06, wps=6024.1, ups=0.09, wpb=64884, bsz=128, num_updates=15947, lr=9.98804e-05, gnorm=1.984, loss_scale=4, train_wall=10, gb_free=2.8, wall=183922
2021-06-20 21:44:19 | INFO | train_inner | epoch 006: 1032 / 3002 loss=2.438, ppl=5.42, wps=5887.6, ups=0.09, wpb=64858, bsz=128, num_updates=15948, lr=9.98804e-05, gnorm=1.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=183933
2021-06-20 21:44:30 | INFO | train_inner | epoch 006: 1033 / 3002 loss=2.528, ppl=5.77, wps=5938.3, ups=0.09, wpb=64857, bsz=128, num_updates=15949, lr=9.98804e-05, gnorm=1.931, loss_scale=4, train_wall=10, gb_free=2.8, wall=183944
2021-06-20 21:44:41 | INFO | train_inner | epoch 006: 1034 / 3002 loss=2.531, ppl=5.78, wps=5882.2, ups=0.09, wpb=64807, bsz=128, num_updates=15950, lr=9.98804e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=183955
2021-06-20 21:44:52 | INFO | train_inner | epoch 006: 1035 / 3002 loss=2.388, ppl=5.23, wps=5784.4, ups=0.09, wpb=64811, bsz=128, num_updates=15951, lr=9.98804e-05, gnorm=1.921, loss_scale=4, train_wall=11, gb_free=2.8, wall=183966
2021-06-20 21:45:03 | INFO | train_inner | epoch 006: 1036 / 3002 loss=2.475, ppl=5.56, wps=5874.6, ups=0.09, wpb=64814, bsz=128, num_updates=15952, lr=9.98804e-05, gnorm=1.879, loss_scale=4, train_wall=11, gb_free=2.8, wall=183977
2021-06-20 21:45:14 | INFO | train_inner | epoch 006: 1037 / 3002 loss=2.456, ppl=5.49, wps=5712.2, ups=0.09, wpb=64677, bsz=128, num_updates=15953, lr=9.98804e-05, gnorm=1.889, loss_scale=4, train_wall=11, gb_free=2.8, wall=183989
2021-06-20 21:45:25 | INFO | train_inner | epoch 006: 1038 / 3002 loss=2.44, ppl=5.43, wps=5807.8, ups=0.09, wpb=64792, bsz=128, num_updates=15954, lr=9.98804e-05, gnorm=1.953, loss_scale=4, train_wall=11, gb_free=2.8, wall=184000
2021-06-20 21:45:37 | INFO | train_inner | epoch 006: 1039 / 3002 loss=2.367, ppl=5.16, wps=5821, ups=0.09, wpb=64856, bsz=128, num_updates=15955, lr=9.98804e-05, gnorm=1.916, loss_scale=4, train_wall=11, gb_free=2.8, wall=184011
2021-06-20 21:45:48 | INFO | train_inner | epoch 006: 1040 / 3002 loss=2.537, ppl=5.8, wps=5805.1, ups=0.09, wpb=64778, bsz=128, num_updates=15956, lr=9.98803e-05, gnorm=1.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=184022
2021-06-20 21:45:59 | INFO | train_inner | epoch 006: 1041 / 3002 loss=2.499, ppl=5.65, wps=5796.3, ups=0.09, wpb=64861, bsz=128, num_updates=15957, lr=9.98803e-05, gnorm=2.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=184033
2021-06-20 21:46:10 | INFO | train_inner | epoch 006: 1042 / 3002 loss=2.672, ppl=6.37, wps=5863.4, ups=0.09, wpb=64849, bsz=128, num_updates=15958, lr=9.98803e-05, gnorm=2.039, loss_scale=4, train_wall=11, gb_free=2.8, wall=184044
2021-06-20 21:46:21 | INFO | train_inner | epoch 006: 1043 / 3002 loss=2.464, ppl=5.52, wps=5883.3, ups=0.09, wpb=64858, bsz=128, num_updates=15959, lr=9.98803e-05, gnorm=1.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=184055
2021-06-20 21:46:32 | INFO | train_inner | epoch 006: 1044 / 3002 loss=2.333, ppl=5.04, wps=5981.7, ups=0.09, wpb=64850, bsz=128, num_updates=15960, lr=9.98803e-05, gnorm=1.917, loss_scale=4, train_wall=10, gb_free=2.8, wall=184066
2021-06-20 21:46:43 | INFO | train_inner | epoch 006: 1045 / 3002 loss=2.53, ppl=5.78, wps=5903.4, ups=0.09, wpb=64848, bsz=128, num_updates=15961, lr=9.98803e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=184077
2021-06-20 21:46:54 | INFO | train_inner | epoch 006: 1046 / 3002 loss=2.44, ppl=5.42, wps=5884.3, ups=0.09, wpb=64916, bsz=128, num_updates=15962, lr=9.98803e-05, gnorm=2.039, loss_scale=4, train_wall=11, gb_free=2.8, wall=184088
2021-06-20 21:47:05 | INFO | train_inner | epoch 006: 1047 / 3002 loss=2.399, ppl=5.27, wps=5865.3, ups=0.09, wpb=64859, bsz=128, num_updates=15963, lr=9.98803e-05, gnorm=1.916, loss_scale=4, train_wall=11, gb_free=2.8, wall=184099
2021-06-20 21:47:16 | INFO | train_inner | epoch 006: 1048 / 3002 loss=2.304, ppl=4.94, wps=5931.7, ups=0.09, wpb=64874, bsz=128, num_updates=15964, lr=9.98803e-05, gnorm=2.601, loss_scale=4, train_wall=10, gb_free=2.8, wall=184110
2021-06-20 21:47:27 | INFO | train_inner | epoch 006: 1049 / 3002 loss=2.374, ppl=5.18, wps=5796, ups=0.09, wpb=64808, bsz=128, num_updates=15965, lr=9.98803e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=184121
2021-06-20 21:47:38 | INFO | train_inner | epoch 006: 1050 / 3002 loss=2.385, ppl=5.22, wps=5940.9, ups=0.09, wpb=64796, bsz=128, num_updates=15966, lr=9.98803e-05, gnorm=1.911, loss_scale=4, train_wall=10, gb_free=2.8, wall=184132
2021-06-20 21:47:49 | INFO | train_inner | epoch 006: 1051 / 3002 loss=2.541, ppl=5.82, wps=5853.5, ups=0.09, wpb=64833, bsz=128, num_updates=15967, lr=9.98803e-05, gnorm=1.93, loss_scale=4, train_wall=11, gb_free=2.8, wall=184143
2021-06-20 21:48:00 | INFO | train_inner | epoch 006: 1052 / 3002 loss=2.393, ppl=5.25, wps=5855.7, ups=0.09, wpb=64825, bsz=128, num_updates=15968, lr=9.98802e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=184154
2021-06-20 21:48:11 | INFO | train_inner | epoch 006: 1053 / 3002 loss=2.459, ppl=5.5, wps=5823.1, ups=0.09, wpb=64821, bsz=128, num_updates=15969, lr=9.98802e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=184165
2021-06-20 21:48:23 | INFO | train_inner | epoch 006: 1054 / 3002 loss=2.371, ppl=5.17, wps=5709.9, ups=0.09, wpb=64840, bsz=128, num_updates=15970, lr=9.98802e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=184177
2021-06-20 21:48:34 | INFO | train_inner | epoch 006: 1055 / 3002 loss=2.483, ppl=5.59, wps=5861, ups=0.09, wpb=64851, bsz=128, num_updates=15971, lr=9.98802e-05, gnorm=1.867, loss_scale=4, train_wall=11, gb_free=2.8, wall=184188
2021-06-20 21:48:45 | INFO | train_inner | epoch 006: 1056 / 3002 loss=2.343, ppl=5.07, wps=5829.2, ups=0.09, wpb=64786, bsz=128, num_updates=15972, lr=9.98802e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=184199
2021-06-20 21:48:56 | INFO | train_inner | epoch 006: 1057 / 3002 loss=2.412, ppl=5.32, wps=5890.9, ups=0.09, wpb=64854, bsz=128, num_updates=15973, lr=9.98802e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=184210
2021-06-20 21:49:07 | INFO | train_inner | epoch 006: 1058 / 3002 loss=2.562, ppl=5.91, wps=5924.4, ups=0.09, wpb=64895, bsz=128, num_updates=15974, lr=9.98802e-05, gnorm=1.912, loss_scale=4, train_wall=11, gb_free=2.8, wall=184221
2021-06-20 21:49:18 | INFO | train_inner | epoch 006: 1059 / 3002 loss=2.523, ppl=5.75, wps=5840.5, ups=0.09, wpb=64812, bsz=128, num_updates=15975, lr=9.98802e-05, gnorm=1.883, loss_scale=4, train_wall=11, gb_free=2.8, wall=184232
2021-06-20 21:49:29 | INFO | train_inner | epoch 006: 1060 / 3002 loss=2.399, ppl=5.27, wps=5822.8, ups=0.09, wpb=64844, bsz=128, num_updates=15976, lr=9.98802e-05, gnorm=1.804, loss_scale=8, train_wall=11, gb_free=2.8, wall=184243
2021-06-20 21:49:40 | INFO | train_inner | epoch 006: 1061 / 3002 loss=2.564, ppl=5.91, wps=5981, ups=0.09, wpb=64794, bsz=128, num_updates=15977, lr=9.98802e-05, gnorm=1.819, loss_scale=8, train_wall=10, gb_free=2.8, wall=184254
2021-06-20 21:49:51 | INFO | train_inner | epoch 006: 1062 / 3002 loss=2.458, ppl=5.49, wps=5880.9, ups=0.09, wpb=64791, bsz=128, num_updates=15978, lr=9.98802e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=184265
2021-06-20 21:50:02 | INFO | train_inner | epoch 006: 1063 / 3002 loss=2.426, ppl=5.37, wps=5804.8, ups=0.09, wpb=64818, bsz=128, num_updates=15979, lr=9.98802e-05, gnorm=1.928, loss_scale=8, train_wall=11, gb_free=2.8, wall=184276
2021-06-20 21:50:13 | INFO | train_inner | epoch 006: 1064 / 3002 loss=2.463, ppl=5.51, wps=5818.3, ups=0.09, wpb=64830, bsz=128, num_updates=15980, lr=9.98802e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=184287
2021-06-20 21:50:24 | INFO | train_inner | epoch 006: 1065 / 3002 loss=2.394, ppl=5.25, wps=5752.5, ups=0.09, wpb=64861, bsz=128, num_updates=15981, lr=9.98801e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=184299
2021-06-20 21:50:35 | INFO | train_inner | epoch 006: 1066 / 3002 loss=2.512, ppl=5.7, wps=5823.2, ups=0.09, wpb=64827, bsz=128, num_updates=15982, lr=9.98801e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=184310
2021-06-20 21:50:46 | INFO | train_inner | epoch 006: 1067 / 3002 loss=2.379, ppl=5.2, wps=5909, ups=0.09, wpb=64844, bsz=128, num_updates=15983, lr=9.98801e-05, gnorm=2.08, loss_scale=8, train_wall=10, gb_free=2.8, wall=184321
2021-06-20 21:50:58 | INFO | train_inner | epoch 006: 1068 / 3002 loss=2.527, ppl=5.76, wps=5721.8, ups=0.09, wpb=64785, bsz=128, num_updates=15984, lr=9.98801e-05, gnorm=1.889, loss_scale=8, train_wall=11, gb_free=2.8, wall=184332
2021-06-20 21:51:08 | INFO | train_inner | epoch 006: 1069 / 3002 loss=2.421, ppl=5.36, wps=6049.3, ups=0.09, wpb=64884, bsz=128, num_updates=15985, lr=9.98801e-05, gnorm=1.967, loss_scale=8, train_wall=10, gb_free=2.8, wall=184343
2021-06-20 21:51:20 | INFO | train_inner | epoch 006: 1070 / 3002 loss=2.367, ppl=5.16, wps=5886.9, ups=0.09, wpb=64862, bsz=128, num_updates=15986, lr=9.98801e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=184354
2021-06-20 21:51:31 | INFO | train_inner | epoch 006: 1071 / 3002 loss=2.47, ppl=5.54, wps=5816.4, ups=0.09, wpb=64873, bsz=128, num_updates=15987, lr=9.98801e-05, gnorm=1.822, loss_scale=8, train_wall=11, gb_free=2.8, wall=184365
2021-06-20 21:51:42 | INFO | train_inner | epoch 006: 1072 / 3002 loss=2.451, ppl=5.47, wps=5907.3, ups=0.09, wpb=64866, bsz=128, num_updates=15988, lr=9.98801e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=184376
2021-06-20 21:51:53 | INFO | train_inner | epoch 006: 1073 / 3002 loss=2.61, ppl=6.1, wps=5750.5, ups=0.09, wpb=64753, bsz=128, num_updates=15989, lr=9.98801e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=184387
2021-06-20 21:52:04 | INFO | train_inner | epoch 006: 1074 / 3002 loss=2.434, ppl=5.4, wps=5832.4, ups=0.09, wpb=64868, bsz=128, num_updates=15990, lr=9.98801e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=184398
2021-06-20 21:52:15 | INFO | train_inner | epoch 006: 1075 / 3002 loss=2.57, ppl=5.94, wps=5760.5, ups=0.09, wpb=64825, bsz=128, num_updates=15991, lr=9.98801e-05, gnorm=1.966, loss_scale=8, train_wall=11, gb_free=2.8, wall=184410
2021-06-20 21:52:26 | INFO | train_inner | epoch 006: 1076 / 3002 loss=2.632, ppl=6.2, wps=5823.8, ups=0.09, wpb=64809, bsz=128, num_updates=15992, lr=9.98801e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=184421
2021-06-20 21:52:37 | INFO | train_inner | epoch 006: 1077 / 3002 loss=2.408, ppl=5.31, wps=5936.9, ups=0.09, wpb=64874, bsz=128, num_updates=15993, lr=9.988e-05, gnorm=2.027, loss_scale=8, train_wall=10, gb_free=2.8, wall=184432
2021-06-20 21:52:49 | INFO | train_inner | epoch 006: 1078 / 3002 loss=2.604, ppl=6.08, wps=5742.6, ups=0.09, wpb=64773, bsz=128, num_updates=15994, lr=9.988e-05, gnorm=2.142, loss_scale=8, train_wall=11, gb_free=2.8, wall=184443
2021-06-20 21:53:00 | INFO | train_inner | epoch 006: 1079 / 3002 loss=2.483, ppl=5.59, wps=5860.6, ups=0.09, wpb=64810, bsz=128, num_updates=15995, lr=9.988e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=184454
2021-06-20 21:53:11 | INFO | train_inner | epoch 006: 1080 / 3002 loss=2.486, ppl=5.6, wps=5856, ups=0.09, wpb=64840, bsz=128, num_updates=15996, lr=9.988e-05, gnorm=1.926, loss_scale=8, train_wall=11, gb_free=2.8, wall=184465
2021-06-20 21:53:22 | INFO | train_inner | epoch 006: 1081 / 3002 loss=2.466, ppl=5.53, wps=5746.9, ups=0.09, wpb=64701, bsz=128, num_updates=15997, lr=9.988e-05, gnorm=1.892, loss_scale=8, train_wall=11, gb_free=2.8, wall=184476
2021-06-20 21:53:33 | INFO | train_inner | epoch 006: 1082 / 3002 loss=2.519, ppl=5.73, wps=5750.1, ups=0.09, wpb=64771, bsz=128, num_updates=15998, lr=9.988e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=184488
2021-06-20 21:53:45 | INFO | train_inner | epoch 006: 1083 / 3002 loss=2.533, ppl=5.79, wps=5740.1, ups=0.09, wpb=64804, bsz=128, num_updates=15999, lr=9.988e-05, gnorm=1.941, loss_scale=8, train_wall=11, gb_free=2.8, wall=184499
2021-06-20 21:53:56 | INFO | train_inner | epoch 006: 1084 / 3002 loss=2.456, ppl=5.49, wps=5891.2, ups=0.09, wpb=64746, bsz=128, num_updates=16000, lr=9.988e-05, gnorm=2.035, loss_scale=8, train_wall=11, gb_free=2.8, wall=184510
2021-06-20 21:54:07 | INFO | train_inner | epoch 006: 1085 / 3002 loss=2.466, ppl=5.52, wps=5778, ups=0.09, wpb=64804, bsz=128, num_updates=16001, lr=9.988e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=184521
2021-06-20 21:54:18 | INFO | train_inner | epoch 006: 1086 / 3002 loss=2.471, ppl=5.54, wps=5881.1, ups=0.09, wpb=64860, bsz=128, num_updates=16002, lr=9.988e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=184532
2021-06-20 21:54:29 | INFO | train_inner | epoch 006: 1087 / 3002 loss=2.491, ppl=5.62, wps=5812.6, ups=0.09, wpb=64814, bsz=128, num_updates=16003, lr=9.988e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=184543
2021-06-20 21:54:40 | INFO | train_inner | epoch 006: 1088 / 3002 loss=2.651, ppl=6.28, wps=5825.4, ups=0.09, wpb=64832, bsz=128, num_updates=16004, lr=9.988e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=184554
2021-06-20 21:54:51 | INFO | train_inner | epoch 006: 1089 / 3002 loss=2.353, ppl=5.11, wps=5782.6, ups=0.09, wpb=64848, bsz=128, num_updates=16005, lr=9.988e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=184566
2021-06-20 21:55:02 | INFO | train_inner | epoch 006: 1090 / 3002 loss=2.416, ppl=5.34, wps=5816.4, ups=0.09, wpb=64805, bsz=128, num_updates=16006, lr=9.98799e-05, gnorm=1.811, loss_scale=8, train_wall=11, gb_free=2.8, wall=184577
2021-06-20 21:55:13 | INFO | train_inner | epoch 006: 1091 / 3002 loss=2.469, ppl=5.54, wps=5938.9, ups=0.09, wpb=64870, bsz=128, num_updates=16007, lr=9.98799e-05, gnorm=1.897, loss_scale=8, train_wall=10, gb_free=2.8, wall=184588
2021-06-20 21:55:25 | INFO | train_inner | epoch 006: 1092 / 3002 loss=2.365, ppl=5.15, wps=5812.2, ups=0.09, wpb=64886, bsz=128, num_updates=16008, lr=9.98799e-05, gnorm=1.882, loss_scale=8, train_wall=11, gb_free=2.8, wall=184599
2021-06-20 21:55:36 | INFO | train_inner | epoch 006: 1093 / 3002 loss=2.41, ppl=5.31, wps=5832.6, ups=0.09, wpb=64813, bsz=128, num_updates=16009, lr=9.98799e-05, gnorm=1.953, loss_scale=8, train_wall=11, gb_free=2.8, wall=184610
2021-06-20 21:55:47 | INFO | train_inner | epoch 006: 1094 / 3002 loss=2.502, ppl=5.66, wps=5929.1, ups=0.09, wpb=64618, bsz=128, num_updates=16010, lr=9.98799e-05, gnorm=1.997, loss_scale=8, train_wall=10, gb_free=2.8, wall=184621
2021-06-20 21:55:58 | INFO | train_inner | epoch 006: 1095 / 3002 loss=2.599, ppl=6.06, wps=5817.4, ups=0.09, wpb=64856, bsz=128, num_updates=16011, lr=9.98799e-05, gnorm=1.932, loss_scale=8, train_wall=11, gb_free=2.8, wall=184632
2021-06-20 21:56:09 | INFO | train_inner | epoch 006: 1096 / 3002 loss=2.573, ppl=5.95, wps=5874.7, ups=0.09, wpb=64943, bsz=128, num_updates=16012, lr=9.98799e-05, gnorm=1.976, loss_scale=8, train_wall=11, gb_free=2.8, wall=184643
2021-06-20 21:56:20 | INFO | train_inner | epoch 006: 1097 / 3002 loss=2.481, ppl=5.58, wps=5966.1, ups=0.09, wpb=64832, bsz=128, num_updates=16013, lr=9.98799e-05, gnorm=1.938, loss_scale=8, train_wall=10, gb_free=2.8, wall=184654
2021-06-20 21:56:31 | INFO | train_inner | epoch 006: 1098 / 3002 loss=2.532, ppl=5.78, wps=5924, ups=0.09, wpb=64898, bsz=128, num_updates=16014, lr=9.98799e-05, gnorm=1.939, loss_scale=8, train_wall=10, gb_free=2.8, wall=184665
2021-06-20 21:56:42 | INFO | train_inner | epoch 006: 1099 / 3002 loss=2.511, ppl=5.7, wps=5791.6, ups=0.09, wpb=64878, bsz=128, num_updates=16015, lr=9.98799e-05, gnorm=1.886, loss_scale=8, train_wall=11, gb_free=2.8, wall=184676
2021-06-20 21:56:53 | INFO | train_inner | epoch 006: 1100 / 3002 loss=2.454, ppl=5.48, wps=5848, ups=0.09, wpb=64787, bsz=128, num_updates=16016, lr=9.98799e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=184687
2021-06-20 21:57:04 | INFO | train_inner | epoch 006: 1101 / 3002 loss=2.416, ppl=5.34, wps=5795.6, ups=0.09, wpb=64835, bsz=128, num_updates=16017, lr=9.98799e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=184698
2021-06-20 21:57:15 | INFO | train_inner | epoch 006: 1102 / 3002 loss=2.56, ppl=5.9, wps=5826, ups=0.09, wpb=64853, bsz=128, num_updates=16018, lr=9.98798e-05, gnorm=1.943, loss_scale=8, train_wall=11, gb_free=2.8, wall=184709
2021-06-20 21:57:26 | INFO | train_inner | epoch 006: 1103 / 3002 loss=2.539, ppl=5.81, wps=5830.2, ups=0.09, wpb=64895, bsz=128, num_updates=16019, lr=9.98798e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=184721
2021-06-20 21:57:38 | INFO | train_inner | epoch 006: 1104 / 3002 loss=2.519, ppl=5.73, wps=5765.6, ups=0.09, wpb=64772, bsz=128, num_updates=16020, lr=9.98798e-05, gnorm=1.897, loss_scale=8, train_wall=11, gb_free=2.8, wall=184732
2021-06-20 21:57:48 | INFO | train_inner | epoch 006: 1105 / 3002 loss=2.568, ppl=5.93, wps=6006.2, ups=0.09, wpb=64899, bsz=128, num_updates=16021, lr=9.98798e-05, gnorm=1.92, loss_scale=8, train_wall=10, gb_free=2.8, wall=184743
2021-06-20 21:57:59 | INFO | train_inner | epoch 006: 1106 / 3002 loss=2.39, ppl=5.24, wps=5905.2, ups=0.09, wpb=64939, bsz=128, num_updates=16022, lr=9.98798e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=184754
2021-06-20 21:58:10 | INFO | train_inner | epoch 006: 1107 / 3002 loss=2.492, ppl=5.63, wps=5863.2, ups=0.09, wpb=64778, bsz=128, num_updates=16023, lr=9.98798e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=184765
2021-06-20 21:58:22 | INFO | train_inner | epoch 006: 1108 / 3002 loss=2.437, ppl=5.41, wps=5737.1, ups=0.09, wpb=64801, bsz=128, num_updates=16024, lr=9.98798e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=184776
2021-06-20 21:58:33 | INFO | train_inner | epoch 006: 1109 / 3002 loss=2.434, ppl=5.4, wps=5806.4, ups=0.09, wpb=64836, bsz=128, num_updates=16025, lr=9.98798e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=184787
2021-06-20 21:58:44 | INFO | train_inner | epoch 006: 1110 / 3002 loss=2.53, ppl=5.78, wps=5930.3, ups=0.09, wpb=64890, bsz=128, num_updates=16026, lr=9.98798e-05, gnorm=2.021, loss_scale=8, train_wall=10, gb_free=2.8, wall=184798
2021-06-20 21:58:55 | INFO | train_inner | epoch 006: 1111 / 3002 loss=2.444, ppl=5.44, wps=5855.6, ups=0.09, wpb=64816, bsz=128, num_updates=16027, lr=9.98798e-05, gnorm=1.848, loss_scale=8, train_wall=11, gb_free=2.8, wall=184809
2021-06-20 21:59:06 | INFO | train_inner | epoch 006: 1112 / 3002 loss=2.359, ppl=5.13, wps=5797, ups=0.09, wpb=64800, bsz=128, num_updates=16028, lr=9.98798e-05, gnorm=1.912, loss_scale=8, train_wall=11, gb_free=2.8, wall=184820
2021-06-20 21:59:17 | INFO | train_inner | epoch 006: 1113 / 3002 loss=2.359, ppl=5.13, wps=5840.2, ups=0.09, wpb=64873, bsz=128, num_updates=16029, lr=9.98798e-05, gnorm=1.898, loss_scale=8, train_wall=11, gb_free=2.8, wall=184831
2021-06-20 21:59:28 | INFO | train_inner | epoch 006: 1114 / 3002 loss=2.461, ppl=5.51, wps=5841.3, ups=0.09, wpb=64701, bsz=128, num_updates=16030, lr=9.98798e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=184843
2021-06-20 21:59:39 | INFO | train_inner | epoch 006: 1115 / 3002 loss=2.467, ppl=5.53, wps=5896.6, ups=0.09, wpb=64867, bsz=128, num_updates=16031, lr=9.98797e-05, gnorm=1.872, loss_scale=8, train_wall=11, gb_free=2.8, wall=184854
2021-06-20 21:59:50 | INFO | train_inner | epoch 006: 1116 / 3002 loss=2.51, ppl=5.7, wps=5814.5, ups=0.09, wpb=64834, bsz=128, num_updates=16032, lr=9.98797e-05, gnorm=1.972, loss_scale=8, train_wall=11, gb_free=2.8, wall=184865
2021-06-20 22:00:01 | INFO | train_inner | epoch 006: 1117 / 3002 loss=2.503, ppl=5.67, wps=5959, ups=0.09, wpb=64875, bsz=128, num_updates=16033, lr=9.98797e-05, gnorm=1.91, loss_scale=8, train_wall=10, gb_free=2.8, wall=184876
2021-06-20 22:00:12 | INFO | train_inner | epoch 006: 1118 / 3002 loss=2.414, ppl=5.33, wps=5812.3, ups=0.09, wpb=64845, bsz=128, num_updates=16034, lr=9.98797e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=184887
2021-06-20 22:00:24 | INFO | train_inner | epoch 006: 1119 / 3002 loss=2.562, ppl=5.91, wps=5731.4, ups=0.09, wpb=64770, bsz=128, num_updates=16035, lr=9.98797e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=184898
2021-06-20 22:00:35 | INFO | train_inner | epoch 006: 1120 / 3002 loss=2.547, ppl=5.85, wps=5860.4, ups=0.09, wpb=64825, bsz=128, num_updates=16036, lr=9.98797e-05, gnorm=1.813, loss_scale=8, train_wall=11, gb_free=2.8, wall=184909
2021-06-20 22:00:46 | INFO | train_inner | epoch 006: 1121 / 3002 loss=2.423, ppl=5.36, wps=5874.4, ups=0.09, wpb=64791, bsz=128, num_updates=16037, lr=9.98797e-05, gnorm=2.302, loss_scale=8, train_wall=11, gb_free=2.8, wall=184920
2021-06-20 22:00:57 | INFO | train_inner | epoch 006: 1122 / 3002 loss=2.477, ppl=5.57, wps=5882.9, ups=0.09, wpb=64865, bsz=128, num_updates=16038, lr=9.98797e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=184931
2021-06-20 22:01:08 | INFO | train_inner | epoch 006: 1123 / 3002 loss=2.441, ppl=5.43, wps=5845.7, ups=0.09, wpb=64803, bsz=128, num_updates=16039, lr=9.98797e-05, gnorm=1.789, loss_scale=8, train_wall=11, gb_free=2.8, wall=184942
2021-06-20 22:01:19 | INFO | train_inner | epoch 006: 1124 / 3002 loss=2.498, ppl=5.65, wps=5746.2, ups=0.09, wpb=64837, bsz=128, num_updates=16040, lr=9.98797e-05, gnorm=1.874, loss_scale=8, train_wall=11, gb_free=2.8, wall=184954
2021-06-20 22:01:30 | INFO | train_inner | epoch 006: 1125 / 3002 loss=2.499, ppl=5.65, wps=5968.1, ups=0.09, wpb=64777, bsz=128, num_updates=16041, lr=9.98797e-05, gnorm=1.897, loss_scale=8, train_wall=10, gb_free=2.8, wall=184964
2021-06-20 22:01:41 | INFO | train_inner | epoch 006: 1126 / 3002 loss=2.37, ppl=5.17, wps=5830.7, ups=0.09, wpb=64850, bsz=128, num_updates=16042, lr=9.98797e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=184976
2021-06-20 22:01:52 | INFO | train_inner | epoch 006: 1127 / 3002 loss=2.539, ppl=5.81, wps=5722.3, ups=0.09, wpb=64768, bsz=128, num_updates=16043, lr=9.98796e-05, gnorm=1.944, loss_scale=8, train_wall=11, gb_free=2.8, wall=184987
2021-06-20 22:02:04 | INFO | train_inner | epoch 006: 1128 / 3002 loss=2.261, ppl=4.79, wps=5778, ups=0.09, wpb=64823, bsz=128, num_updates=16044, lr=9.98796e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=184998
2021-06-20 22:02:15 | INFO | train_inner | epoch 006: 1129 / 3002 loss=2.471, ppl=5.55, wps=5883.5, ups=0.09, wpb=64878, bsz=128, num_updates=16045, lr=9.98796e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=185009
2021-06-20 22:02:26 | INFO | train_inner | epoch 006: 1130 / 3002 loss=2.443, ppl=5.44, wps=5945.5, ups=0.09, wpb=64904, bsz=128, num_updates=16046, lr=9.98796e-05, gnorm=1.856, loss_scale=8, train_wall=10, gb_free=2.8, wall=185020
2021-06-20 22:02:37 | INFO | train_inner | epoch 006: 1131 / 3002 loss=2.366, ppl=5.16, wps=5941.9, ups=0.09, wpb=64883, bsz=128, num_updates=16047, lr=9.98796e-05, gnorm=1.937, loss_scale=8, train_wall=10, gb_free=2.8, wall=185031
2021-06-20 22:02:47 | INFO | train_inner | epoch 006: 1132 / 3002 loss=2.611, ppl=6.11, wps=5925.2, ups=0.09, wpb=64739, bsz=128, num_updates=16048, lr=9.98796e-05, gnorm=1.979, loss_scale=8, train_wall=10, gb_free=2.8, wall=185042
2021-06-20 22:02:59 | INFO | train_inner | epoch 006: 1133 / 3002 loss=2.588, ppl=6.01, wps=5762.5, ups=0.09, wpb=64806, bsz=128, num_updates=16049, lr=9.98796e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=185053
2021-06-20 22:03:10 | INFO | train_inner | epoch 006: 1134 / 3002 loss=2.591, ppl=6.03, wps=5905.7, ups=0.09, wpb=64813, bsz=128, num_updates=16050, lr=9.98796e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=185064
2021-06-20 22:03:21 | INFO | train_inner | epoch 006: 1135 / 3002 loss=2.408, ppl=5.31, wps=5855.1, ups=0.09, wpb=64926, bsz=128, num_updates=16051, lr=9.98796e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=185075
2021-06-20 22:03:32 | INFO | train_inner | epoch 006: 1136 / 3002 loss=2.393, ppl=5.25, wps=5889.4, ups=0.09, wpb=64883, bsz=128, num_updates=16052, lr=9.98796e-05, gnorm=1.88, loss_scale=8, train_wall=11, gb_free=2.8, wall=185086
2021-06-20 22:03:43 | INFO | train_inner | epoch 006: 1137 / 3002 loss=2.442, ppl=5.43, wps=5759.2, ups=0.09, wpb=64830, bsz=128, num_updates=16053, lr=9.98796e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=185097
2021-06-20 22:03:54 | INFO | train_inner | epoch 006: 1138 / 3002 loss=2.326, ppl=5.01, wps=5893.3, ups=0.09, wpb=64828, bsz=128, num_updates=16054, lr=9.98796e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=185108
2021-06-20 22:04:05 | INFO | train_inner | epoch 006: 1139 / 3002 loss=2.469, ppl=5.54, wps=5862.8, ups=0.09, wpb=64797, bsz=128, num_updates=16055, lr=9.98796e-05, gnorm=1.914, loss_scale=8, train_wall=11, gb_free=2.8, wall=185119
2021-06-20 22:04:16 | INFO | train_inner | epoch 006: 1140 / 3002 loss=2.653, ppl=6.29, wps=5815, ups=0.09, wpb=64689, bsz=128, num_updates=16056, lr=9.98795e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=185131
2021-06-20 22:04:27 | INFO | train_inner | epoch 006: 1141 / 3002 loss=2.516, ppl=5.72, wps=5862.3, ups=0.09, wpb=64908, bsz=128, num_updates=16057, lr=9.98795e-05, gnorm=1.878, loss_scale=8, train_wall=11, gb_free=2.8, wall=185142
2021-06-20 22:04:38 | INFO | train_inner | epoch 006: 1142 / 3002 loss=2.386, ppl=5.23, wps=5869.2, ups=0.09, wpb=64848, bsz=128, num_updates=16058, lr=9.98795e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=185153
2021-06-20 22:04:49 | INFO | train_inner | epoch 006: 1143 / 3002 loss=2.503, ppl=5.67, wps=5923.6, ups=0.09, wpb=64920, bsz=128, num_updates=16059, lr=9.98795e-05, gnorm=1.853, loss_scale=8, train_wall=11, gb_free=2.8, wall=185164
2021-06-20 22:05:00 | INFO | train_inner | epoch 006: 1144 / 3002 loss=2.516, ppl=5.72, wps=5925.9, ups=0.09, wpb=64788, bsz=128, num_updates=16060, lr=9.98795e-05, gnorm=1.873, loss_scale=8, train_wall=11, gb_free=2.8, wall=185175
2021-06-20 22:05:11 | INFO | train_inner | epoch 006: 1145 / 3002 loss=2.54, ppl=5.81, wps=5965.7, ups=0.09, wpb=64877, bsz=128, num_updates=16061, lr=9.98795e-05, gnorm=1.838, loss_scale=8, train_wall=10, gb_free=2.8, wall=185185
2021-06-20 22:05:22 | INFO | train_inner | epoch 006: 1146 / 3002 loss=2.466, ppl=5.52, wps=5819.8, ups=0.09, wpb=64810, bsz=128, num_updates=16062, lr=9.98795e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=185197
2021-06-20 22:05:33 | INFO | train_inner | epoch 006: 1147 / 3002 loss=2.543, ppl=5.83, wps=5925.8, ups=0.09, wpb=64787, bsz=128, num_updates=16063, lr=9.98795e-05, gnorm=1.911, loss_scale=8, train_wall=10, gb_free=2.8, wall=185208
2021-06-20 22:05:44 | INFO | train_inner | epoch 006: 1148 / 3002 loss=2.489, ppl=5.61, wps=5915.8, ups=0.09, wpb=64827, bsz=128, num_updates=16064, lr=9.98795e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=185219
2021-06-20 22:05:55 | INFO | train_inner | epoch 006: 1149 / 3002 loss=2.554, ppl=5.87, wps=5945.5, ups=0.09, wpb=64935, bsz=128, num_updates=16065, lr=9.98795e-05, gnorm=2.045, loss_scale=8, train_wall=10, gb_free=2.8, wall=185229
2021-06-20 22:06:06 | INFO | train_inner | epoch 006: 1150 / 3002 loss=2.463, ppl=5.51, wps=5945.2, ups=0.09, wpb=64951, bsz=128, num_updates=16066, lr=9.98795e-05, gnorm=1.889, loss_scale=8, train_wall=10, gb_free=2.8, wall=185240
2021-06-20 22:06:17 | INFO | train_inner | epoch 006: 1151 / 3002 loss=2.49, ppl=5.62, wps=5790.3, ups=0.09, wpb=64834, bsz=128, num_updates=16067, lr=9.98795e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=185252
2021-06-20 22:06:28 | INFO | train_inner | epoch 006: 1152 / 3002 loss=2.454, ppl=5.48, wps=5909.2, ups=0.09, wpb=64853, bsz=128, num_updates=16068, lr=9.98794e-05, gnorm=2.186, loss_scale=8, train_wall=11, gb_free=2.8, wall=185263
2021-06-20 22:06:39 | INFO | train_inner | epoch 006: 1153 / 3002 loss=2.514, ppl=5.71, wps=5886.1, ups=0.09, wpb=64874, bsz=128, num_updates=16069, lr=9.98794e-05, gnorm=1.982, loss_scale=8, train_wall=11, gb_free=2.8, wall=185274
2021-06-20 22:06:50 | INFO | train_inner | epoch 006: 1154 / 3002 loss=2.429, ppl=5.39, wps=5925.8, ups=0.09, wpb=64797, bsz=128, num_updates=16070, lr=9.98794e-05, gnorm=1.938, loss_scale=8, train_wall=10, gb_free=2.8, wall=185284
2021-06-20 22:07:01 | INFO | train_inner | epoch 006: 1155 / 3002 loss=2.491, ppl=5.62, wps=5921.6, ups=0.09, wpb=64812, bsz=128, num_updates=16071, lr=9.98794e-05, gnorm=1.898, loss_scale=8, train_wall=10, gb_free=2.8, wall=185295
2021-06-20 22:07:12 | INFO | train_inner | epoch 006: 1156 / 3002 loss=2.392, ppl=5.25, wps=5879.7, ups=0.09, wpb=64895, bsz=128, num_updates=16072, lr=9.98794e-05, gnorm=1.886, loss_scale=8, train_wall=11, gb_free=2.8, wall=185306
2021-06-20 22:07:23 | INFO | train_inner | epoch 006: 1157 / 3002 loss=2.557, ppl=5.88, wps=5878, ups=0.09, wpb=64922, bsz=128, num_updates=16073, lr=9.98794e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=185318
2021-06-20 22:07:34 | INFO | train_inner | epoch 006: 1158 / 3002 loss=2.414, ppl=5.33, wps=5854.1, ups=0.09, wpb=64777, bsz=128, num_updates=16074, lr=9.98794e-05, gnorm=1.934, loss_scale=8, train_wall=11, gb_free=2.8, wall=185329
2021-06-20 22:07:45 | INFO | train_inner | epoch 006: 1159 / 3002 loss=2.623, ppl=6.16, wps=5846.4, ups=0.09, wpb=64839, bsz=128, num_updates=16075, lr=9.98794e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=185340
2021-06-20 22:07:56 | INFO | train_inner | epoch 006: 1160 / 3002 loss=2.602, ppl=6.07, wps=5901, ups=0.09, wpb=64767, bsz=128, num_updates=16076, lr=9.98794e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=185351
2021-06-20 22:08:07 | INFO | train_inner | epoch 006: 1161 / 3002 loss=2.563, ppl=5.91, wps=5800.2, ups=0.09, wpb=64769, bsz=128, num_updates=16077, lr=9.98794e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=185362
2021-06-20 22:08:18 | INFO | train_inner | epoch 006: 1162 / 3002 loss=2.447, ppl=5.45, wps=5929.4, ups=0.09, wpb=64850, bsz=128, num_updates=16078, lr=9.98794e-05, gnorm=1.916, loss_scale=8, train_wall=10, gb_free=2.8, wall=185373
2021-06-20 22:08:30 | INFO | train_inner | epoch 006: 1163 / 3002 loss=2.543, ppl=5.83, wps=5794, ups=0.09, wpb=64791, bsz=128, num_updates=16079, lr=9.98794e-05, gnorm=1.884, loss_scale=8, train_wall=11, gb_free=2.8, wall=185384
2021-06-20 22:08:41 | INFO | train_inner | epoch 006: 1164 / 3002 loss=2.52, ppl=5.74, wps=5849.4, ups=0.09, wpb=64849, bsz=128, num_updates=16080, lr=9.98794e-05, gnorm=1.847, loss_scale=8, train_wall=11, gb_free=2.8, wall=185395
2021-06-20 22:08:52 | INFO | train_inner | epoch 006: 1165 / 3002 loss=2.515, ppl=5.72, wps=5900.1, ups=0.09, wpb=64860, bsz=128, num_updates=16081, lr=9.98793e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=185406
2021-06-20 22:09:03 | INFO | train_inner | epoch 006: 1166 / 3002 loss=2.325, ppl=5.01, wps=5816, ups=0.09, wpb=64866, bsz=128, num_updates=16082, lr=9.98793e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=185417
2021-06-20 22:09:14 | INFO | train_inner | epoch 006: 1167 / 3002 loss=2.516, ppl=5.72, wps=5788.2, ups=0.09, wpb=64763, bsz=128, num_updates=16083, lr=9.98793e-05, gnorm=1.931, loss_scale=8, train_wall=11, gb_free=2.8, wall=185428
2021-06-20 22:09:25 | INFO | train_inner | epoch 006: 1168 / 3002 loss=2.633, ppl=6.2, wps=5929.9, ups=0.09, wpb=64789, bsz=128, num_updates=16084, lr=9.98793e-05, gnorm=1.976, loss_scale=8, train_wall=10, gb_free=2.8, wall=185439
2021-06-20 22:09:36 | INFO | train_inner | epoch 006: 1169 / 3002 loss=2.315, ppl=4.98, wps=5974.5, ups=0.09, wpb=64846, bsz=128, num_updates=16085, lr=9.98793e-05, gnorm=1.895, loss_scale=8, train_wall=10, gb_free=2.8, wall=185450
2021-06-20 22:09:47 | INFO | train_inner | epoch 006: 1170 / 3002 loss=2.573, ppl=5.95, wps=5947.9, ups=0.09, wpb=64832, bsz=128, num_updates=16086, lr=9.98793e-05, gnorm=2.011, loss_scale=8, train_wall=10, gb_free=2.8, wall=185461
2021-06-20 22:09:58 | INFO | train_inner | epoch 006: 1171 / 3002 loss=2.495, ppl=5.64, wps=5779.5, ups=0.09, wpb=64817, bsz=128, num_updates=16087, lr=9.98793e-05, gnorm=1.877, loss_scale=8, train_wall=11, gb_free=2.8, wall=185472
2021-06-20 22:10:09 | INFO | train_inner | epoch 006: 1172 / 3002 loss=2.589, ppl=6.02, wps=5781.5, ups=0.09, wpb=64804, bsz=128, num_updates=16088, lr=9.98793e-05, gnorm=1.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=185483
2021-06-20 22:10:20 | INFO | train_inner | epoch 006: 1173 / 3002 loss=2.569, ppl=5.93, wps=5723.6, ups=0.09, wpb=64827, bsz=128, num_updates=16089, lr=9.98793e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=185495
2021-06-20 22:10:32 | INFO | train_inner | epoch 006: 1174 / 3002 loss=2.588, ppl=6.01, wps=5841.4, ups=0.09, wpb=64787, bsz=128, num_updates=16090, lr=9.98793e-05, gnorm=1.888, loss_scale=8, train_wall=11, gb_free=2.8, wall=185506
2021-06-20 22:10:42 | INFO | train_inner | epoch 006: 1175 / 3002 loss=2.44, ppl=5.43, wps=5967.4, ups=0.09, wpb=64935, bsz=128, num_updates=16091, lr=9.98793e-05, gnorm=1.913, loss_scale=8, train_wall=10, gb_free=2.8, wall=185517
2021-06-20 22:10:54 | INFO | train_inner | epoch 006: 1176 / 3002 loss=2.568, ppl=5.93, wps=5843.9, ups=0.09, wpb=64771, bsz=128, num_updates=16092, lr=9.98793e-05, gnorm=1.962, loss_scale=8, train_wall=11, gb_free=2.8, wall=185528
2021-06-20 22:11:04 | INFO | train_inner | epoch 006: 1177 / 3002 loss=2.556, ppl=5.88, wps=5932.4, ups=0.09, wpb=64765, bsz=128, num_updates=16093, lr=9.98792e-05, gnorm=1.891, loss_scale=8, train_wall=10, gb_free=2.8, wall=185539
2021-06-20 22:11:16 | INFO | train_inner | epoch 006: 1178 / 3002 loss=2.336, ppl=5.05, wps=5821, ups=0.09, wpb=64905, bsz=128, num_updates=16094, lr=9.98792e-05, gnorm=1.849, loss_scale=8, train_wall=11, gb_free=2.8, wall=185550
2021-06-20 22:11:27 | INFO | train_inner | epoch 006: 1179 / 3002 loss=2.454, ppl=5.48, wps=5925.4, ups=0.09, wpb=64831, bsz=128, num_updates=16095, lr=9.98792e-05, gnorm=1.897, loss_scale=8, train_wall=11, gb_free=2.8, wall=185561
2021-06-20 22:11:38 | INFO | train_inner | epoch 006: 1180 / 3002 loss=2.496, ppl=5.64, wps=5814.9, ups=0.09, wpb=64876, bsz=128, num_updates=16096, lr=9.98792e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=185572
2021-06-20 22:11:49 | INFO | train_inner | epoch 006: 1181 / 3002 loss=2.348, ppl=5.09, wps=5855.4, ups=0.09, wpb=64816, bsz=128, num_updates=16097, lr=9.98792e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=185583
2021-06-20 22:12:00 | INFO | train_inner | epoch 006: 1182 / 3002 loss=2.45, ppl=5.46, wps=5824.4, ups=0.09, wpb=64776, bsz=128, num_updates=16098, lr=9.98792e-05, gnorm=1.908, loss_scale=8, train_wall=11, gb_free=2.8, wall=185594
2021-06-20 22:12:11 | INFO | train_inner | epoch 006: 1183 / 3002 loss=2.425, ppl=5.37, wps=5936.8, ups=0.09, wpb=64842, bsz=128, num_updates=16099, lr=9.98792e-05, gnorm=1.963, loss_scale=8, train_wall=10, gb_free=2.8, wall=185605
2021-06-20 22:12:22 | INFO | train_inner | epoch 006: 1184 / 3002 loss=2.454, ppl=5.48, wps=5828.7, ups=0.09, wpb=64895, bsz=128, num_updates=16100, lr=9.98792e-05, gnorm=2.009, loss_scale=8, train_wall=11, gb_free=2.8, wall=185616
2021-06-20 22:12:33 | INFO | train_inner | epoch 006: 1185 / 3002 loss=2.508, ppl=5.69, wps=5858.5, ups=0.09, wpb=64700, bsz=128, num_updates=16101, lr=9.98792e-05, gnorm=1.991, loss_scale=8, train_wall=11, gb_free=2.8, wall=185627
2021-06-20 22:12:44 | INFO | train_inner | epoch 006: 1186 / 3002 loss=2.528, ppl=5.77, wps=5903.8, ups=0.09, wpb=64768, bsz=128, num_updates=16102, lr=9.98792e-05, gnorm=1.921, loss_scale=8, train_wall=11, gb_free=2.8, wall=185638
2021-06-20 22:12:55 | INFO | train_inner | epoch 006: 1187 / 3002 loss=2.598, ppl=6.05, wps=5840.2, ups=0.09, wpb=64798, bsz=128, num_updates=16103, lr=9.98792e-05, gnorm=1.86, loss_scale=8, train_wall=11, gb_free=2.8, wall=185649
2021-06-20 22:13:06 | INFO | train_inner | epoch 006: 1188 / 3002 loss=2.427, ppl=5.38, wps=5925.1, ups=0.09, wpb=64882, bsz=128, num_updates=16104, lr=9.98792e-05, gnorm=1.953, loss_scale=16, train_wall=11, gb_free=2.8, wall=185660
2021-06-20 22:13:17 | INFO | train_inner | epoch 006: 1189 / 3002 loss=2.478, ppl=5.57, wps=5932.6, ups=0.09, wpb=64835, bsz=128, num_updates=16105, lr=9.98792e-05, gnorm=1.915, loss_scale=16, train_wall=10, gb_free=2.8, wall=185671
2021-06-20 22:13:28 | INFO | train_inner | epoch 006: 1190 / 3002 loss=2.381, ppl=5.21, wps=5870.1, ups=0.09, wpb=64846, bsz=128, num_updates=16106, lr=9.98791e-05, gnorm=2.05, loss_scale=16, train_wall=11, gb_free=2.8, wall=185682
2021-06-20 22:13:39 | INFO | train_inner | epoch 006: 1191 / 3002 loss=2.566, ppl=5.92, wps=5915.2, ups=0.09, wpb=64870, bsz=128, num_updates=16107, lr=9.98791e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=185693
2021-06-20 22:13:50 | INFO | train_inner | epoch 006: 1192 / 3002 loss=2.591, ppl=6.02, wps=5888.3, ups=0.09, wpb=64839, bsz=128, num_updates=16108, lr=9.98791e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=185704
2021-06-20 22:14:01 | INFO | train_inner | epoch 006: 1193 / 3002 loss=2.403, ppl=5.29, wps=5807.1, ups=0.09, wpb=64828, bsz=128, num_updates=16109, lr=9.98791e-05, gnorm=1.999, loss_scale=16, train_wall=11, gb_free=2.8, wall=185715
2021-06-20 22:14:12 | INFO | train_inner | epoch 006: 1194 / 3002 loss=2.406, ppl=5.3, wps=5724.9, ups=0.09, wpb=64848, bsz=128, num_updates=16110, lr=9.98791e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=185727
2021-06-20 22:14:24 | INFO | train_inner | epoch 006: 1195 / 3002 loss=2.506, ppl=5.68, wps=5847.4, ups=0.09, wpb=64846, bsz=128, num_updates=16111, lr=9.98791e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=185738
2021-06-20 22:14:35 | INFO | train_inner | epoch 006: 1196 / 3002 loss=2.392, ppl=5.25, wps=5854.9, ups=0.09, wpb=64855, bsz=128, num_updates=16112, lr=9.98791e-05, gnorm=1.866, loss_scale=16, train_wall=11, gb_free=2.8, wall=185749
2021-06-20 22:14:46 | INFO | train_inner | epoch 006: 1197 / 3002 loss=2.597, ppl=6.05, wps=5890.4, ups=0.09, wpb=64852, bsz=128, num_updates=16113, lr=9.98791e-05, gnorm=1.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=185760
2021-06-20 22:14:57 | INFO | train_inner | epoch 006: 1198 / 3002 loss=2.404, ppl=5.29, wps=5745.1, ups=0.09, wpb=64794, bsz=128, num_updates=16114, lr=9.98791e-05, gnorm=1.989, loss_scale=16, train_wall=11, gb_free=2.8, wall=185771
2021-06-20 22:15:08 | INFO | train_inner | epoch 006: 1199 / 3002 loss=2.495, ppl=5.64, wps=5826.4, ups=0.09, wpb=64853, bsz=128, num_updates=16115, lr=9.98791e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=185782
2021-06-20 22:15:19 | INFO | train_inner | epoch 006: 1200 / 3002 loss=2.498, ppl=5.65, wps=5782.1, ups=0.09, wpb=64861, bsz=128, num_updates=16116, lr=9.98791e-05, gnorm=2.013, loss_scale=16, train_wall=11, gb_free=2.8, wall=185794
2021-06-20 22:15:30 | INFO | train_inner | epoch 006: 1201 / 3002 loss=2.489, ppl=5.61, wps=5841.4, ups=0.09, wpb=64855, bsz=128, num_updates=16117, lr=9.98791e-05, gnorm=1.859, loss_scale=16, train_wall=11, gb_free=2.8, wall=185805
2021-06-20 22:15:42 | INFO | train_inner | epoch 006: 1202 / 3002 loss=2.324, ppl=5.01, wps=5755.8, ups=0.09, wpb=64756, bsz=128, num_updates=16118, lr=9.9879e-05, gnorm=1.888, loss_scale=16, train_wall=11, gb_free=2.8, wall=185816
2021-06-20 22:15:53 | INFO | train_inner | epoch 006: 1203 / 3002 loss=2.498, ppl=5.65, wps=5861.1, ups=0.09, wpb=64913, bsz=128, num_updates=16119, lr=9.9879e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=185827
2021-06-20 22:16:04 | INFO | train_inner | epoch 006: 1204 / 3002 loss=2.574, ppl=5.96, wps=5850.1, ups=0.09, wpb=64754, bsz=128, num_updates=16120, lr=9.9879e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=185838
2021-06-20 22:16:15 | INFO | train_inner | epoch 006: 1205 / 3002 loss=2.391, ppl=5.25, wps=5765.5, ups=0.09, wpb=64899, bsz=128, num_updates=16121, lr=9.9879e-05, gnorm=1.915, loss_scale=16, train_wall=11, gb_free=2.8, wall=185849
2021-06-20 22:16:26 | INFO | train_inner | epoch 006: 1206 / 3002 loss=2.367, ppl=5.16, wps=5773.6, ups=0.09, wpb=64818, bsz=128, num_updates=16122, lr=9.9879e-05, gnorm=1.925, loss_scale=16, train_wall=11, gb_free=2.8, wall=185861
2021-06-20 22:16:37 | INFO | train_inner | epoch 006: 1207 / 3002 loss=2.423, ppl=5.36, wps=5898.8, ups=0.09, wpb=64793, bsz=128, num_updates=16123, lr=9.9879e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=185872
2021-06-20 22:16:48 | INFO | train_inner | epoch 006: 1208 / 3002 loss=2.403, ppl=5.29, wps=5825.5, ups=0.09, wpb=64831, bsz=128, num_updates=16124, lr=9.9879e-05, gnorm=1.94, loss_scale=16, train_wall=11, gb_free=2.8, wall=185883
2021-06-20 22:16:59 | INFO | train_inner | epoch 006: 1209 / 3002 loss=2.352, ppl=5.1, wps=5870.8, ups=0.09, wpb=64895, bsz=128, num_updates=16125, lr=9.9879e-05, gnorm=1.872, loss_scale=16, train_wall=11, gb_free=2.8, wall=185894
2021-06-20 22:17:10 | INFO | train_inner | epoch 006: 1210 / 3002 loss=2.49, ppl=5.62, wps=5891.5, ups=0.09, wpb=64836, bsz=128, num_updates=16126, lr=9.9879e-05, gnorm=1.899, loss_scale=16, train_wall=11, gb_free=2.8, wall=185905
2021-06-20 22:17:21 | INFO | train_inner | epoch 006: 1211 / 3002 loss=2.486, ppl=5.6, wps=5965.3, ups=0.09, wpb=64870, bsz=128, num_updates=16127, lr=9.9879e-05, gnorm=1.897, loss_scale=16, train_wall=10, gb_free=2.8, wall=185916
2021-06-20 22:17:32 | INFO | train_inner | epoch 006: 1212 / 3002 loss=2.489, ppl=5.62, wps=5855.2, ups=0.09, wpb=64881, bsz=128, num_updates=16128, lr=9.9879e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=185927
2021-06-20 22:17:43 | INFO | train_inner | epoch 006: 1213 / 3002 loss=2.419, ppl=5.35, wps=5846.8, ups=0.09, wpb=64904, bsz=128, num_updates=16129, lr=9.9879e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=185938
2021-06-20 22:17:55 | INFO | train_inner | epoch 006: 1214 / 3002 loss=2.618, ppl=6.14, wps=5763, ups=0.09, wpb=64774, bsz=128, num_updates=16130, lr=9.9879e-05, gnorm=1.915, loss_scale=16, train_wall=11, gb_free=2.8, wall=185949
2021-06-20 22:18:06 | INFO | train_inner | epoch 006: 1215 / 3002 loss=2.497, ppl=5.64, wps=5929.4, ups=0.09, wpb=64833, bsz=128, num_updates=16131, lr=9.98789e-05, gnorm=1.945, loss_scale=16, train_wall=10, gb_free=2.8, wall=185960
2021-06-20 22:18:17 | INFO | train_inner | epoch 006: 1216 / 3002 loss=2.403, ppl=5.29, wps=5850.8, ups=0.09, wpb=64853, bsz=128, num_updates=16132, lr=9.98789e-05, gnorm=1.919, loss_scale=16, train_wall=11, gb_free=2.8, wall=185971
2021-06-20 22:18:28 | INFO | train_inner | epoch 006: 1217 / 3002 loss=2.508, ppl=5.69, wps=5891.9, ups=0.09, wpb=64779, bsz=128, num_updates=16133, lr=9.98789e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=185982
2021-06-20 22:18:39 | INFO | train_inner | epoch 006: 1218 / 3002 loss=2.407, ppl=5.31, wps=5938.7, ups=0.09, wpb=64835, bsz=128, num_updates=16134, lr=9.98789e-05, gnorm=1.932, loss_scale=16, train_wall=10, gb_free=2.8, wall=185993
2021-06-20 22:18:50 | INFO | train_inner | epoch 006: 1219 / 3002 loss=2.477, ppl=5.57, wps=5786, ups=0.09, wpb=64698, bsz=128, num_updates=16135, lr=9.98789e-05, gnorm=1.828, loss_scale=16, train_wall=11, gb_free=2.8, wall=186004
2021-06-20 22:19:01 | INFO | train_inner | epoch 006: 1220 / 3002 loss=2.331, ppl=5.03, wps=5888.2, ups=0.09, wpb=64886, bsz=128, num_updates=16136, lr=9.98789e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=186015
2021-06-20 22:19:12 | INFO | train_inner | epoch 006: 1221 / 3002 loss=2.523, ppl=5.75, wps=5678.9, ups=0.09, wpb=64847, bsz=128, num_updates=16137, lr=9.98789e-05, gnorm=1.886, loss_scale=16, train_wall=11, gb_free=2.8, wall=186027
2021-06-20 22:19:24 | INFO | train_inner | epoch 006: 1222 / 3002 loss=2.497, ppl=5.65, wps=5729.6, ups=0.09, wpb=64784, bsz=128, num_updates=16138, lr=9.98789e-05, gnorm=1.953, loss_scale=16, train_wall=11, gb_free=2.8, wall=186038
2021-06-20 22:19:34 | INFO | train_inner | epoch 006: 1223 / 3002 loss=2.435, ppl=5.41, wps=5956.1, ups=0.09, wpb=64857, bsz=128, num_updates=16139, lr=9.98789e-05, gnorm=1.955, loss_scale=16, train_wall=10, gb_free=2.8, wall=186049
2021-06-20 22:19:46 | INFO | train_inner | epoch 006: 1224 / 3002 loss=2.348, ppl=5.09, wps=5772.9, ups=0.09, wpb=64860, bsz=128, num_updates=16140, lr=9.98789e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=186060
2021-06-20 22:19:57 | INFO | train_inner | epoch 006: 1225 / 3002 loss=2.501, ppl=5.66, wps=5836.1, ups=0.09, wpb=64789, bsz=128, num_updates=16141, lr=9.98789e-05, gnorm=1.889, loss_scale=16, train_wall=11, gb_free=2.8, wall=186071
2021-06-20 22:20:08 | INFO | train_inner | epoch 006: 1226 / 3002 loss=2.444, ppl=5.44, wps=5848.8, ups=0.09, wpb=64847, bsz=128, num_updates=16142, lr=9.98789e-05, gnorm=1.867, loss_scale=16, train_wall=11, gb_free=2.8, wall=186082
2021-06-20 22:20:19 | INFO | train_inner | epoch 006: 1227 / 3002 loss=2.458, ppl=5.5, wps=5880.6, ups=0.09, wpb=64891, bsz=128, num_updates=16143, lr=9.98788e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=186093
2021-06-20 22:20:30 | INFO | train_inner | epoch 006: 1228 / 3002 loss=2.398, ppl=5.27, wps=5839.9, ups=0.09, wpb=64815, bsz=128, num_updates=16144, lr=9.98788e-05, gnorm=1.914, loss_scale=16, train_wall=11, gb_free=2.8, wall=186104
2021-06-20 22:20:41 | INFO | train_inner | epoch 006: 1229 / 3002 loss=2.559, ppl=5.89, wps=5818.1, ups=0.09, wpb=64790, bsz=128, num_updates=16145, lr=9.98788e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=186115
2021-06-20 22:20:52 | INFO | train_inner | epoch 006: 1230 / 3002 loss=2.46, ppl=5.5, wps=5928.1, ups=0.09, wpb=64848, bsz=128, num_updates=16146, lr=9.98788e-05, gnorm=1.951, loss_scale=16, train_wall=10, gb_free=2.8, wall=186126
2021-06-20 22:21:03 | INFO | train_inner | epoch 006: 1231 / 3002 loss=2.501, ppl=5.66, wps=5896.5, ups=0.09, wpb=64754, bsz=128, num_updates=16147, lr=9.98788e-05, gnorm=1.912, loss_scale=16, train_wall=11, gb_free=2.8, wall=186137
2021-06-20 22:21:14 | INFO | train_inner | epoch 006: 1232 / 3002 loss=2.702, ppl=6.51, wps=5869.7, ups=0.09, wpb=64862, bsz=128, num_updates=16148, lr=9.98788e-05, gnorm=2.01, loss_scale=16, train_wall=11, gb_free=2.8, wall=186148
2021-06-20 22:21:25 | INFO | train_inner | epoch 006: 1233 / 3002 loss=2.487, ppl=5.61, wps=5939.7, ups=0.09, wpb=64826, bsz=128, num_updates=16149, lr=9.98788e-05, gnorm=1.908, loss_scale=16, train_wall=10, gb_free=2.8, wall=186159
2021-06-20 22:21:36 | INFO | train_inner | epoch 006: 1234 / 3002 loss=2.446, ppl=5.45, wps=5843.2, ups=0.09, wpb=64856, bsz=128, num_updates=16150, lr=9.98788e-05, gnorm=2.075, loss_scale=16, train_wall=11, gb_free=2.8, wall=186170
2021-06-20 22:21:47 | INFO | train_inner | epoch 006: 1235 / 3002 loss=2.519, ppl=5.73, wps=5934.1, ups=0.09, wpb=64865, bsz=128, num_updates=16151, lr=9.98788e-05, gnorm=1.924, loss_scale=16, train_wall=10, gb_free=2.8, wall=186181
2021-06-20 22:21:58 | INFO | train_inner | epoch 006: 1236 / 3002 loss=2.425, ppl=5.37, wps=5982, ups=0.09, wpb=64789, bsz=128, num_updates=16152, lr=9.98788e-05, gnorm=1.915, loss_scale=16, train_wall=10, gb_free=2.8, wall=186192
2021-06-20 22:22:09 | INFO | train_inner | epoch 006: 1237 / 3002 loss=2.477, ppl=5.57, wps=5875.4, ups=0.09, wpb=64866, bsz=128, num_updates=16153, lr=9.98788e-05, gnorm=1.874, loss_scale=16, train_wall=11, gb_free=2.8, wall=186203
2021-06-20 22:22:20 | INFO | train_inner | epoch 006: 1238 / 3002 loss=2.444, ppl=5.44, wps=5935.1, ups=0.09, wpb=64924, bsz=128, num_updates=16154, lr=9.98788e-05, gnorm=1.92, loss_scale=16, train_wall=10, gb_free=2.8, wall=186214
2021-06-20 22:22:31 | INFO | train_inner | epoch 006: 1239 / 3002 loss=2.399, ppl=5.27, wps=5883.1, ups=0.09, wpb=64789, bsz=128, num_updates=16155, lr=9.98788e-05, gnorm=1.838, loss_scale=16, train_wall=11, gb_free=2.8, wall=186225
2021-06-20 22:22:42 | INFO | train_inner | epoch 006: 1240 / 3002 loss=2.362, ppl=5.14, wps=5781.6, ups=0.09, wpb=64827, bsz=128, num_updates=16156, lr=9.98787e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=186236
2021-06-20 22:22:53 | INFO | train_inner | epoch 006: 1241 / 3002 loss=2.367, ppl=5.16, wps=5926, ups=0.09, wpb=64814, bsz=128, num_updates=16157, lr=9.98787e-05, gnorm=1.851, loss_scale=16, train_wall=10, gb_free=2.8, wall=186247
2021-06-20 22:23:04 | INFO | train_inner | epoch 006: 1242 / 3002 loss=2.482, ppl=5.59, wps=5714.7, ups=0.09, wpb=64740, bsz=128, num_updates=16158, lr=9.98787e-05, gnorm=1.837, loss_scale=16, train_wall=11, gb_free=2.8, wall=186259
2021-06-20 22:23:15 | INFO | train_inner | epoch 006: 1243 / 3002 loss=2.385, ppl=5.22, wps=5890.9, ups=0.09, wpb=64864, bsz=128, num_updates=16159, lr=9.98787e-05, gnorm=2.032, loss_scale=16, train_wall=11, gb_free=2.8, wall=186270
2021-06-20 22:23:26 | INFO | train_inner | epoch 006: 1244 / 3002 loss=2.446, ppl=5.45, wps=5921.7, ups=0.09, wpb=64847, bsz=128, num_updates=16160, lr=9.98787e-05, gnorm=1.881, loss_scale=16, train_wall=10, gb_free=2.8, wall=186281
2021-06-20 22:23:37 | INFO | train_inner | epoch 006: 1245 / 3002 loss=2.344, ppl=5.08, wps=5894.3, ups=0.09, wpb=64868, bsz=128, num_updates=16161, lr=9.98787e-05, gnorm=1.851, loss_scale=16, train_wall=11, gb_free=2.8, wall=186292
2021-06-20 22:23:49 | INFO | train_inner | epoch 006: 1246 / 3002 loss=2.579, ppl=5.98, wps=5759.5, ups=0.09, wpb=64867, bsz=128, num_updates=16162, lr=9.98787e-05, gnorm=2.018, loss_scale=16, train_wall=11, gb_free=2.8, wall=186303
2021-06-20 22:24:00 | INFO | train_inner | epoch 006: 1247 / 3002 loss=2.584, ppl=6, wps=5862, ups=0.09, wpb=64782, bsz=128, num_updates=16163, lr=9.98787e-05, gnorm=2.005, loss_scale=16, train_wall=11, gb_free=2.8, wall=186314
2021-06-20 22:24:11 | INFO | train_inner | epoch 006: 1248 / 3002 loss=2.48, ppl=5.58, wps=5818.5, ups=0.09, wpb=64827, bsz=128, num_updates=16164, lr=9.98787e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=186325
2021-06-20 22:24:22 | INFO | train_inner | epoch 006: 1249 / 3002 loss=2.345, ppl=5.08, wps=5816.9, ups=0.09, wpb=64836, bsz=128, num_updates=16165, lr=9.98787e-05, gnorm=1.878, loss_scale=16, train_wall=11, gb_free=2.8, wall=186336
2021-06-20 22:24:33 | INFO | train_inner | epoch 006: 1250 / 3002 loss=2.418, ppl=5.34, wps=5834.8, ups=0.09, wpb=64835, bsz=128, num_updates=16166, lr=9.98787e-05, gnorm=1.895, loss_scale=16, train_wall=11, gb_free=2.8, wall=186347
2021-06-20 22:24:44 | INFO | train_inner | epoch 006: 1251 / 3002 loss=2.521, ppl=5.74, wps=5781.7, ups=0.09, wpb=64830, bsz=128, num_updates=16167, lr=9.98787e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=186359
2021-06-20 22:24:55 | INFO | train_inner | epoch 006: 1252 / 3002 loss=2.382, ppl=5.21, wps=5894.3, ups=0.09, wpb=64898, bsz=128, num_updates=16168, lr=9.98786e-05, gnorm=1.937, loss_scale=16, train_wall=11, gb_free=2.8, wall=186370
2021-06-20 22:25:06 | INFO | train_inner | epoch 006: 1253 / 3002 loss=2.491, ppl=5.62, wps=5983, ups=0.09, wpb=64818, bsz=128, num_updates=16169, lr=9.98786e-05, gnorm=1.948, loss_scale=16, train_wall=10, gb_free=2.8, wall=186380
2021-06-20 22:25:17 | INFO | train_inner | epoch 006: 1254 / 3002 loss=2.523, ppl=5.75, wps=5887.7, ups=0.09, wpb=64867, bsz=128, num_updates=16170, lr=9.98786e-05, gnorm=1.948, loss_scale=16, train_wall=11, gb_free=2.8, wall=186391
2021-06-20 22:25:28 | INFO | train_inner | epoch 006: 1255 / 3002 loss=2.231, ppl=4.69, wps=6011.8, ups=0.09, wpb=64841, bsz=128, num_updates=16171, lr=9.98786e-05, gnorm=1.85, loss_scale=16, train_wall=10, gb_free=2.8, wall=186402
2021-06-20 22:25:39 | INFO | train_inner | epoch 006: 1256 / 3002 loss=2.47, ppl=5.54, wps=5826.4, ups=0.09, wpb=64868, bsz=128, num_updates=16172, lr=9.98786e-05, gnorm=1.96, loss_scale=16, train_wall=11, gb_free=2.8, wall=186413
2021-06-20 22:25:50 | INFO | train_inner | epoch 006: 1257 / 3002 loss=2.396, ppl=5.26, wps=5864.3, ups=0.09, wpb=64868, bsz=128, num_updates=16173, lr=9.98786e-05, gnorm=1.836, loss_scale=16, train_wall=11, gb_free=2.8, wall=186424
2021-06-20 22:26:01 | INFO | train_inner | epoch 006: 1258 / 3002 loss=2.436, ppl=5.41, wps=5878.7, ups=0.09, wpb=64820, bsz=128, num_updates=16174, lr=9.98786e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=186435
2021-06-20 22:26:12 | INFO | train_inner | epoch 006: 1259 / 3002 loss=2.459, ppl=5.5, wps=5794.8, ups=0.09, wpb=64766, bsz=128, num_updates=16175, lr=9.98786e-05, gnorm=2.395, loss_scale=16, train_wall=11, gb_free=2.8, wall=186447
2021-06-20 22:26:23 | INFO | train_inner | epoch 006: 1260 / 3002 loss=2.435, ppl=5.41, wps=5945, ups=0.09, wpb=64851, bsz=128, num_updates=16176, lr=9.98786e-05, gnorm=1.879, loss_scale=16, train_wall=10, gb_free=2.8, wall=186458
2021-06-20 22:26:34 | INFO | train_inner | epoch 006: 1261 / 3002 loss=2.517, ppl=5.72, wps=5905, ups=0.09, wpb=64842, bsz=128, num_updates=16177, lr=9.98786e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=186469
2021-06-20 22:26:45 | INFO | train_inner | epoch 006: 1262 / 3002 loss=2.488, ppl=5.61, wps=5916.7, ups=0.09, wpb=64792, bsz=128, num_updates=16178, lr=9.98786e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=186479
2021-06-20 22:26:56 | INFO | train_inner | epoch 006: 1263 / 3002 loss=2.481, ppl=5.58, wps=5831.7, ups=0.09, wpb=64863, bsz=128, num_updates=16179, lr=9.98786e-05, gnorm=1.936, loss_scale=16, train_wall=11, gb_free=2.8, wall=186491
2021-06-20 22:27:07 | INFO | train_inner | epoch 006: 1264 / 3002 loss=2.382, ppl=5.21, wps=5834.9, ups=0.09, wpb=64802, bsz=128, num_updates=16180, lr=9.98786e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=186502
2021-06-20 22:27:19 | INFO | train_inner | epoch 006: 1265 / 3002 loss=2.377, ppl=5.2, wps=5794, ups=0.09, wpb=64862, bsz=128, num_updates=16181, lr=9.98785e-05, gnorm=1.836, loss_scale=16, train_wall=11, gb_free=2.8, wall=186513
2021-06-20 22:27:30 | INFO | train_inner | epoch 006: 1266 / 3002 loss=2.38, ppl=5.2, wps=5896.1, ups=0.09, wpb=64911, bsz=128, num_updates=16182, lr=9.98785e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=186524
2021-06-20 22:27:41 | INFO | train_inner | epoch 006: 1267 / 3002 loss=2.401, ppl=5.28, wps=5850.7, ups=0.09, wpb=64844, bsz=128, num_updates=16183, lr=9.98785e-05, gnorm=1.893, loss_scale=16, train_wall=11, gb_free=2.8, wall=186535
2021-06-20 22:27:52 | INFO | train_inner | epoch 006: 1268 / 3002 loss=2.486, ppl=5.6, wps=5939.7, ups=0.09, wpb=64846, bsz=128, num_updates=16184, lr=9.98785e-05, gnorm=1.931, loss_scale=16, train_wall=10, gb_free=2.8, wall=186546
2021-06-20 22:28:03 | INFO | train_inner | epoch 006: 1269 / 3002 loss=2.529, ppl=5.77, wps=5853.1, ups=0.09, wpb=64824, bsz=128, num_updates=16185, lr=9.98785e-05, gnorm=2.523, loss_scale=16, train_wall=11, gb_free=2.8, wall=186557
2021-06-20 22:28:14 | INFO | train_inner | epoch 006: 1270 / 3002 loss=2.556, ppl=5.88, wps=5818.7, ups=0.09, wpb=64804, bsz=128, num_updates=16186, lr=9.98785e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=186568
2021-06-20 22:28:25 | INFO | train_inner | epoch 006: 1271 / 3002 loss=2.521, ppl=5.74, wps=5751, ups=0.09, wpb=64832, bsz=128, num_updates=16187, lr=9.98785e-05, gnorm=2.032, loss_scale=16, train_wall=11, gb_free=2.8, wall=186579
2021-06-20 22:28:36 | INFO | train_inner | epoch 006: 1272 / 3002 loss=2.514, ppl=5.71, wps=5987.7, ups=0.09, wpb=64873, bsz=128, num_updates=16188, lr=9.98785e-05, gnorm=1.931, loss_scale=16, train_wall=10, gb_free=2.8, wall=186590
2021-06-20 22:28:47 | INFO | train_inner | epoch 006: 1273 / 3002 loss=2.346, ppl=5.08, wps=5890.2, ups=0.09, wpb=64877, bsz=128, num_updates=16189, lr=9.98785e-05, gnorm=1.826, loss_scale=16, train_wall=11, gb_free=2.8, wall=186601
2021-06-20 22:28:58 | INFO | train_inner | epoch 006: 1274 / 3002 loss=2.544, ppl=5.83, wps=5912.3, ups=0.09, wpb=64865, bsz=128, num_updates=16190, lr=9.98785e-05, gnorm=1.892, loss_scale=16, train_wall=11, gb_free=2.8, wall=186612
2021-06-20 22:29:09 | INFO | train_inner | epoch 006: 1275 / 3002 loss=2.306, ppl=4.94, wps=5818.5, ups=0.09, wpb=64806, bsz=128, num_updates=16191, lr=9.98785e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=186623
2021-06-20 22:29:20 | INFO | train_inner | epoch 006: 1276 / 3002 loss=2.481, ppl=5.58, wps=5946.7, ups=0.09, wpb=64899, bsz=128, num_updates=16192, lr=9.98785e-05, gnorm=2.642, loss_scale=16, train_wall=10, gb_free=2.8, wall=186634
2021-06-20 22:29:31 | INFO | train_inner | epoch 006: 1277 / 3002 loss=2.396, ppl=5.27, wps=5814.1, ups=0.09, wpb=64858, bsz=128, num_updates=16193, lr=9.98784e-05, gnorm=1.966, loss_scale=16, train_wall=11, gb_free=2.8, wall=186645
2021-06-20 22:29:42 | INFO | train_inner | epoch 006: 1278 / 3002 loss=2.393, ppl=5.25, wps=5797.2, ups=0.09, wpb=64825, bsz=128, num_updates=16194, lr=9.98784e-05, gnorm=2.1, loss_scale=16, train_wall=11, gb_free=2.8, wall=186657
2021-06-20 22:29:53 | INFO | train_inner | epoch 006: 1279 / 3002 loss=2.485, ppl=5.6, wps=5892.1, ups=0.09, wpb=64811, bsz=128, num_updates=16195, lr=9.98784e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=186668
2021-06-20 22:30:04 | INFO | train_inner | epoch 006: 1280 / 3002 loss=2.534, ppl=5.79, wps=5778.5, ups=0.09, wpb=64842, bsz=128, num_updates=16196, lr=9.98784e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=186679
2021-06-20 22:30:16 | INFO | train_inner | epoch 006: 1281 / 3002 loss=2.577, ppl=5.97, wps=5816.3, ups=0.09, wpb=64811, bsz=128, num_updates=16197, lr=9.98784e-05, gnorm=1.968, loss_scale=16, train_wall=11, gb_free=2.8, wall=186690
2021-06-20 22:30:27 | INFO | train_inner | epoch 006: 1282 / 3002 loss=2.43, ppl=5.39, wps=5860.8, ups=0.09, wpb=64827, bsz=128, num_updates=16198, lr=9.98784e-05, gnorm=1.867, loss_scale=16, train_wall=11, gb_free=2.8, wall=186701
2021-06-20 22:30:38 | INFO | train_inner | epoch 006: 1283 / 3002 loss=2.536, ppl=5.8, wps=5921, ups=0.09, wpb=64902, bsz=128, num_updates=16199, lr=9.98784e-05, gnorm=2.479, loss_scale=16, train_wall=11, gb_free=2.8, wall=186712
2021-06-20 22:30:49 | INFO | train_inner | epoch 006: 1284 / 3002 loss=2.473, ppl=5.55, wps=5901.9, ups=0.09, wpb=64846, bsz=128, num_updates=16200, lr=9.98784e-05, gnorm=1.965, loss_scale=16, train_wall=11, gb_free=2.8, wall=186723
2021-06-20 22:31:00 | INFO | train_inner | epoch 006: 1285 / 3002 loss=2.418, ppl=5.35, wps=5831.6, ups=0.09, wpb=64764, bsz=128, num_updates=16201, lr=9.98784e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=186734
2021-06-20 22:31:11 | INFO | train_inner | epoch 006: 1286 / 3002 loss=2.669, ppl=6.36, wps=5849.3, ups=0.09, wpb=64799, bsz=128, num_updates=16202, lr=9.98784e-05, gnorm=1.926, loss_scale=16, train_wall=11, gb_free=2.8, wall=186745
2021-06-20 22:31:22 | INFO | train_inner | epoch 006: 1287 / 3002 loss=2.521, ppl=5.74, wps=6021.6, ups=0.09, wpb=64882, bsz=128, num_updates=16203, lr=9.98784e-05, gnorm=1.929, loss_scale=16, train_wall=10, gb_free=2.8, wall=186756
2021-06-20 22:31:33 | INFO | train_inner | epoch 006: 1288 / 3002 loss=2.531, ppl=5.78, wps=5887.6, ups=0.09, wpb=64867, bsz=128, num_updates=16204, lr=9.98784e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=186767
2021-06-20 22:31:44 | INFO | train_inner | epoch 006: 1289 / 3002 loss=2.395, ppl=5.26, wps=5754.3, ups=0.09, wpb=64734, bsz=128, num_updates=16205, lr=9.98784e-05, gnorm=2.186, loss_scale=16, train_wall=11, gb_free=2.8, wall=186778
2021-06-20 22:31:55 | INFO | train_inner | epoch 006: 1290 / 3002 loss=2.507, ppl=5.68, wps=5861.9, ups=0.09, wpb=64775, bsz=128, num_updates=16206, lr=9.98783e-05, gnorm=2.033, loss_scale=16, train_wall=11, gb_free=2.8, wall=186789
2021-06-20 22:32:06 | INFO | train_inner | epoch 006: 1291 / 3002 loss=2.391, ppl=5.24, wps=5822.7, ups=0.09, wpb=64809, bsz=128, num_updates=16207, lr=9.98783e-05, gnorm=1.906, loss_scale=16, train_wall=11, gb_free=2.8, wall=186800
2021-06-20 22:32:17 | INFO | train_inner | epoch 006: 1292 / 3002 loss=2.555, ppl=5.88, wps=5894.8, ups=0.09, wpb=64768, bsz=128, num_updates=16208, lr=9.98783e-05, gnorm=1.982, loss_scale=16, train_wall=11, gb_free=2.8, wall=186811
2021-06-20 22:32:28 | INFO | train_inner | epoch 006: 1293 / 3002 loss=2.378, ppl=5.2, wps=5785.8, ups=0.09, wpb=64784, bsz=128, num_updates=16209, lr=9.98783e-05, gnorm=1.899, loss_scale=16, train_wall=11, gb_free=2.8, wall=186823
2021-06-20 22:32:39 | INFO | train_inner | epoch 006: 1294 / 3002 loss=2.464, ppl=5.52, wps=5826.9, ups=0.09, wpb=64832, bsz=128, num_updates=16210, lr=9.98783e-05, gnorm=2.109, loss_scale=16, train_wall=11, gb_free=2.8, wall=186834
2021-06-20 22:32:50 | INFO | train_inner | epoch 006: 1295 / 3002 loss=2.433, ppl=5.4, wps=5901.9, ups=0.09, wpb=64801, bsz=128, num_updates=16211, lr=9.98783e-05, gnorm=1.98, loss_scale=16, train_wall=10, gb_free=2.8, wall=186845
2021-06-20 22:33:02 | INFO | train_inner | epoch 006: 1296 / 3002 loss=2.449, ppl=5.46, wps=5777.4, ups=0.09, wpb=64815, bsz=128, num_updates=16212, lr=9.98783e-05, gnorm=1.986, loss_scale=16, train_wall=11, gb_free=2.8, wall=186856
2021-06-20 22:33:13 | INFO | train_inner | epoch 006: 1297 / 3002 loss=2.525, ppl=5.76, wps=5849.3, ups=0.09, wpb=64787, bsz=128, num_updates=16213, lr=9.98783e-05, gnorm=1.918, loss_scale=16, train_wall=11, gb_free=2.8, wall=186867
2021-06-20 22:33:24 | INFO | train_inner | epoch 006: 1298 / 3002 loss=2.266, ppl=4.81, wps=5957.9, ups=0.09, wpb=64821, bsz=128, num_updates=16214, lr=9.98783e-05, gnorm=1.904, loss_scale=16, train_wall=10, gb_free=2.8, wall=186878
2021-06-20 22:33:34 | INFO | train_inner | epoch 006: 1299 / 3002 loss=2.577, ppl=5.97, wps=5970.1, ups=0.09, wpb=64838, bsz=128, num_updates=16215, lr=9.98783e-05, gnorm=1.976, loss_scale=16, train_wall=10, gb_free=2.8, wall=186889
2021-06-20 22:33:45 | INFO | train_inner | epoch 006: 1300 / 3002 loss=2.48, ppl=5.58, wps=5882.4, ups=0.09, wpb=64899, bsz=128, num_updates=16216, lr=9.98783e-05, gnorm=1.831, loss_scale=16, train_wall=11, gb_free=2.8, wall=186900
2021-06-20 22:33:56 | INFO | train_inner | epoch 006: 1301 / 3002 loss=2.594, ppl=6.04, wps=5868.9, ups=0.09, wpb=64872, bsz=128, num_updates=16217, lr=9.98783e-05, gnorm=1.997, loss_scale=16, train_wall=11, gb_free=2.8, wall=186911
2021-06-20 22:34:08 | INFO | train_inner | epoch 006: 1302 / 3002 loss=2.531, ppl=5.78, wps=5807.9, ups=0.09, wpb=64866, bsz=128, num_updates=16218, lr=9.98782e-05, gnorm=1.942, loss_scale=16, train_wall=11, gb_free=2.8, wall=186922
2021-06-20 22:34:19 | INFO | train_inner | epoch 006: 1303 / 3002 loss=2.508, ppl=5.69, wps=5886.2, ups=0.09, wpb=64822, bsz=128, num_updates=16219, lr=9.98782e-05, gnorm=1.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=186933
2021-06-20 22:34:30 | INFO | train_inner | epoch 006: 1304 / 3002 loss=2.547, ppl=5.85, wps=5757.8, ups=0.09, wpb=64728, bsz=128, num_updates=16220, lr=9.98782e-05, gnorm=2.01, loss_scale=16, train_wall=11, gb_free=2.8, wall=186944
2021-06-20 22:34:41 | INFO | train_inner | epoch 006: 1305 / 3002 loss=2.394, ppl=5.26, wps=5781.6, ups=0.09, wpb=64894, bsz=128, num_updates=16221, lr=9.98782e-05, gnorm=1.918, loss_scale=16, train_wall=11, gb_free=2.8, wall=186955
2021-06-20 22:34:52 | INFO | train_inner | epoch 006: 1306 / 3002 loss=2.482, ppl=5.58, wps=5812.4, ups=0.09, wpb=64852, bsz=128, num_updates=16222, lr=9.98782e-05, gnorm=2.896, loss_scale=16, train_wall=11, gb_free=2.8, wall=186967
2021-06-20 22:35:03 | INFO | train_inner | epoch 006: 1307 / 3002 loss=2.442, ppl=5.43, wps=5857.2, ups=0.09, wpb=64756, bsz=128, num_updates=16223, lr=9.98782e-05, gnorm=1.919, loss_scale=16, train_wall=11, gb_free=2.8, wall=186978
2021-06-20 22:35:14 | INFO | train_inner | epoch 006: 1308 / 3002 loss=2.568, ppl=5.93, wps=5921, ups=0.09, wpb=64791, bsz=128, num_updates=16224, lr=9.98782e-05, gnorm=2.104, loss_scale=16, train_wall=11, gb_free=2.8, wall=186989
2021-06-20 22:35:26 | INFO | train_inner | epoch 006: 1309 / 3002 loss=2.459, ppl=5.5, wps=5732.8, ups=0.09, wpb=64821, bsz=128, num_updates=16225, lr=9.98782e-05, gnorm=1.983, loss_scale=16, train_wall=11, gb_free=2.8, wall=187000
2021-06-20 22:35:36 | INFO | train_inner | epoch 006: 1310 / 3002 loss=2.48, ppl=5.58, wps=5973.3, ups=0.09, wpb=64881, bsz=128, num_updates=16226, lr=9.98782e-05, gnorm=1.94, loss_scale=16, train_wall=10, gb_free=2.8, wall=187011
2021-06-20 22:35:48 | INFO | train_inner | epoch 006: 1311 / 3002 loss=2.386, ppl=5.23, wps=5740.9, ups=0.09, wpb=64881, bsz=128, num_updates=16227, lr=9.98782e-05, gnorm=1.858, loss_scale=16, train_wall=11, gb_free=2.8, wall=187022
2021-06-20 22:35:59 | INFO | train_inner | epoch 006: 1312 / 3002 loss=2.495, ppl=5.64, wps=5989.3, ups=0.09, wpb=64751, bsz=128, num_updates=16228, lr=9.98782e-05, gnorm=2.156, loss_scale=16, train_wall=10, gb_free=2.8, wall=187033
2021-06-20 22:36:10 | INFO | train_inner | epoch 006: 1313 / 3002 loss=2.572, ppl=5.95, wps=5847.5, ups=0.09, wpb=64811, bsz=128, num_updates=16229, lr=9.98782e-05, gnorm=2.825, loss_scale=16, train_wall=11, gb_free=2.8, wall=187044
2021-06-20 22:36:21 | INFO | train_inner | epoch 006: 1314 / 3002 loss=2.359, ppl=5.13, wps=5821.2, ups=0.09, wpb=64831, bsz=128, num_updates=16230, lr=9.98782e-05, gnorm=1.907, loss_scale=16, train_wall=11, gb_free=2.8, wall=187055
2021-06-20 22:36:32 | INFO | train_inner | epoch 006: 1315 / 3002 loss=2.448, ppl=5.46, wps=5762.8, ups=0.09, wpb=64836, bsz=128, num_updates=16231, lr=9.98781e-05, gnorm=2.219, loss_scale=16, train_wall=11, gb_free=2.8, wall=187066
2021-06-20 22:36:43 | INFO | train_inner | epoch 006: 1316 / 3002 loss=2.482, ppl=5.59, wps=5738.1, ups=0.09, wpb=64814, bsz=128, num_updates=16232, lr=9.98781e-05, gnorm=1.931, loss_scale=32, train_wall=11, gb_free=2.8, wall=187078
2021-06-20 22:36:54 | INFO | train_inner | epoch 006: 1317 / 3002 loss=2.473, ppl=5.55, wps=5835.5, ups=0.09, wpb=64838, bsz=128, num_updates=16233, lr=9.98781e-05, gnorm=1.92, loss_scale=32, train_wall=11, gb_free=2.8, wall=187089
2021-06-20 22:37:05 | INFO | train_inner | epoch 006: 1318 / 3002 loss=2.43, ppl=5.39, wps=5892.2, ups=0.09, wpb=64895, bsz=128, num_updates=16234, lr=9.98781e-05, gnorm=1.957, loss_scale=32, train_wall=11, gb_free=2.8, wall=187100
2021-06-20 22:37:17 | INFO | train_inner | epoch 006: 1319 / 3002 loss=2.48, ppl=5.58, wps=5839.6, ups=0.09, wpb=64872, bsz=128, num_updates=16235, lr=9.98781e-05, gnorm=1.914, loss_scale=32, train_wall=11, gb_free=2.8, wall=187111
2021-06-20 22:37:27 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0
2021-06-20 22:37:38 | INFO | train_inner | epoch 006: 1321 / 3002 loss=2.543, ppl=5.83, wps=2974.7, ups=0.05, wpb=64737, bsz=128, num_updates=16236, lr=9.98781e-05, gnorm=1.903, loss_scale=16, train_wall=21, gb_free=2.8, wall=187133
2021-06-20 22:37:49 | INFO | train_inner | epoch 006: 1322 / 3002 loss=2.435, ppl=5.41, wps=5908.5, ups=0.09, wpb=64860, bsz=128, num_updates=16237, lr=9.98781e-05, gnorm=1.895, loss_scale=16, train_wall=11, gb_free=2.8, wall=187144
2021-06-20 22:38:00 | INFO | train_inner | epoch 006: 1323 / 3002 loss=2.545, ppl=5.84, wps=5971.3, ups=0.09, wpb=64791, bsz=128, num_updates=16238, lr=9.98781e-05, gnorm=1.988, loss_scale=16, train_wall=10, gb_free=2.8, wall=187154
2021-06-20 22:38:11 | INFO | train_inner | epoch 006: 1324 / 3002 loss=2.395, ppl=5.26, wps=5872.8, ups=0.09, wpb=64829, bsz=128, num_updates=16239, lr=9.98781e-05, gnorm=1.928, loss_scale=16, train_wall=11, gb_free=2.8, wall=187166
2021-06-20 22:38:22 | INFO | train_inner | epoch 006: 1325 / 3002 loss=2.393, ppl=5.25, wps=5823.7, ups=0.09, wpb=64778, bsz=128, num_updates=16240, lr=9.98781e-05, gnorm=2.048, loss_scale=16, train_wall=11, gb_free=2.8, wall=187177
2021-06-20 22:38:33 | INFO | train_inner | epoch 006: 1326 / 3002 loss=2.63, ppl=6.19, wps=5870.3, ups=0.09, wpb=64839, bsz=128, num_updates=16241, lr=9.98781e-05, gnorm=2.042, loss_scale=16, train_wall=11, gb_free=2.8, wall=187188
2021-06-20 22:38:44 | INFO | train_inner | epoch 006: 1327 / 3002 loss=2.478, ppl=5.57, wps=6029.4, ups=0.09, wpb=64836, bsz=128, num_updates=16242, lr=9.98781e-05, gnorm=1.895, loss_scale=16, train_wall=10, gb_free=2.8, wall=187198
2021-06-20 22:38:55 | INFO | train_inner | epoch 006: 1328 / 3002 loss=2.545, ppl=5.84, wps=5794.1, ups=0.09, wpb=64799, bsz=128, num_updates=16243, lr=9.9878e-05, gnorm=1.993, loss_scale=16, train_wall=11, gb_free=2.8, wall=187210
2021-06-20 22:39:06 | INFO | train_inner | epoch 006: 1329 / 3002 loss=2.48, ppl=5.58, wps=5907.7, ups=0.09, wpb=64888, bsz=128, num_updates=16244, lr=9.9878e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=187221
2021-06-20 22:39:17 | INFO | train_inner | epoch 006: 1330 / 3002 loss=2.336, ppl=5.05, wps=5845.7, ups=0.09, wpb=64840, bsz=128, num_updates=16245, lr=9.9878e-05, gnorm=1.88, loss_scale=16, train_wall=11, gb_free=2.8, wall=187232
2021-06-20 22:39:28 | INFO | train_inner | epoch 006: 1331 / 3002 loss=2.445, ppl=5.45, wps=5821.8, ups=0.09, wpb=64728, bsz=128, num_updates=16246, lr=9.9878e-05, gnorm=2.91, loss_scale=16, train_wall=11, gb_free=2.8, wall=187243
2021-06-20 22:39:40 | INFO | train_inner | epoch 006: 1332 / 3002 loss=2.403, ppl=5.29, wps=5806, ups=0.09, wpb=64835, bsz=128, num_updates=16247, lr=9.9878e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=187254
2021-06-20 22:39:51 | INFO | train_inner | epoch 006: 1333 / 3002 loss=2.586, ppl=6, wps=5761.8, ups=0.09, wpb=64815, bsz=128, num_updates=16248, lr=9.9878e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=187265
2021-06-20 22:40:02 | INFO | train_inner | epoch 006: 1334 / 3002 loss=2.458, ppl=5.49, wps=5774.4, ups=0.09, wpb=64836, bsz=128, num_updates=16249, lr=9.9878e-05, gnorm=2.34, loss_scale=16, train_wall=11, gb_free=2.8, wall=187276
2021-06-20 22:40:13 | INFO | train_inner | epoch 006: 1335 / 3002 loss=2.376, ppl=5.19, wps=5765.3, ups=0.09, wpb=64745, bsz=128, num_updates=16250, lr=9.9878e-05, gnorm=2.255, loss_scale=16, train_wall=11, gb_free=2.8, wall=187288
2021-06-20 22:40:24 | INFO | train_inner | epoch 006: 1336 / 3002 loss=2.403, ppl=5.29, wps=5931.5, ups=0.09, wpb=64867, bsz=128, num_updates=16251, lr=9.9878e-05, gnorm=2.649, loss_scale=16, train_wall=10, gb_free=2.8, wall=187299
2021-06-20 22:40:35 | INFO | train_inner | epoch 006: 1337 / 3002 loss=2.573, ppl=5.95, wps=5797.1, ups=0.09, wpb=64814, bsz=128, num_updates=16252, lr=9.9878e-05, gnorm=1.946, loss_scale=16, train_wall=11, gb_free=2.8, wall=187310
2021-06-20 22:40:47 | INFO | train_inner | epoch 006: 1338 / 3002 loss=2.601, ppl=6.07, wps=5827.2, ups=0.09, wpb=64868, bsz=128, num_updates=16253, lr=9.9878e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=187321
2021-06-20 22:40:57 | INFO | train_inner | epoch 006: 1339 / 3002 loss=2.548, ppl=5.85, wps=6034.3, ups=0.09, wpb=64837, bsz=128, num_updates=16254, lr=9.9878e-05, gnorm=2.022, loss_scale=16, train_wall=10, gb_free=2.8, wall=187332
2021-06-20 22:41:08 | INFO | train_inner | epoch 006: 1340 / 3002 loss=2.395, ppl=5.26, wps=5915.4, ups=0.09, wpb=64852, bsz=128, num_updates=16255, lr=9.9878e-05, gnorm=1.861, loss_scale=16, train_wall=11, gb_free=2.8, wall=187343
2021-06-20 22:41:19 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 22:41:30 | INFO | train_inner | epoch 006: 1342 / 3002 loss=2.535, ppl=5.79, wps=2947.2, ups=0.05, wpb=64843, bsz=128, num_updates=16256, lr=9.98779e-05, gnorm=1.924, loss_scale=8, train_wall=21, gb_free=2.8, wall=187365
2021-06-20 22:41:41 | INFO | train_inner | epoch 006: 1343 / 3002 loss=2.445, ppl=5.45, wps=5888.5, ups=0.09, wpb=64841, bsz=128, num_updates=16257, lr=9.98779e-05, gnorm=1.902, loss_scale=8, train_wall=11, gb_free=2.8, wall=187376
2021-06-20 22:41:52 | INFO | train_inner | epoch 006: 1344 / 3002 loss=2.468, ppl=5.53, wps=5917.6, ups=0.09, wpb=64900, bsz=128, num_updates=16258, lr=9.98779e-05, gnorm=1.967, loss_scale=8, train_wall=11, gb_free=2.8, wall=187387
2021-06-20 22:42:03 | INFO | train_inner | epoch 006: 1345 / 3002 loss=2.343, ppl=5.08, wps=5936.4, ups=0.09, wpb=64894, bsz=128, num_updates=16259, lr=9.98779e-05, gnorm=1.849, loss_scale=8, train_wall=10, gb_free=2.8, wall=187398
2021-06-20 22:42:14 | INFO | train_inner | epoch 006: 1346 / 3002 loss=2.545, ppl=5.83, wps=5794.1, ups=0.09, wpb=64811, bsz=128, num_updates=16260, lr=9.98779e-05, gnorm=1.952, loss_scale=8, train_wall=11, gb_free=2.8, wall=187409
2021-06-20 22:42:26 | INFO | train_inner | epoch 006: 1347 / 3002 loss=2.297, ppl=4.91, wps=5825.6, ups=0.09, wpb=64813, bsz=128, num_updates=16261, lr=9.98779e-05, gnorm=1.945, loss_scale=8, train_wall=11, gb_free=2.8, wall=187420
2021-06-20 22:42:36 | INFO | train_inner | epoch 006: 1348 / 3002 loss=2.456, ppl=5.49, wps=5925.2, ups=0.09, wpb=64829, bsz=128, num_updates=16262, lr=9.98779e-05, gnorm=1.897, loss_scale=8, train_wall=10, gb_free=2.8, wall=187431
2021-06-20 22:42:48 | INFO | train_inner | epoch 006: 1349 / 3002 loss=2.464, ppl=5.52, wps=5850.4, ups=0.09, wpb=64780, bsz=128, num_updates=16263, lr=9.98779e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=187442
2021-06-20 22:42:59 | INFO | train_inner | epoch 006: 1350 / 3002 loss=2.456, ppl=5.49, wps=5858, ups=0.09, wpb=64838, bsz=128, num_updates=16264, lr=9.98779e-05, gnorm=2.024, loss_scale=8, train_wall=11, gb_free=2.8, wall=187453
2021-06-20 22:43:09 | INFO | train_inner | epoch 006: 1351 / 3002 loss=2.372, ppl=5.18, wps=6093.9, ups=0.09, wpb=64842, bsz=128, num_updates=16265, lr=9.98779e-05, gnorm=1.927, loss_scale=8, train_wall=10, gb_free=2.8, wall=187464
2021-06-20 22:43:20 | INFO | train_inner | epoch 006: 1352 / 3002 loss=2.464, ppl=5.52, wps=5811.2, ups=0.09, wpb=64864, bsz=128, num_updates=16266, lr=9.98779e-05, gnorm=1.882, loss_scale=8, train_wall=11, gb_free=2.8, wall=187475
2021-06-20 22:43:31 | INFO | train_inner | epoch 006: 1353 / 3002 loss=2.509, ppl=5.69, wps=5880.1, ups=0.09, wpb=64832, bsz=128, num_updates=16267, lr=9.98779e-05, gnorm=2.095, loss_scale=8, train_wall=11, gb_free=2.8, wall=187486
2021-06-20 22:43:43 | INFO | train_inner | epoch 006: 1354 / 3002 loss=2.591, ppl=6.03, wps=5823.1, ups=0.09, wpb=64821, bsz=128, num_updates=16268, lr=9.98778e-05, gnorm=1.983, loss_scale=8, train_wall=11, gb_free=2.8, wall=187497
2021-06-20 22:43:54 | INFO | train_inner | epoch 006: 1355 / 3002 loss=2.432, ppl=5.4, wps=5943.5, ups=0.09, wpb=64899, bsz=128, num_updates=16269, lr=9.98778e-05, gnorm=3.677, loss_scale=8, train_wall=10, gb_free=2.8, wall=187508
2021-06-20 22:44:05 | INFO | train_inner | epoch 006: 1356 / 3002 loss=2.365, ppl=5.15, wps=5889.8, ups=0.09, wpb=64867, bsz=128, num_updates=16270, lr=9.98778e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=187519
2021-06-20 22:44:16 | INFO | train_inner | epoch 006: 1357 / 3002 loss=2.439, ppl=5.42, wps=5854.1, ups=0.09, wpb=64762, bsz=128, num_updates=16271, lr=9.98778e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=187530
2021-06-20 22:44:26 | INFO | train_inner | epoch 006: 1358 / 3002 loss=2.579, ppl=5.98, wps=5947, ups=0.09, wpb=64787, bsz=128, num_updates=16272, lr=9.98778e-05, gnorm=1.931, loss_scale=8, train_wall=10, gb_free=2.8, wall=187541
2021-06-20 22:44:38 | INFO | train_inner | epoch 006: 1359 / 3002 loss=2.439, ppl=5.42, wps=5829.3, ups=0.09, wpb=64866, bsz=128, num_updates=16273, lr=9.98778e-05, gnorm=1.857, loss_scale=8, train_wall=11, gb_free=2.8, wall=187552
2021-06-20 22:44:49 | INFO | train_inner | epoch 006: 1360 / 3002 loss=2.474, ppl=5.55, wps=5924.9, ups=0.09, wpb=64759, bsz=128, num_updates=16274, lr=9.98778e-05, gnorm=2.488, loss_scale=8, train_wall=10, gb_free=2.8, wall=187563
2021-06-20 22:45:00 | INFO | train_inner | epoch 006: 1361 / 3002 loss=2.53, ppl=5.78, wps=5829.1, ups=0.09, wpb=64776, bsz=128, num_updates=16275, lr=9.98778e-05, gnorm=2.018, loss_scale=8, train_wall=11, gb_free=2.8, wall=187574
2021-06-20 22:45:11 | INFO | train_inner | epoch 006: 1362 / 3002 loss=2.468, ppl=5.53, wps=5797.9, ups=0.09, wpb=64837, bsz=128, num_updates=16276, lr=9.98778e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=187585
2021-06-20 22:45:22 | INFO | train_inner | epoch 006: 1363 / 3002 loss=2.277, ppl=4.85, wps=5830.1, ups=0.09, wpb=64857, bsz=128, num_updates=16277, lr=9.98778e-05, gnorm=1.861, loss_scale=8, train_wall=11, gb_free=2.8, wall=187596
2021-06-20 22:45:33 | INFO | train_inner | epoch 006: 1364 / 3002 loss=2.401, ppl=5.28, wps=5900.6, ups=0.09, wpb=64857, bsz=128, num_updates=16278, lr=9.98778e-05, gnorm=1.914, loss_scale=8, train_wall=10, gb_free=2.8, wall=187607
2021-06-20 22:45:44 | INFO | train_inner | epoch 006: 1365 / 3002 loss=2.514, ppl=5.71, wps=5823.2, ups=0.09, wpb=64897, bsz=128, num_updates=16279, lr=9.98778e-05, gnorm=1.994, loss_scale=8, train_wall=11, gb_free=2.8, wall=187618
2021-06-20 22:45:55 | INFO | train_inner | epoch 006: 1366 / 3002 loss=2.317, ppl=4.98, wps=5954.2, ups=0.09, wpb=64871, bsz=128, num_updates=16280, lr=9.98778e-05, gnorm=1.938, loss_scale=8, train_wall=10, gb_free=2.8, wall=187629
2021-06-20 22:46:06 | INFO | train_inner | epoch 006: 1367 / 3002 loss=2.523, ppl=5.75, wps=5865, ups=0.09, wpb=64801, bsz=128, num_updates=16281, lr=9.98777e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=187640
2021-06-20 22:46:17 | INFO | train_inner | epoch 006: 1368 / 3002 loss=2.427, ppl=5.38, wps=5895.7, ups=0.09, wpb=64824, bsz=128, num_updates=16282, lr=9.98777e-05, gnorm=1.906, loss_scale=8, train_wall=11, gb_free=2.8, wall=187651
2021-06-20 22:46:28 | INFO | train_inner | epoch 006: 1369 / 3002 loss=2.343, ppl=5.07, wps=5946.4, ups=0.09, wpb=64810, bsz=128, num_updates=16283, lr=9.98777e-05, gnorm=1.918, loss_scale=8, train_wall=10, gb_free=2.8, wall=187662
2021-06-20 22:46:39 | INFO | train_inner | epoch 006: 1370 / 3002 loss=2.434, ppl=5.4, wps=5985.8, ups=0.09, wpb=64806, bsz=128, num_updates=16284, lr=9.98777e-05, gnorm=1.927, loss_scale=8, train_wall=10, gb_free=2.8, wall=187673
2021-06-20 22:46:50 | INFO | train_inner | epoch 006: 1371 / 3002 loss=2.436, ppl=5.41, wps=5898.1, ups=0.09, wpb=64823, bsz=128, num_updates=16285, lr=9.98777e-05, gnorm=2.026, loss_scale=8, train_wall=11, gb_free=2.8, wall=187684
2021-06-20 22:47:01 | INFO | train_inner | epoch 006: 1372 / 3002 loss=2.386, ppl=5.23, wps=5845.8, ups=0.09, wpb=64957, bsz=128, num_updates=16286, lr=9.98777e-05, gnorm=2.092, loss_scale=8, train_wall=11, gb_free=2.8, wall=187695
2021-06-20 22:47:12 | INFO | train_inner | epoch 006: 1373 / 3002 loss=2.389, ppl=5.24, wps=5987.8, ups=0.09, wpb=64830, bsz=128, num_updates=16287, lr=9.98777e-05, gnorm=2.016, loss_scale=8, train_wall=10, gb_free=2.8, wall=187706
2021-06-20 22:47:23 | INFO | train_inner | epoch 006: 1374 / 3002 loss=2.628, ppl=6.18, wps=5875, ups=0.09, wpb=64823, bsz=128, num_updates=16288, lr=9.98777e-05, gnorm=2.014, loss_scale=8, train_wall=11, gb_free=2.8, wall=187717
2021-06-20 22:47:34 | INFO | train_inner | epoch 006: 1375 / 3002 loss=2.412, ppl=5.32, wps=5852.5, ups=0.09, wpb=64919, bsz=128, num_updates=16289, lr=9.98777e-05, gnorm=3.893, loss_scale=8, train_wall=11, gb_free=2.8, wall=187728
2021-06-20 22:47:45 | INFO | train_inner | epoch 006: 1376 / 3002 loss=2.598, ppl=6.06, wps=5869.6, ups=0.09, wpb=64782, bsz=128, num_updates=16290, lr=9.98777e-05, gnorm=2.188, loss_scale=8, train_wall=11, gb_free=2.8, wall=187739
2021-06-20 22:47:56 | INFO | train_inner | epoch 006: 1377 / 3002 loss=2.495, ppl=5.64, wps=5940.8, ups=0.09, wpb=64817, bsz=128, num_updates=16291, lr=9.98777e-05, gnorm=1.976, loss_scale=8, train_wall=10, gb_free=2.8, wall=187750
2021-06-20 22:48:07 | INFO | train_inner | epoch 006: 1378 / 3002 loss=2.323, ppl=5.01, wps=5813.2, ups=0.09, wpb=64822, bsz=128, num_updates=16292, lr=9.98777e-05, gnorm=1.867, loss_scale=8, train_wall=11, gb_free=2.8, wall=187761
2021-06-20 22:48:18 | INFO | train_inner | epoch 006: 1379 / 3002 loss=2.406, ppl=5.3, wps=5855.8, ups=0.09, wpb=64832, bsz=128, num_updates=16293, lr=9.98776e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=187772
2021-06-20 22:48:29 | INFO | train_inner | epoch 006: 1380 / 3002 loss=2.446, ppl=5.45, wps=5758.9, ups=0.09, wpb=64834, bsz=128, num_updates=16294, lr=9.98776e-05, gnorm=2.144, loss_scale=8, train_wall=11, gb_free=2.8, wall=187784
2021-06-20 22:48:40 | INFO | train_inner | epoch 006: 1381 / 3002 loss=2.402, ppl=5.28, wps=5972.2, ups=0.09, wpb=64886, bsz=128, num_updates=16295, lr=9.98776e-05, gnorm=1.905, loss_scale=8, train_wall=10, gb_free=2.8, wall=187794
2021-06-20 22:48:51 | INFO | train_inner | epoch 006: 1382 / 3002 loss=2.428, ppl=5.38, wps=5828.9, ups=0.09, wpb=64828, bsz=128, num_updates=16296, lr=9.98776e-05, gnorm=1.93, loss_scale=8, train_wall=11, gb_free=2.8, wall=187806
2021-06-20 22:49:03 | INFO | train_inner | epoch 006: 1383 / 3002 loss=2.465, ppl=5.52, wps=5752, ups=0.09, wpb=64839, bsz=128, num_updates=16297, lr=9.98776e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=187817
2021-06-20 22:49:14 | INFO | train_inner | epoch 006: 1384 / 3002 loss=2.535, ppl=5.79, wps=5867.9, ups=0.09, wpb=64775, bsz=128, num_updates=16298, lr=9.98776e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=187828
2021-06-20 22:49:25 | INFO | train_inner | epoch 006: 1385 / 3002 loss=2.722, ppl=6.6, wps=5826.2, ups=0.09, wpb=64856, bsz=128, num_updates=16299, lr=9.98776e-05, gnorm=2.061, loss_scale=8, train_wall=11, gb_free=2.8, wall=187839
2021-06-20 22:49:36 | INFO | train_inner | epoch 006: 1386 / 3002 loss=2.432, ppl=5.4, wps=5828.2, ups=0.09, wpb=64774, bsz=128, num_updates=16300, lr=9.98776e-05, gnorm=2.174, loss_scale=8, train_wall=11, gb_free=2.8, wall=187850
2021-06-20 22:49:47 | INFO | train_inner | epoch 006: 1387 / 3002 loss=2.442, ppl=5.43, wps=5819.1, ups=0.09, wpb=64804, bsz=128, num_updates=16301, lr=9.98776e-05, gnorm=2.234, loss_scale=8, train_wall=11, gb_free=2.8, wall=187861
2021-06-20 22:49:58 | INFO | train_inner | epoch 006: 1388 / 3002 loss=2.433, ppl=5.4, wps=5744.6, ups=0.09, wpb=64821, bsz=128, num_updates=16302, lr=9.98776e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=187873
2021-06-20 22:50:09 | INFO | train_inner | epoch 006: 1389 / 3002 loss=2.382, ppl=5.21, wps=5753.7, ups=0.09, wpb=64841, bsz=128, num_updates=16303, lr=9.98776e-05, gnorm=1.993, loss_scale=8, train_wall=11, gb_free=2.8, wall=187884
2021-06-20 22:50:21 | INFO | train_inner | epoch 006: 1390 / 3002 loss=2.43, ppl=5.39, wps=5750.3, ups=0.09, wpb=64767, bsz=128, num_updates=16304, lr=9.98776e-05, gnorm=2.065, loss_scale=8, train_wall=11, gb_free=2.8, wall=187895
2021-06-20 22:50:31 | INFO | train_inner | epoch 006: 1391 / 3002 loss=2.335, ppl=5.05, wps=6060.2, ups=0.09, wpb=64805, bsz=128, num_updates=16305, lr=9.98776e-05, gnorm=1.936, loss_scale=8, train_wall=10, gb_free=2.8, wall=187906
2021-06-20 22:50:43 | INFO | train_inner | epoch 006: 1392 / 3002 loss=2.489, ppl=5.62, wps=5783, ups=0.09, wpb=64794, bsz=128, num_updates=16306, lr=9.98775e-05, gnorm=4.609, loss_scale=8, train_wall=11, gb_free=2.8, wall=187917
2021-06-20 22:50:54 | INFO | train_inner | epoch 006: 1393 / 3002 loss=2.587, ppl=6.01, wps=5845, ups=0.09, wpb=64737, bsz=128, num_updates=16307, lr=9.98775e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=187928
2021-06-20 22:51:05 | INFO | train_inner | epoch 006: 1394 / 3002 loss=2.432, ppl=5.4, wps=5871.1, ups=0.09, wpb=64826, bsz=128, num_updates=16308, lr=9.98775e-05, gnorm=1.869, loss_scale=8, train_wall=11, gb_free=2.8, wall=187939
2021-06-20 22:51:16 | INFO | train_inner | epoch 006: 1395 / 3002 loss=2.398, ppl=5.27, wps=5887, ups=0.09, wpb=64732, bsz=128, num_updates=16309, lr=9.98775e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=187950
2021-06-20 22:51:27 | INFO | train_inner | epoch 006: 1396 / 3002 loss=2.47, ppl=5.54, wps=5830.1, ups=0.09, wpb=64758, bsz=128, num_updates=16310, lr=9.98775e-05, gnorm=1.964, loss_scale=8, train_wall=11, gb_free=2.8, wall=187961
2021-06-20 22:51:38 | INFO | train_inner | epoch 006: 1397 / 3002 loss=2.533, ppl=5.79, wps=5764.1, ups=0.09, wpb=64745, bsz=128, num_updates=16311, lr=9.98775e-05, gnorm=1.879, loss_scale=8, train_wall=11, gb_free=2.8, wall=187972
2021-06-20 22:51:49 | INFO | train_inner | epoch 006: 1398 / 3002 loss=2.406, ppl=5.3, wps=5843.3, ups=0.09, wpb=64803, bsz=128, num_updates=16312, lr=9.98775e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=187984
2021-06-20 22:52:00 | INFO | train_inner | epoch 006: 1399 / 3002 loss=2.416, ppl=5.34, wps=5810.8, ups=0.09, wpb=64829, bsz=128, num_updates=16313, lr=9.98775e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=187995
2021-06-20 22:52:11 | INFO | train_inner | epoch 006: 1400 / 3002 loss=2.444, ppl=5.44, wps=5858.8, ups=0.09, wpb=64840, bsz=128, num_updates=16314, lr=9.98775e-05, gnorm=1.995, loss_scale=8, train_wall=11, gb_free=2.8, wall=188006
2021-06-20 22:52:22 | INFO | train_inner | epoch 006: 1401 / 3002 loss=2.49, ppl=5.62, wps=5868.1, ups=0.09, wpb=64915, bsz=128, num_updates=16315, lr=9.98775e-05, gnorm=2.09, loss_scale=8, train_wall=11, gb_free=2.8, wall=188017
2021-06-20 22:52:33 | INFO | train_inner | epoch 006: 1402 / 3002 loss=2.582, ppl=5.99, wps=5941.4, ups=0.09, wpb=64854, bsz=128, num_updates=16316, lr=9.98775e-05, gnorm=2.033, loss_scale=8, train_wall=10, gb_free=2.8, wall=188028
2021-06-20 22:52:44 | INFO | train_inner | epoch 006: 1403 / 3002 loss=2.53, ppl=5.78, wps=5862.3, ups=0.09, wpb=64821, bsz=128, num_updates=16317, lr=9.98775e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=188039
2021-06-20 22:52:56 | INFO | train_inner | epoch 006: 1404 / 3002 loss=2.41, ppl=5.32, wps=5854.2, ups=0.09, wpb=64841, bsz=128, num_updates=16318, lr=9.98774e-05, gnorm=2.079, loss_scale=8, train_wall=11, gb_free=2.8, wall=188050
2021-06-20 22:53:07 | INFO | train_inner | epoch 006: 1405 / 3002 loss=2.485, ppl=5.6, wps=5879.5, ups=0.09, wpb=64843, bsz=128, num_updates=16319, lr=9.98774e-05, gnorm=1.955, loss_scale=8, train_wall=11, gb_free=2.8, wall=188061
2021-06-20 22:53:18 | INFO | train_inner | epoch 006: 1406 / 3002 loss=2.529, ppl=5.77, wps=5823.2, ups=0.09, wpb=64849, bsz=128, num_updates=16320, lr=9.98774e-05, gnorm=2.108, loss_scale=8, train_wall=11, gb_free=2.8, wall=188072
2021-06-20 22:53:29 | INFO | train_inner | epoch 006: 1407 / 3002 loss=2.571, ppl=5.94, wps=5897.1, ups=0.09, wpb=64811, bsz=128, num_updates=16321, lr=9.98774e-05, gnorm=2.01, loss_scale=8, train_wall=11, gb_free=2.8, wall=188083
2021-06-20 22:53:40 | INFO | train_inner | epoch 006: 1408 / 3002 loss=2.414, ppl=5.33, wps=5891.8, ups=0.09, wpb=64846, bsz=128, num_updates=16322, lr=9.98774e-05, gnorm=2.104, loss_scale=8, train_wall=11, gb_free=2.8, wall=188094
2021-06-20 22:53:51 | INFO | train_inner | epoch 006: 1409 / 3002 loss=2.553, ppl=5.87, wps=5907.6, ups=0.09, wpb=64743, bsz=128, num_updates=16323, lr=9.98774e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=188105
2021-06-20 22:54:02 | INFO | train_inner | epoch 006: 1410 / 3002 loss=2.367, ppl=5.16, wps=5870.4, ups=0.09, wpb=64827, bsz=128, num_updates=16324, lr=9.98774e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=188116
2021-06-20 22:54:13 | INFO | train_inner | epoch 006: 1411 / 3002 loss=2.654, ppl=6.29, wps=5891.2, ups=0.09, wpb=64683, bsz=128, num_updates=16325, lr=9.98774e-05, gnorm=2.244, loss_scale=8, train_wall=11, gb_free=2.8, wall=188127
2021-06-20 22:54:24 | INFO | train_inner | epoch 006: 1412 / 3002 loss=2.483, ppl=5.59, wps=5911.7, ups=0.09, wpb=64887, bsz=128, num_updates=16326, lr=9.98774e-05, gnorm=2.001, loss_scale=8, train_wall=10, gb_free=2.8, wall=188138
2021-06-20 22:54:35 | INFO | train_inner | epoch 006: 1413 / 3002 loss=2.351, ppl=5.1, wps=5863, ups=0.09, wpb=64852, bsz=128, num_updates=16327, lr=9.98774e-05, gnorm=5.618, loss_scale=8, train_wall=11, gb_free=2.8, wall=188149
2021-06-20 22:54:46 | INFO | train_inner | epoch 006: 1414 / 3002 loss=2.534, ppl=5.79, wps=5709.3, ups=0.09, wpb=64816, bsz=128, num_updates=16328, lr=9.98774e-05, gnorm=1.968, loss_scale=8, train_wall=11, gb_free=2.8, wall=188160
2021-06-20 22:54:57 | INFO | train_inner | epoch 006: 1415 / 3002 loss=2.476, ppl=5.56, wps=5772, ups=0.09, wpb=64796, bsz=128, num_updates=16329, lr=9.98774e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=188172
2021-06-20 22:55:08 | INFO | train_inner | epoch 006: 1416 / 3002 loss=2.355, ppl=5.12, wps=5887.8, ups=0.09, wpb=64827, bsz=128, num_updates=16330, lr=9.98774e-05, gnorm=1.935, loss_scale=8, train_wall=11, gb_free=2.8, wall=188183
2021-06-20 22:55:20 | INFO | train_inner | epoch 006: 1417 / 3002 loss=2.458, ppl=5.5, wps=5769.2, ups=0.09, wpb=64796, bsz=128, num_updates=16331, lr=9.98773e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=188194
2021-06-20 22:55:31 | INFO | train_inner | epoch 006: 1418 / 3002 loss=2.377, ppl=5.19, wps=5869.9, ups=0.09, wpb=64751, bsz=128, num_updates=16332, lr=9.98773e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=188205
2021-06-20 22:55:42 | INFO | train_inner | epoch 006: 1419 / 3002 loss=2.362, ppl=5.14, wps=5823.9, ups=0.09, wpb=64834, bsz=128, num_updates=16333, lr=9.98773e-05, gnorm=1.889, loss_scale=8, train_wall=11, gb_free=2.8, wall=188216
2021-06-20 22:55:53 | INFO | train_inner | epoch 006: 1420 / 3002 loss=2.423, ppl=5.36, wps=5813.8, ups=0.09, wpb=64911, bsz=128, num_updates=16334, lr=9.98773e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=188227
2021-06-20 22:56:04 | INFO | train_inner | epoch 006: 1421 / 3002 loss=2.522, ppl=5.74, wps=5860.4, ups=0.09, wpb=64791, bsz=128, num_updates=16335, lr=9.98773e-05, gnorm=2.413, loss_scale=8, train_wall=11, gb_free=2.8, wall=188238
2021-06-20 22:56:15 | INFO | train_inner | epoch 006: 1422 / 3002 loss=2.424, ppl=5.37, wps=5897, ups=0.09, wpb=64836, bsz=128, num_updates=16336, lr=9.98773e-05, gnorm=1.99, loss_scale=8, train_wall=11, gb_free=2.8, wall=188249
2021-06-20 22:56:26 | INFO | train_inner | epoch 006: 1423 / 3002 loss=2.599, ppl=6.06, wps=5816.6, ups=0.09, wpb=64745, bsz=128, num_updates=16337, lr=9.98773e-05, gnorm=2.228, loss_scale=8, train_wall=11, gb_free=2.8, wall=188260
2021-06-20 22:56:37 | INFO | train_inner | epoch 006: 1424 / 3002 loss=2.591, ppl=6.03, wps=5898.7, ups=0.09, wpb=64843, bsz=128, num_updates=16338, lr=9.98773e-05, gnorm=2.041, loss_scale=8, train_wall=11, gb_free=2.8, wall=188271
2021-06-20 22:56:48 | INFO | train_inner | epoch 006: 1425 / 3002 loss=2.443, ppl=5.44, wps=5986.4, ups=0.09, wpb=64904, bsz=128, num_updates=16339, lr=9.98773e-05, gnorm=1.918, loss_scale=8, train_wall=10, gb_free=2.8, wall=188282
2021-06-20 22:56:59 | INFO | train_inner | epoch 006: 1426 / 3002 loss=2.488, ppl=5.61, wps=5854, ups=0.09, wpb=64744, bsz=128, num_updates=16340, lr=9.98773e-05, gnorm=1.996, loss_scale=8, train_wall=11, gb_free=2.8, wall=188293
2021-06-20 22:57:10 | INFO | train_inner | epoch 006: 1427 / 3002 loss=2.648, ppl=6.27, wps=5754.4, ups=0.09, wpb=64883, bsz=128, num_updates=16341, lr=9.98773e-05, gnorm=1.973, loss_scale=8, train_wall=11, gb_free=2.8, wall=188305
2021-06-20 22:57:21 | INFO | train_inner | epoch 006: 1428 / 3002 loss=2.525, ppl=5.76, wps=5920.8, ups=0.09, wpb=64888, bsz=128, num_updates=16342, lr=9.98773e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=188316
2021-06-20 22:57:32 | INFO | train_inner | epoch 006: 1429 / 3002 loss=2.455, ppl=5.48, wps=5845.6, ups=0.09, wpb=64797, bsz=128, num_updates=16343, lr=9.98772e-05, gnorm=1.938, loss_scale=8, train_wall=11, gb_free=2.8, wall=188327
2021-06-20 22:57:43 | INFO | train_inner | epoch 006: 1430 / 3002 loss=2.433, ppl=5.4, wps=5870.5, ups=0.09, wpb=64771, bsz=128, num_updates=16344, lr=9.98772e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=188338
2021-06-20 22:57:55 | INFO | train_inner | epoch 006: 1431 / 3002 loss=2.554, ppl=5.87, wps=5753.7, ups=0.09, wpb=64815, bsz=128, num_updates=16345, lr=9.98772e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=188349
2021-06-20 22:58:06 | INFO | train_inner | epoch 006: 1432 / 3002 loss=2.46, ppl=5.5, wps=5875.3, ups=0.09, wpb=64902, bsz=128, num_updates=16346, lr=9.98772e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=188360
2021-06-20 22:58:17 | INFO | train_inner | epoch 006: 1433 / 3002 loss=2.391, ppl=5.25, wps=5873.9, ups=0.09, wpb=64789, bsz=128, num_updates=16347, lr=9.98772e-05, gnorm=2.002, loss_scale=8, train_wall=11, gb_free=2.8, wall=188371
2021-06-20 22:58:28 | INFO | train_inner | epoch 006: 1434 / 3002 loss=2.52, ppl=5.73, wps=5837.1, ups=0.09, wpb=64792, bsz=128, num_updates=16348, lr=9.98772e-05, gnorm=2.234, loss_scale=8, train_wall=11, gb_free=2.8, wall=188382
2021-06-20 22:58:39 | INFO | train_inner | epoch 006: 1435 / 3002 loss=2.469, ppl=5.54, wps=5815.7, ups=0.09, wpb=64883, bsz=128, num_updates=16349, lr=9.98772e-05, gnorm=2.125, loss_scale=8, train_wall=11, gb_free=2.8, wall=188393
2021-06-20 22:58:50 | INFO | train_inner | epoch 006: 1436 / 3002 loss=2.471, ppl=5.54, wps=5919.8, ups=0.09, wpb=64801, bsz=128, num_updates=16350, lr=9.98772e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=188404
2021-06-20 22:59:01 | INFO | train_inner | epoch 006: 1437 / 3002 loss=2.601, ppl=6.07, wps=5892.8, ups=0.09, wpb=64889, bsz=128, num_updates=16351, lr=9.98772e-05, gnorm=1.903, loss_scale=8, train_wall=11, gb_free=2.8, wall=188415
2021-06-20 22:59:12 | INFO | train_inner | epoch 006: 1438 / 3002 loss=2.434, ppl=5.4, wps=5953.7, ups=0.09, wpb=64844, bsz=128, num_updates=16352, lr=9.98772e-05, gnorm=1.895, loss_scale=8, train_wall=10, gb_free=2.8, wall=188426
2021-06-20 22:59:23 | INFO | train_inner | epoch 006: 1439 / 3002 loss=2.393, ppl=5.25, wps=5772.5, ups=0.09, wpb=64866, bsz=128, num_updates=16353, lr=9.98772e-05, gnorm=1.896, loss_scale=8, train_wall=11, gb_free=2.8, wall=188437
2021-06-20 22:59:34 | INFO | train_inner | epoch 006: 1440 / 3002 loss=2.451, ppl=5.47, wps=5795.7, ups=0.09, wpb=64746, bsz=128, num_updates=16354, lr=9.98772e-05, gnorm=1.883, loss_scale=8, train_wall=11, gb_free=2.8, wall=188448
2021-06-20 22:59:45 | INFO | train_inner | epoch 006: 1441 / 3002 loss=2.551, ppl=5.86, wps=5817.8, ups=0.09, wpb=64717, bsz=128, num_updates=16355, lr=9.98772e-05, gnorm=1.91, loss_scale=8, train_wall=11, gb_free=2.8, wall=188460
2021-06-20 22:59:57 | INFO | train_inner | epoch 006: 1442 / 3002 loss=2.58, ppl=5.98, wps=5763.9, ups=0.09, wpb=64879, bsz=128, num_updates=16356, lr=9.98771e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=188471
2021-06-20 23:00:08 | INFO | train_inner | epoch 006: 1443 / 3002 loss=2.435, ppl=5.41, wps=5702.5, ups=0.09, wpb=64860, bsz=128, num_updates=16357, lr=9.98771e-05, gnorm=1.948, loss_scale=8, train_wall=11, gb_free=2.8, wall=188482
2021-06-20 23:00:19 | INFO | train_inner | epoch 006: 1444 / 3002 loss=2.417, ppl=5.34, wps=5777.6, ups=0.09, wpb=64829, bsz=128, num_updates=16358, lr=9.98771e-05, gnorm=1.83, loss_scale=8, train_wall=11, gb_free=2.8, wall=188493
2021-06-20 23:00:30 | INFO | train_inner | epoch 006: 1445 / 3002 loss=2.501, ppl=5.66, wps=5900.6, ups=0.09, wpb=64820, bsz=128, num_updates=16359, lr=9.98771e-05, gnorm=1.875, loss_scale=8, train_wall=11, gb_free=2.8, wall=188504
2021-06-20 23:00:41 | INFO | train_inner | epoch 006: 1446 / 3002 loss=2.623, ppl=6.16, wps=5758.1, ups=0.09, wpb=64746, bsz=128, num_updates=16360, lr=9.98771e-05, gnorm=2.077, loss_scale=8, train_wall=11, gb_free=2.8, wall=188516
2021-06-20 23:00:53 | INFO | train_inner | epoch 006: 1447 / 3002 loss=2.431, ppl=5.39, wps=5801.8, ups=0.09, wpb=64824, bsz=128, num_updates=16361, lr=9.98771e-05, gnorm=1.95, loss_scale=8, train_wall=11, gb_free=2.8, wall=188527
2021-06-20 23:01:04 | INFO | train_inner | epoch 006: 1448 / 3002 loss=2.494, ppl=5.63, wps=5835.7, ups=0.09, wpb=64802, bsz=128, num_updates=16362, lr=9.98771e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=188538
2021-06-20 23:01:15 | INFO | train_inner | epoch 006: 1449 / 3002 loss=2.598, ppl=6.06, wps=5829.7, ups=0.09, wpb=64753, bsz=128, num_updates=16363, lr=9.98771e-05, gnorm=1.92, loss_scale=8, train_wall=11, gb_free=2.8, wall=188549
2021-06-20 23:01:26 | INFO | train_inner | epoch 006: 1450 / 3002 loss=2.402, ppl=5.28, wps=5802.2, ups=0.09, wpb=64873, bsz=128, num_updates=16364, lr=9.98771e-05, gnorm=2.073, loss_scale=8, train_wall=11, gb_free=2.8, wall=188560
2021-06-20 23:01:37 | INFO | train_inner | epoch 006: 1451 / 3002 loss=2.592, ppl=6.03, wps=5848.1, ups=0.09, wpb=64906, bsz=128, num_updates=16365, lr=9.98771e-05, gnorm=1.936, loss_scale=8, train_wall=11, gb_free=2.8, wall=188571
2021-06-20 23:01:48 | INFO | train_inner | epoch 006: 1452 / 3002 loss=2.56, ppl=5.9, wps=5937.9, ups=0.09, wpb=64854, bsz=128, num_updates=16366, lr=9.98771e-05, gnorm=1.846, loss_scale=8, train_wall=10, gb_free=2.8, wall=188582
2021-06-20 23:01:59 | INFO | train_inner | epoch 006: 1453 / 3002 loss=2.446, ppl=5.45, wps=5857.6, ups=0.09, wpb=64858, bsz=128, num_updates=16367, lr=9.98771e-05, gnorm=1.911, loss_scale=8, train_wall=11, gb_free=2.8, wall=188593
2021-06-20 23:02:10 | INFO | train_inner | epoch 006: 1454 / 3002 loss=2.362, ppl=5.14, wps=5808.1, ups=0.09, wpb=64819, bsz=128, num_updates=16368, lr=9.9877e-05, gnorm=1.859, loss_scale=8, train_wall=11, gb_free=2.8, wall=188605
2021-06-20 23:02:21 | INFO | train_inner | epoch 006: 1455 / 3002 loss=2.528, ppl=5.77, wps=5874, ups=0.09, wpb=64781, bsz=128, num_updates=16369, lr=9.9877e-05, gnorm=1.84, loss_scale=8, train_wall=11, gb_free=2.8, wall=188616
2021-06-20 23:02:32 | INFO | train_inner | epoch 006: 1456 / 3002 loss=2.584, ppl=6, wps=5905.7, ups=0.09, wpb=64862, bsz=128, num_updates=16370, lr=9.9877e-05, gnorm=3.726, loss_scale=8, train_wall=11, gb_free=2.8, wall=188627
2021-06-20 23:02:43 | INFO | train_inner | epoch 006: 1457 / 3002 loss=2.482, ppl=5.58, wps=6002.6, ups=0.09, wpb=64922, bsz=128, num_updates=16371, lr=9.9877e-05, gnorm=1.932, loss_scale=8, train_wall=10, gb_free=2.8, wall=188637
2021-06-20 23:02:54 | INFO | train_inner | epoch 006: 1458 / 3002 loss=2.547, ppl=5.84, wps=5818.2, ups=0.09, wpb=64806, bsz=128, num_updates=16372, lr=9.9877e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=188648
2021-06-20 23:03:05 | INFO | train_inner | epoch 006: 1459 / 3002 loss=2.511, ppl=5.7, wps=5880.3, ups=0.09, wpb=64782, bsz=128, num_updates=16373, lr=9.9877e-05, gnorm=1.97, loss_scale=8, train_wall=11, gb_free=2.8, wall=188660
2021-06-20 23:03:16 | INFO | train_inner | epoch 006: 1460 / 3002 loss=2.424, ppl=5.37, wps=5753.4, ups=0.09, wpb=64811, bsz=128, num_updates=16374, lr=9.9877e-05, gnorm=1.868, loss_scale=8, train_wall=11, gb_free=2.8, wall=188671
2021-06-20 23:03:28 | INFO | train_inner | epoch 006: 1461 / 3002 loss=2.516, ppl=5.72, wps=5777.3, ups=0.09, wpb=64787, bsz=128, num_updates=16375, lr=9.9877e-05, gnorm=2.168, loss_scale=8, train_wall=11, gb_free=2.8, wall=188682
2021-06-20 23:03:39 | INFO | train_inner | epoch 006: 1462 / 3002 loss=2.496, ppl=5.64, wps=5873, ups=0.09, wpb=64821, bsz=128, num_updates=16376, lr=9.9877e-05, gnorm=2.501, loss_scale=8, train_wall=11, gb_free=2.8, wall=188693
2021-06-20 23:03:50 | INFO | train_inner | epoch 006: 1463 / 3002 loss=2.492, ppl=5.63, wps=5779.6, ups=0.09, wpb=64870, bsz=128, num_updates=16377, lr=9.9877e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=188704
2021-06-20 23:04:01 | INFO | train_inner | epoch 006: 1464 / 3002 loss=2.636, ppl=6.21, wps=5836.1, ups=0.09, wpb=64777, bsz=128, num_updates=16378, lr=9.9877e-05, gnorm=3.611, loss_scale=8, train_wall=11, gb_free=2.8, wall=188715
2021-06-20 23:04:12 | INFO | train_inner | epoch 006: 1465 / 3002 loss=2.355, ppl=5.12, wps=5860.6, ups=0.09, wpb=64888, bsz=128, num_updates=16379, lr=9.9877e-05, gnorm=1.927, loss_scale=8, train_wall=11, gb_free=2.8, wall=188726
2021-06-20 23:04:23 | INFO | train_inner | epoch 006: 1466 / 3002 loss=2.397, ppl=5.27, wps=5869.6, ups=0.09, wpb=64832, bsz=128, num_updates=16380, lr=9.9877e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=188737
2021-06-20 23:04:34 | INFO | train_inner | epoch 006: 1467 / 3002 loss=2.537, ppl=5.8, wps=5966.6, ups=0.09, wpb=64840, bsz=128, num_updates=16381, lr=9.98769e-05, gnorm=1.902, loss_scale=8, train_wall=10, gb_free=2.8, wall=188748
2021-06-20 23:04:45 | INFO | train_inner | epoch 006: 1468 / 3002 loss=2.36, ppl=5.13, wps=5838, ups=0.09, wpb=64826, bsz=128, num_updates=16382, lr=9.98769e-05, gnorm=1.907, loss_scale=8, train_wall=11, gb_free=2.8, wall=188759
2021-06-20 23:04:56 | INFO | train_inner | epoch 006: 1469 / 3002 loss=2.339, ppl=5.06, wps=5773.7, ups=0.09, wpb=64824, bsz=128, num_updates=16383, lr=9.98769e-05, gnorm=2.034, loss_scale=16, train_wall=11, gb_free=2.8, wall=188771
2021-06-20 23:05:07 | INFO | train_inner | epoch 006: 1470 / 3002 loss=2.491, ppl=5.62, wps=5867.6, ups=0.09, wpb=64821, bsz=128, num_updates=16384, lr=9.98769e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=188782
2021-06-20 23:05:18 | INFO | train_inner | epoch 006: 1471 / 3002 loss=2.499, ppl=5.65, wps=5882.9, ups=0.09, wpb=64850, bsz=128, num_updates=16385, lr=9.98769e-05, gnorm=1.958, loss_scale=16, train_wall=11, gb_free=2.8, wall=188793
2021-06-20 23:05:30 | INFO | train_inner | epoch 006: 1472 / 3002 loss=2.432, ppl=5.4, wps=5805.3, ups=0.09, wpb=64787, bsz=128, num_updates=16386, lr=9.98769e-05, gnorm=1.954, loss_scale=16, train_wall=11, gb_free=2.8, wall=188804
2021-06-20 23:05:41 | INFO | train_inner | epoch 006: 1473 / 3002 loss=2.442, ppl=5.44, wps=5716.9, ups=0.09, wpb=64844, bsz=128, num_updates=16387, lr=9.98769e-05, gnorm=1.889, loss_scale=16, train_wall=11, gb_free=2.8, wall=188815
2021-06-20 23:05:52 | INFO | train_inner | epoch 006: 1474 / 3002 loss=2.422, ppl=5.36, wps=5989.2, ups=0.09, wpb=64865, bsz=128, num_updates=16388, lr=9.98769e-05, gnorm=1.892, loss_scale=16, train_wall=10, gb_free=2.8, wall=188826
2021-06-20 23:06:03 | INFO | train_inner | epoch 006: 1475 / 3002 loss=2.475, ppl=5.56, wps=5866.9, ups=0.09, wpb=64808, bsz=128, num_updates=16389, lr=9.98769e-05, gnorm=3.562, loss_scale=16, train_wall=11, gb_free=2.8, wall=188837
2021-06-20 23:06:14 | INFO | train_inner | epoch 006: 1476 / 3002 loss=2.423, ppl=5.36, wps=5836.2, ups=0.09, wpb=64776, bsz=128, num_updates=16390, lr=9.98769e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=188848
2021-06-20 23:06:25 | INFO | train_inner | epoch 006: 1477 / 3002 loss=2.492, ppl=5.62, wps=5808.3, ups=0.09, wpb=64829, bsz=128, num_updates=16391, lr=9.98769e-05, gnorm=1.908, loss_scale=16, train_wall=11, gb_free=2.8, wall=188859
2021-06-20 23:06:36 | INFO | train_inner | epoch 006: 1478 / 3002 loss=2.517, ppl=5.72, wps=5877.9, ups=0.09, wpb=64929, bsz=128, num_updates=16392, lr=9.98769e-05, gnorm=1.893, loss_scale=16, train_wall=11, gb_free=2.8, wall=188870
2021-06-20 23:06:47 | INFO | train_inner | epoch 006: 1479 / 3002 loss=2.448, ppl=5.46, wps=6027.4, ups=0.09, wpb=64872, bsz=128, num_updates=16393, lr=9.98768e-05, gnorm=1.819, loss_scale=16, train_wall=10, gb_free=2.8, wall=188881
2021-06-20 23:06:58 | INFO | train_inner | epoch 006: 1480 / 3002 loss=2.34, ppl=5.06, wps=5849, ups=0.09, wpb=64799, bsz=128, num_updates=16394, lr=9.98768e-05, gnorm=1.848, loss_scale=16, train_wall=11, gb_free=2.8, wall=188892
2021-06-20 23:07:09 | INFO | train_inner | epoch 006: 1481 / 3002 loss=2.568, ppl=5.93, wps=5980.8, ups=0.09, wpb=64866, bsz=128, num_updates=16395, lr=9.98768e-05, gnorm=2.062, loss_scale=16, train_wall=10, gb_free=2.8, wall=188903
2021-06-20 23:07:20 | INFO | train_inner | epoch 006: 1482 / 3002 loss=2.43, ppl=5.39, wps=5941.1, ups=0.09, wpb=64862, bsz=128, num_updates=16396, lr=9.98768e-05, gnorm=3.321, loss_scale=16, train_wall=10, gb_free=2.8, wall=188914
2021-06-20 23:07:31 | INFO | train_inner | epoch 006: 1483 / 3002 loss=2.496, ppl=5.64, wps=5940, ups=0.09, wpb=64776, bsz=128, num_updates=16397, lr=9.98768e-05, gnorm=1.974, loss_scale=16, train_wall=10, gb_free=2.8, wall=188925
2021-06-20 23:07:42 | INFO | train_inner | epoch 006: 1484 / 3002 loss=2.416, ppl=5.34, wps=5872.9, ups=0.09, wpb=64825, bsz=128, num_updates=16398, lr=9.98768e-05, gnorm=1.949, loss_scale=16, train_wall=11, gb_free=2.8, wall=188936
2021-06-20 23:07:53 | INFO | train_inner | epoch 006: 1485 / 3002 loss=2.407, ppl=5.31, wps=5776.6, ups=0.09, wpb=64911, bsz=128, num_updates=16399, lr=9.98768e-05, gnorm=1.918, loss_scale=16, train_wall=11, gb_free=2.8, wall=188947
2021-06-20 23:08:04 | INFO | train_inner | epoch 006: 1486 / 3002 loss=2.612, ppl=6.12, wps=5851, ups=0.09, wpb=64789, bsz=128, num_updates=16400, lr=9.98768e-05, gnorm=2.052, loss_scale=16, train_wall=11, gb_free=2.8, wall=188958
2021-06-20 23:08:15 | INFO | train_inner | epoch 006: 1487 / 3002 loss=2.451, ppl=5.47, wps=5888, ups=0.09, wpb=64899, bsz=128, num_updates=16401, lr=9.98768e-05, gnorm=1.902, loss_scale=16, train_wall=11, gb_free=2.8, wall=188969
2021-06-20 23:08:26 | INFO | train_inner | epoch 006: 1488 / 3002 loss=2.47, ppl=5.54, wps=5756.6, ups=0.09, wpb=64780, bsz=128, num_updates=16402, lr=9.98768e-05, gnorm=1.873, loss_scale=16, train_wall=11, gb_free=2.8, wall=188981
2021-06-20 23:08:37 | INFO | train_inner | epoch 006: 1489 / 3002 loss=2.509, ppl=5.69, wps=5894.5, ups=0.09, wpb=64795, bsz=128, num_updates=16403, lr=9.98768e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=188992
2021-06-20 23:08:48 | INFO | train_inner | epoch 006: 1490 / 3002 loss=2.459, ppl=5.5, wps=5770.2, ups=0.09, wpb=64822, bsz=128, num_updates=16404, lr=9.98768e-05, gnorm=1.925, loss_scale=16, train_wall=11, gb_free=2.8, wall=189003
2021-06-20 23:09:00 | INFO | train_inner | epoch 006: 1491 / 3002 loss=2.599, ppl=6.06, wps=5822.3, ups=0.09, wpb=64843, bsz=128, num_updates=16405, lr=9.98768e-05, gnorm=1.932, loss_scale=16, train_wall=11, gb_free=2.8, wall=189014
2021-06-20 23:09:10 | INFO | train_inner | epoch 006: 1492 / 3002 loss=2.599, ppl=6.06, wps=5993.3, ups=0.09, wpb=64891, bsz=128, num_updates=16406, lr=9.98767e-05, gnorm=1.956, loss_scale=16, train_wall=10, gb_free=2.8, wall=189025
2021-06-20 23:09:22 | INFO | train_inner | epoch 006: 1493 / 3002 loss=2.498, ppl=5.65, wps=5834.2, ups=0.09, wpb=64819, bsz=128, num_updates=16407, lr=9.98767e-05, gnorm=2.057, loss_scale=16, train_wall=11, gb_free=2.8, wall=189036
2021-06-20 23:09:33 | INFO | train_inner | epoch 006: 1494 / 3002 loss=2.4, ppl=5.28, wps=5773.6, ups=0.09, wpb=64839, bsz=128, num_updates=16408, lr=9.98767e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=189047
2021-06-20 23:09:44 | INFO | train_inner | epoch 006: 1495 / 3002 loss=2.212, ppl=4.63, wps=5918.5, ups=0.09, wpb=64851, bsz=128, num_updates=16409, lr=9.98767e-05, gnorm=1.932, loss_scale=16, train_wall=10, gb_free=2.8, wall=189058
2021-06-20 23:09:55 | INFO | train_inner | epoch 006: 1496 / 3002 loss=2.423, ppl=5.36, wps=5945.1, ups=0.09, wpb=64858, bsz=128, num_updates=16410, lr=9.98767e-05, gnorm=1.974, loss_scale=16, train_wall=10, gb_free=2.8, wall=189069
2021-06-20 23:10:06 | INFO | train_inner | epoch 006: 1497 / 3002 loss=2.521, ppl=5.74, wps=5813.9, ups=0.09, wpb=64830, bsz=128, num_updates=16411, lr=9.98767e-05, gnorm=1.976, loss_scale=16, train_wall=11, gb_free=2.8, wall=189080
2021-06-20 23:10:17 | INFO | train_inner | epoch 006: 1498 / 3002 loss=2.505, ppl=5.68, wps=5878, ups=0.09, wpb=64824, bsz=128, num_updates=16412, lr=9.98767e-05, gnorm=2.139, loss_scale=16, train_wall=11, gb_free=2.8, wall=189091
2021-06-20 23:10:28 | INFO | train_inner | epoch 006: 1499 / 3002 loss=2.502, ppl=5.66, wps=5814.7, ups=0.09, wpb=64758, bsz=128, num_updates=16413, lr=9.98767e-05, gnorm=1.97, loss_scale=16, train_wall=11, gb_free=2.8, wall=189102
2021-06-20 23:10:39 | INFO | train_inner | epoch 006: 1500 / 3002 loss=2.405, ppl=5.3, wps=5762.3, ups=0.09, wpb=64794, bsz=128, num_updates=16414, lr=9.98767e-05, gnorm=1.831, loss_scale=16, train_wall=11, gb_free=2.8, wall=189114
2021-06-20 23:10:50 | INFO | train_inner | epoch 006: 1501 / 3002 loss=2.468, ppl=5.53, wps=5878.2, ups=0.09, wpb=64816, bsz=128, num_updates=16415, lr=9.98767e-05, gnorm=1.921, loss_scale=16, train_wall=11, gb_free=2.8, wall=189125
2021-06-20 23:11:02 | INFO | train_inner | epoch 006: 1502 / 3002 loss=2.466, ppl=5.52, wps=5742.7, ups=0.09, wpb=64823, bsz=128, num_updates=16416, lr=9.98767e-05, gnorm=1.895, loss_scale=16, train_wall=11, gb_free=2.8, wall=189136
2021-06-20 23:11:13 | INFO | train_inner | epoch 006: 1503 / 3002 loss=2.44, ppl=5.43, wps=5724, ups=0.09, wpb=64832, bsz=128, num_updates=16417, lr=9.98767e-05, gnorm=2.457, loss_scale=16, train_wall=11, gb_free=2.8, wall=189147
2021-06-20 23:11:24 | INFO | train_inner | epoch 006: 1504 / 3002 loss=2.564, ppl=5.91, wps=6012.9, ups=0.09, wpb=64778, bsz=128, num_updates=16418, lr=9.98766e-05, gnorm=1.959, loss_scale=16, train_wall=10, gb_free=2.8, wall=189158
2021-06-20 23:11:34 | INFO | train_inner | epoch 006: 1505 / 3002 loss=2.608, ppl=6.1, wps=5989.9, ups=0.09, wpb=64857, bsz=128, num_updates=16419, lr=9.98766e-05, gnorm=1.974, loss_scale=16, train_wall=10, gb_free=2.8, wall=189169
2021-06-20 23:11:46 | INFO | train_inner | epoch 006: 1506 / 3002 loss=2.544, ppl=5.83, wps=5849.8, ups=0.09, wpb=64795, bsz=128, num_updates=16420, lr=9.98766e-05, gnorm=1.974, loss_scale=16, train_wall=11, gb_free=2.8, wall=189180
2021-06-20 23:11:57 | INFO | train_inner | epoch 006: 1507 / 3002 loss=2.601, ppl=6.07, wps=5822.1, ups=0.09, wpb=64858, bsz=128, num_updates=16421, lr=9.98766e-05, gnorm=1.947, loss_scale=16, train_wall=11, gb_free=2.8, wall=189191
2021-06-20 23:12:08 | INFO | train_inner | epoch 006: 1508 / 3002 loss=2.381, ppl=5.21, wps=5924.5, ups=0.09, wpb=64837, bsz=128, num_updates=16422, lr=9.98766e-05, gnorm=2, loss_scale=16, train_wall=11, gb_free=2.8, wall=189202
2021-06-20 23:12:18 | INFO | train_inner | epoch 006: 1509 / 3002 loss=2.333, ppl=5.04, wps=5953.6, ups=0.09, wpb=64840, bsz=128, num_updates=16423, lr=9.98766e-05, gnorm=1.841, loss_scale=16, train_wall=10, gb_free=2.8, wall=189213
2021-06-20 23:12:30 | INFO | train_inner | epoch 006: 1510 / 3002 loss=2.45, ppl=5.46, wps=5706.2, ups=0.09, wpb=64856, bsz=128, num_updates=16424, lr=9.98766e-05, gnorm=1.87, loss_scale=16, train_wall=11, gb_free=2.8, wall=189224
2021-06-20 23:12:41 | INFO | train_inner | epoch 006: 1511 / 3002 loss=2.552, ppl=5.86, wps=6058.3, ups=0.09, wpb=64837, bsz=128, num_updates=16425, lr=9.98766e-05, gnorm=1.879, loss_scale=16, train_wall=10, gb_free=2.8, wall=189235
2021-06-20 23:12:51 | INFO | train_inner | epoch 006: 1512 / 3002 loss=2.447, ppl=5.45, wps=5954.4, ups=0.09, wpb=64866, bsz=128, num_updates=16426, lr=9.98766e-05, gnorm=1.885, loss_scale=16, train_wall=10, gb_free=2.8, wall=189246
2021-06-20 23:13:03 | INFO | train_inner | epoch 006: 1513 / 3002 loss=2.582, ppl=5.99, wps=5781.4, ups=0.09, wpb=64879, bsz=128, num_updates=16427, lr=9.98766e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=189257
2021-06-20 23:13:14 | INFO | train_inner | epoch 006: 1514 / 3002 loss=2.506, ppl=5.68, wps=5849.7, ups=0.09, wpb=64751, bsz=128, num_updates=16428, lr=9.98766e-05, gnorm=1.931, loss_scale=16, train_wall=11, gb_free=2.8, wall=189268
2021-06-20 23:13:25 | INFO | train_inner | epoch 006: 1515 / 3002 loss=2.511, ppl=5.7, wps=5876.7, ups=0.09, wpb=64851, bsz=128, num_updates=16429, lr=9.98766e-05, gnorm=1.833, loss_scale=16, train_wall=11, gb_free=2.8, wall=189279
2021-06-20 23:13:36 | INFO | train_inner | epoch 006: 1516 / 3002 loss=2.532, ppl=5.78, wps=5843.4, ups=0.09, wpb=64869, bsz=128, num_updates=16430, lr=9.98766e-05, gnorm=2.041, loss_scale=16, train_wall=11, gb_free=2.8, wall=189290
2021-06-20 23:13:47 | INFO | train_inner | epoch 006: 1517 / 3002 loss=2.519, ppl=5.73, wps=5846.1, ups=0.09, wpb=64880, bsz=128, num_updates=16431, lr=9.98765e-05, gnorm=1.895, loss_scale=16, train_wall=11, gb_free=2.8, wall=189301
2021-06-20 23:13:58 | INFO | train_inner | epoch 006: 1518 / 3002 loss=2.349, ppl=5.1, wps=5933.8, ups=0.09, wpb=64867, bsz=128, num_updates=16432, lr=9.98765e-05, gnorm=2.268, loss_scale=16, train_wall=10, gb_free=2.8, wall=189312
2021-06-20 23:14:09 | INFO | train_inner | epoch 006: 1519 / 3002 loss=2.465, ppl=5.52, wps=5980.8, ups=0.09, wpb=64758, bsz=128, num_updates=16433, lr=9.98765e-05, gnorm=1.944, loss_scale=16, train_wall=10, gb_free=2.8, wall=189323
2021-06-20 23:14:20 | INFO | train_inner | epoch 006: 1520 / 3002 loss=2.534, ppl=5.79, wps=5798.8, ups=0.09, wpb=64901, bsz=128, num_updates=16434, lr=9.98765e-05, gnorm=2.161, loss_scale=16, train_wall=11, gb_free=2.8, wall=189334
2021-06-20 23:14:31 | INFO | train_inner | epoch 006: 1521 / 3002 loss=2.421, ppl=5.35, wps=5827.5, ups=0.09, wpb=64838, bsz=128, num_updates=16435, lr=9.98765e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=189345
2021-06-20 23:14:42 | INFO | train_inner | epoch 006: 1522 / 3002 loss=2.553, ppl=5.87, wps=5797, ups=0.09, wpb=64815, bsz=128, num_updates=16436, lr=9.98765e-05, gnorm=1.943, loss_scale=16, train_wall=11, gb_free=2.8, wall=189357
2021-06-20 23:14:53 | INFO | train_inner | epoch 006: 1523 / 3002 loss=2.434, ppl=5.4, wps=5893.4, ups=0.09, wpb=64809, bsz=128, num_updates=16437, lr=9.98765e-05, gnorm=2.045, loss_scale=16, train_wall=11, gb_free=2.8, wall=189368
2021-06-20 23:15:04 | INFO | train_inner | epoch 006: 1524 / 3002 loss=2.327, ppl=5.02, wps=5963.9, ups=0.09, wpb=64934, bsz=128, num_updates=16438, lr=9.98765e-05, gnorm=4.552, loss_scale=16, train_wall=10, gb_free=2.8, wall=189378
2021-06-20 23:15:15 | INFO | train_inner | epoch 006: 1525 / 3002 loss=2.37, ppl=5.17, wps=5799.5, ups=0.09, wpb=64852, bsz=128, num_updates=16439, lr=9.98765e-05, gnorm=1.82, loss_scale=16, train_wall=11, gb_free=2.8, wall=189390
2021-06-20 23:15:26 | INFO | train_inner | epoch 006: 1526 / 3002 loss=2.394, ppl=5.25, wps=5867.7, ups=0.09, wpb=64921, bsz=128, num_updates=16440, lr=9.98765e-05, gnorm=1.959, loss_scale=16, train_wall=11, gb_free=2.8, wall=189401
2021-06-20 23:15:38 | INFO | train_inner | epoch 006: 1527 / 3002 loss=2.487, ppl=5.61, wps=5793.6, ups=0.09, wpb=64834, bsz=128, num_updates=16441, lr=9.98765e-05, gnorm=1.972, loss_scale=16, train_wall=11, gb_free=2.8, wall=189412
2021-06-20 23:15:48 | INFO | train_inner | epoch 006: 1528 / 3002 loss=2.499, ppl=5.65, wps=5935.2, ups=0.09, wpb=64768, bsz=128, num_updates=16442, lr=9.98765e-05, gnorm=1.985, loss_scale=16, train_wall=10, gb_free=2.8, wall=189423
2021-06-20 23:16:00 | INFO | train_inner | epoch 006: 1529 / 3002 loss=2.449, ppl=5.46, wps=5810.8, ups=0.09, wpb=64860, bsz=128, num_updates=16443, lr=9.98764e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=189434
2021-06-20 23:16:11 | INFO | train_inner | epoch 006: 1530 / 3002 loss=2.498, ppl=5.65, wps=5809.5, ups=0.09, wpb=64896, bsz=128, num_updates=16444, lr=9.98764e-05, gnorm=1.993, loss_scale=16, train_wall=11, gb_free=2.8, wall=189445
2021-06-20 23:16:22 | INFO | train_inner | epoch 006: 1531 / 3002 loss=2.536, ppl=5.8, wps=5802.3, ups=0.09, wpb=64811, bsz=128, num_updates=16445, lr=9.98764e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=189456
2021-06-20 23:16:33 | INFO | train_inner | epoch 006: 1532 / 3002 loss=2.534, ppl=5.79, wps=5786.6, ups=0.09, wpb=64799, bsz=128, num_updates=16446, lr=9.98764e-05, gnorm=1.974, loss_scale=16, train_wall=11, gb_free=2.8, wall=189468
2021-06-20 23:16:44 | INFO | train_inner | epoch 006: 1533 / 3002 loss=2.551, ppl=5.86, wps=5869.4, ups=0.09, wpb=64813, bsz=128, num_updates=16447, lr=9.98764e-05, gnorm=2.011, loss_scale=16, train_wall=11, gb_free=2.8, wall=189479
2021-06-20 23:16:55 | INFO | train_inner | epoch 006: 1534 / 3002 loss=2.544, ppl=5.83, wps=5878.3, ups=0.09, wpb=64727, bsz=128, num_updates=16448, lr=9.98764e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=189490
2021-06-20 23:17:06 | INFO | train_inner | epoch 006: 1535 / 3002 loss=2.279, ppl=4.85, wps=5963, ups=0.09, wpb=64829, bsz=128, num_updates=16449, lr=9.98764e-05, gnorm=1.817, loss_scale=16, train_wall=10, gb_free=2.8, wall=189500
2021-06-20 23:17:17 | INFO | train_inner | epoch 006: 1536 / 3002 loss=2.445, ppl=5.44, wps=5802.9, ups=0.09, wpb=64779, bsz=128, num_updates=16450, lr=9.98764e-05, gnorm=1.97, loss_scale=16, train_wall=11, gb_free=2.8, wall=189512
2021-06-20 23:17:28 | INFO | train_inner | epoch 006: 1537 / 3002 loss=2.33, ppl=5.03, wps=5820.6, ups=0.09, wpb=64835, bsz=128, num_updates=16451, lr=9.98764e-05, gnorm=1.991, loss_scale=16, train_wall=11, gb_free=2.8, wall=189523
2021-06-20 23:17:39 | INFO | train_inner | epoch 006: 1538 / 3002 loss=2.471, ppl=5.54, wps=5889.5, ups=0.09, wpb=64768, bsz=128, num_updates=16452, lr=9.98764e-05, gnorm=2.031, loss_scale=16, train_wall=11, gb_free=2.8, wall=189534
2021-06-20 23:17:50 | INFO | train_inner | epoch 006: 1539 / 3002 loss=2.552, ppl=5.86, wps=6028, ups=0.09, wpb=64876, bsz=128, num_updates=16453, lr=9.98764e-05, gnorm=1.916, loss_scale=16, train_wall=10, gb_free=2.8, wall=189544
2021-06-20 23:18:01 | INFO | train_inner | epoch 006: 1540 / 3002 loss=2.632, ppl=6.2, wps=5885.5, ups=0.09, wpb=64857, bsz=128, num_updates=16454, lr=9.98764e-05, gnorm=1.861, loss_scale=16, train_wall=11, gb_free=2.8, wall=189556
2021-06-20 23:18:12 | INFO | train_inner | epoch 006: 1541 / 3002 loss=2.579, ppl=5.97, wps=6000.8, ups=0.09, wpb=64845, bsz=128, num_updates=16455, lr=9.98764e-05, gnorm=2.081, loss_scale=16, train_wall=10, gb_free=2.8, wall=189566
2021-06-20 23:18:23 | INFO | train_inner | epoch 006: 1542 / 3002 loss=2.499, ppl=5.65, wps=5868.7, ups=0.09, wpb=64554, bsz=128, num_updates=16456, lr=9.98763e-05, gnorm=1.906, loss_scale=16, train_wall=11, gb_free=2.8, wall=189577
2021-06-20 23:18:34 | INFO | train_inner | epoch 006: 1543 / 3002 loss=2.523, ppl=5.75, wps=5870.5, ups=0.09, wpb=64869, bsz=128, num_updates=16457, lr=9.98763e-05, gnorm=1.903, loss_scale=16, train_wall=11, gb_free=2.8, wall=189588
2021-06-20 23:18:45 | INFO | train_inner | epoch 006: 1544 / 3002 loss=2.453, ppl=5.47, wps=5905.6, ups=0.09, wpb=64858, bsz=128, num_updates=16458, lr=9.98763e-05, gnorm=1.926, loss_scale=16, train_wall=10, gb_free=2.8, wall=189599
2021-06-20 23:18:56 | INFO | train_inner | epoch 006: 1545 / 3002 loss=2.479, ppl=5.57, wps=5943.2, ups=0.09, wpb=64859, bsz=128, num_updates=16459, lr=9.98763e-05, gnorm=2.002, loss_scale=16, train_wall=10, gb_free=2.8, wall=189610
2021-06-20 23:19:07 | INFO | train_inner | epoch 006: 1546 / 3002 loss=2.39, ppl=5.24, wps=5889, ups=0.09, wpb=64873, bsz=128, num_updates=16460, lr=9.98763e-05, gnorm=1.95, loss_scale=16, train_wall=11, gb_free=2.8, wall=189621
2021-06-20 23:19:18 | INFO | train_inner | epoch 006: 1547 / 3002 loss=2.388, ppl=5.24, wps=5958.6, ups=0.09, wpb=64892, bsz=128, num_updates=16461, lr=9.98763e-05, gnorm=1.923, loss_scale=16, train_wall=10, gb_free=2.8, wall=189632
2021-06-20 23:19:29 | INFO | train_inner | epoch 006: 1548 / 3002 loss=2.527, ppl=5.76, wps=5922.5, ups=0.09, wpb=64858, bsz=128, num_updates=16462, lr=9.98763e-05, gnorm=2.105, loss_scale=16, train_wall=10, gb_free=2.8, wall=189643
2021-06-20 23:19:40 | INFO | train_inner | epoch 006: 1549 / 3002 loss=2.504, ppl=5.67, wps=5858.7, ups=0.09, wpb=64816, bsz=128, num_updates=16463, lr=9.98763e-05, gnorm=1.879, loss_scale=16, train_wall=11, gb_free=2.8, wall=189654
2021-06-20 23:19:51 | INFO | train_inner | epoch 006: 1550 / 3002 loss=2.582, ppl=5.99, wps=5829.4, ups=0.09, wpb=64910, bsz=128, num_updates=16464, lr=9.98763e-05, gnorm=1.92, loss_scale=16, train_wall=11, gb_free=2.8, wall=189665
2021-06-20 23:20:02 | INFO | train_inner | epoch 006: 1551 / 3002 loss=2.387, ppl=5.23, wps=5946, ups=0.09, wpb=64776, bsz=128, num_updates=16465, lr=9.98763e-05, gnorm=2.376, loss_scale=16, train_wall=10, gb_free=2.8, wall=189676
2021-06-20 23:20:13 | INFO | train_inner | epoch 006: 1552 / 3002 loss=2.361, ppl=5.14, wps=5985.4, ups=0.09, wpb=64822, bsz=128, num_updates=16466, lr=9.98763e-05, gnorm=1.895, loss_scale=16, train_wall=10, gb_free=2.8, wall=189687
2021-06-20 23:20:24 | INFO | train_inner | epoch 006: 1553 / 3002 loss=2.608, ppl=6.1, wps=5882.6, ups=0.09, wpb=64851, bsz=128, num_updates=16467, lr=9.98763e-05, gnorm=2.175, loss_scale=16, train_wall=11, gb_free=2.8, wall=189698
2021-06-20 23:20:35 | INFO | train_inner | epoch 006: 1554 / 3002 loss=2.587, ppl=6.01, wps=5719, ups=0.09, wpb=64864, bsz=128, num_updates=16468, lr=9.98762e-05, gnorm=1.921, loss_scale=16, train_wall=11, gb_free=2.8, wall=189709
2021-06-20 23:20:46 | INFO | train_inner | epoch 006: 1555 / 3002 loss=2.616, ppl=6.13, wps=6022.1, ups=0.09, wpb=64727, bsz=128, num_updates=16469, lr=9.98762e-05, gnorm=2.045, loss_scale=16, train_wall=10, gb_free=2.8, wall=189720
2021-06-20 23:20:57 | INFO | train_inner | epoch 006: 1556 / 3002 loss=2.403, ppl=5.29, wps=5756, ups=0.09, wpb=64882, bsz=128, num_updates=16470, lr=9.98762e-05, gnorm=1.863, loss_scale=16, train_wall=11, gb_free=2.8, wall=189731
2021-06-20 23:21:08 | INFO | train_inner | epoch 006: 1557 / 3002 loss=2.435, ppl=5.41, wps=5970, ups=0.09, wpb=64898, bsz=128, num_updates=16471, lr=9.98762e-05, gnorm=1.856, loss_scale=16, train_wall=10, gb_free=2.8, wall=189742
2021-06-20 23:21:19 | INFO | train_inner | epoch 006: 1558 / 3002 loss=2.459, ppl=5.5, wps=5777.3, ups=0.09, wpb=64814, bsz=128, num_updates=16472, lr=9.98762e-05, gnorm=1.98, loss_scale=16, train_wall=11, gb_free=2.8, wall=189754
2021-06-20 23:21:30 | INFO | train_inner | epoch 006: 1559 / 3002 loss=2.58, ppl=5.98, wps=5736.5, ups=0.09, wpb=64801, bsz=128, num_updates=16473, lr=9.98762e-05, gnorm=1.887, loss_scale=16, train_wall=11, gb_free=2.8, wall=189765
2021-06-20 23:21:42 | INFO | train_inner | epoch 006: 1560 / 3002 loss=2.568, ppl=5.93, wps=5807, ups=0.09, wpb=64716, bsz=128, num_updates=16474, lr=9.98762e-05, gnorm=1.894, loss_scale=16, train_wall=11, gb_free=2.8, wall=189776
2021-06-20 23:21:53 | INFO | train_inner | epoch 006: 1561 / 3002 loss=2.484, ppl=5.6, wps=5902.8, ups=0.09, wpb=64882, bsz=128, num_updates=16475, lr=9.98762e-05, gnorm=1.788, loss_scale=16, train_wall=11, gb_free=2.8, wall=189787
2021-06-20 23:22:04 | INFO | train_inner | epoch 006: 1562 / 3002 loss=2.571, ppl=5.94, wps=5863.2, ups=0.09, wpb=64812, bsz=128, num_updates=16476, lr=9.98762e-05, gnorm=2.941, loss_scale=16, train_wall=11, gb_free=2.8, wall=189798
2021-06-20 23:22:15 | INFO | train_inner | epoch 006: 1563 / 3002 loss=2.463, ppl=5.51, wps=5825.6, ups=0.09, wpb=64801, bsz=128, num_updates=16477, lr=9.98762e-05, gnorm=1.866, loss_scale=16, train_wall=11, gb_free=2.8, wall=189809
2021-06-20 23:22:26 | INFO | train_inner | epoch 006: 1564 / 3002 loss=2.512, ppl=5.7, wps=5933.3, ups=0.09, wpb=64889, bsz=128, num_updates=16478, lr=9.98762e-05, gnorm=1.965, loss_scale=16, train_wall=10, gb_free=2.8, wall=189820
2021-06-20 23:22:37 | INFO | train_inner | epoch 006: 1565 / 3002 loss=2.406, ppl=5.3, wps=5812.9, ups=0.09, wpb=64830, bsz=128, num_updates=16479, lr=9.98762e-05, gnorm=1.874, loss_scale=16, train_wall=11, gb_free=2.8, wall=189831
2021-06-20 23:22:48 | INFO | train_inner | epoch 006: 1566 / 3002 loss=2.524, ppl=5.75, wps=5909.6, ups=0.09, wpb=64813, bsz=128, num_updates=16480, lr=9.98762e-05, gnorm=1.914, loss_scale=16, train_wall=11, gb_free=2.8, wall=189842
2021-06-20 23:22:59 | INFO | train_inner | epoch 006: 1567 / 3002 loss=2.469, ppl=5.54, wps=5781.3, ups=0.09, wpb=64834, bsz=128, num_updates=16481, lr=9.98761e-05, gnorm=1.931, loss_scale=16, train_wall=11, gb_free=2.8, wall=189853
2021-06-20 23:23:10 | INFO | train_inner | epoch 006: 1568 / 3002 loss=2.431, ppl=5.39, wps=5887.4, ups=0.09, wpb=64826, bsz=128, num_updates=16482, lr=9.98761e-05, gnorm=2.544, loss_scale=16, train_wall=11, gb_free=2.8, wall=189864
2021-06-20 23:23:21 | INFO | train_inner | epoch 006: 1569 / 3002 loss=2.525, ppl=5.75, wps=5871.2, ups=0.09, wpb=64794, bsz=128, num_updates=16483, lr=9.98761e-05, gnorm=1.916, loss_scale=16, train_wall=11, gb_free=2.8, wall=189875
2021-06-20 23:23:32 | INFO | train_inner | epoch 006: 1570 / 3002 loss=2.559, ppl=5.89, wps=5900.4, ups=0.09, wpb=64813, bsz=128, num_updates=16484, lr=9.98761e-05, gnorm=1.871, loss_scale=16, train_wall=11, gb_free=2.8, wall=189886
2021-06-20 23:23:43 | INFO | train_inner | epoch 006: 1571 / 3002 loss=2.48, ppl=5.58, wps=5957.1, ups=0.09, wpb=64956, bsz=128, num_updates=16485, lr=9.98761e-05, gnorm=1.917, loss_scale=16, train_wall=10, gb_free=2.8, wall=189897
2021-06-20 23:23:54 | INFO | train_inner | epoch 006: 1572 / 3002 loss=2.493, ppl=5.63, wps=5789.4, ups=0.09, wpb=64839, bsz=128, num_updates=16486, lr=9.98761e-05, gnorm=1.984, loss_scale=16, train_wall=11, gb_free=2.8, wall=189909
2021-06-20 23:24:05 | INFO | train_inner | epoch 006: 1573 / 3002 loss=2.367, ppl=5.16, wps=5848.8, ups=0.09, wpb=64809, bsz=128, num_updates=16487, lr=9.98761e-05, gnorm=1.942, loss_scale=16, train_wall=11, gb_free=2.8, wall=189920
2021-06-20 23:24:16 | INFO | train_inner | epoch 006: 1574 / 3002 loss=2.362, ppl=5.14, wps=5793, ups=0.09, wpb=64886, bsz=128, num_updates=16488, lr=9.98761e-05, gnorm=1.99, loss_scale=16, train_wall=11, gb_free=2.8, wall=189931
2021-06-20 23:24:28 | INFO | train_inner | epoch 006: 1575 / 3002 loss=2.362, ppl=5.14, wps=5791.7, ups=0.09, wpb=64885, bsz=128, num_updates=16489, lr=9.98761e-05, gnorm=1.833, loss_scale=16, train_wall=11, gb_free=2.8, wall=189942
2021-06-20 23:24:39 | INFO | train_inner | epoch 006: 1576 / 3002 loss=2.481, ppl=5.58, wps=5899.9, ups=0.09, wpb=64883, bsz=128, num_updates=16490, lr=9.98761e-05, gnorm=1.927, loss_scale=16, train_wall=11, gb_free=2.8, wall=189953
2021-06-20 23:24:50 | INFO | train_inner | epoch 006: 1577 / 3002 loss=2.471, ppl=5.54, wps=5841.1, ups=0.09, wpb=64873, bsz=128, num_updates=16491, lr=9.98761e-05, gnorm=1.939, loss_scale=16, train_wall=11, gb_free=2.8, wall=189964
2021-06-20 23:25:01 | INFO | train_inner | epoch 006: 1578 / 3002 loss=2.509, ppl=5.69, wps=5919.6, ups=0.09, wpb=64865, bsz=128, num_updates=16492, lr=9.98761e-05, gnorm=1.952, loss_scale=16, train_wall=10, gb_free=2.8, wall=189975
2021-06-20 23:25:12 | INFO | train_inner | epoch 006: 1579 / 3002 loss=2.624, ppl=6.16, wps=5780.6, ups=0.09, wpb=64792, bsz=128, num_updates=16493, lr=9.9876e-05, gnorm=1.944, loss_scale=16, train_wall=11, gb_free=2.8, wall=189986
2021-06-20 23:25:23 | INFO | train_inner | epoch 006: 1580 / 3002 loss=2.483, ppl=5.59, wps=5909.3, ups=0.09, wpb=64796, bsz=128, num_updates=16494, lr=9.9876e-05, gnorm=1.853, loss_scale=16, train_wall=11, gb_free=2.8, wall=189997
2021-06-20 23:25:34 | INFO | train_inner | epoch 006: 1581 / 3002 loss=2.348, ppl=5.09, wps=5867.9, ups=0.09, wpb=64829, bsz=128, num_updates=16495, lr=9.9876e-05, gnorm=1.898, loss_scale=16, train_wall=11, gb_free=2.8, wall=190008
2021-06-20 23:25:45 | INFO | train_inner | epoch 006: 1582 / 3002 loss=2.357, ppl=5.12, wps=5858.9, ups=0.09, wpb=64828, bsz=128, num_updates=16496, lr=9.9876e-05, gnorm=1.874, loss_scale=16, train_wall=11, gb_free=2.8, wall=190019
2021-06-20 23:25:56 | INFO | train_inner | epoch 006: 1583 / 3002 loss=2.446, ppl=5.45, wps=5731.6, ups=0.09, wpb=64617, bsz=128, num_updates=16497, lr=9.9876e-05, gnorm=1.834, loss_scale=16, train_wall=11, gb_free=2.8, wall=190031
2021-06-20 23:26:07 | INFO | train_inner | epoch 006: 1584 / 3002 loss=2.463, ppl=5.51, wps=5915.9, ups=0.09, wpb=64842, bsz=128, num_updates=16498, lr=9.9876e-05, gnorm=1.931, loss_scale=16, train_wall=10, gb_free=2.8, wall=190042
2021-06-20 23:26:18 | INFO | train_inner | epoch 006: 1585 / 3002 loss=2.443, ppl=5.44, wps=5831.5, ups=0.09, wpb=64818, bsz=128, num_updates=16499, lr=9.9876e-05, gnorm=2.721, loss_scale=16, train_wall=11, gb_free=2.8, wall=190053
2021-06-20 23:26:30 | INFO | train_inner | epoch 006: 1586 / 3002 loss=2.608, ppl=6.1, wps=5812, ups=0.09, wpb=64728, bsz=128, num_updates=16500, lr=9.9876e-05, gnorm=1.899, loss_scale=16, train_wall=11, gb_free=2.8, wall=190064
2021-06-20 23:26:41 | INFO | train_inner | epoch 006: 1587 / 3002 loss=2.355, ppl=5.12, wps=5796.7, ups=0.09, wpb=64770, bsz=128, num_updates=16501, lr=9.9876e-05, gnorm=1.886, loss_scale=16, train_wall=11, gb_free=2.8, wall=190075
2021-06-20 23:26:52 | INFO | train_inner | epoch 006: 1588 / 3002 loss=2.611, ppl=6.11, wps=5848.4, ups=0.09, wpb=64800, bsz=128, num_updates=16502, lr=9.9876e-05, gnorm=1.998, loss_scale=16, train_wall=11, gb_free=2.8, wall=190086
2021-06-20 23:27:03 | INFO | train_inner | epoch 006: 1589 / 3002 loss=2.47, ppl=5.54, wps=5945.5, ups=0.09, wpb=64809, bsz=128, num_updates=16503, lr=9.9876e-05, gnorm=1.899, loss_scale=16, train_wall=10, gb_free=2.8, wall=190097
2021-06-20 23:27:14 | INFO | train_inner | epoch 006: 1590 / 3002 loss=2.502, ppl=5.67, wps=5915.9, ups=0.09, wpb=64918, bsz=128, num_updates=16504, lr=9.9876e-05, gnorm=2.005, loss_scale=16, train_wall=10, gb_free=2.8, wall=190108
2021-06-20 23:27:25 | INFO | train_inner | epoch 006: 1591 / 3002 loss=2.546, ppl=5.84, wps=5717.1, ups=0.09, wpb=64784, bsz=128, num_updates=16505, lr=9.9876e-05, gnorm=1.933, loss_scale=16, train_wall=11, gb_free=2.8, wall=190119
2021-06-20 23:27:36 | INFO | train_inner | epoch 006: 1592 / 3002 loss=2.373, ppl=5.18, wps=5868, ups=0.09, wpb=64872, bsz=128, num_updates=16506, lr=9.98759e-05, gnorm=2.279, loss_scale=16, train_wall=11, gb_free=2.8, wall=190130
2021-06-20 23:27:47 | INFO | train_inner | epoch 006: 1593 / 3002 loss=2.445, ppl=5.45, wps=5833.5, ups=0.09, wpb=64658, bsz=128, num_updates=16507, lr=9.98759e-05, gnorm=1.908, loss_scale=16, train_wall=11, gb_free=2.8, wall=190141
2021-06-20 23:27:58 | INFO | train_inner | epoch 006: 1594 / 3002 loss=2.529, ppl=5.77, wps=5878.6, ups=0.09, wpb=64845, bsz=128, num_updates=16508, lr=9.98759e-05, gnorm=1.978, loss_scale=16, train_wall=11, gb_free=2.8, wall=190153
2021-06-20 23:28:09 | INFO | train_inner | epoch 006: 1595 / 3002 loss=2.486, ppl=5.6, wps=5792.1, ups=0.09, wpb=64807, bsz=128, num_updates=16509, lr=9.98759e-05, gnorm=2.938, loss_scale=16, train_wall=11, gb_free=2.8, wall=190164
2021-06-20 23:28:20 | INFO | train_inner | epoch 006: 1596 / 3002 loss=2.492, ppl=5.62, wps=5875.2, ups=0.09, wpb=64826, bsz=128, num_updates=16510, lr=9.98759e-05, gnorm=2.019, loss_scale=16, train_wall=11, gb_free=2.8, wall=190175
2021-06-20 23:28:32 | INFO | train_inner | epoch 006: 1597 / 3002 loss=2.414, ppl=5.33, wps=5827.1, ups=0.09, wpb=64795, bsz=128, num_updates=16511, lr=9.98759e-05, gnorm=1.912, loss_scale=32, train_wall=11, gb_free=2.8, wall=190186
2021-06-20 23:28:42 | INFO | train_inner | epoch 006: 1598 / 3002 loss=2.467, ppl=5.53, wps=5917.9, ups=0.09, wpb=64826, bsz=128, num_updates=16512, lr=9.98759e-05, gnorm=2.093, loss_scale=32, train_wall=11, gb_free=2.8, wall=190197
2021-06-20 23:28:53 | INFO | train_inner | epoch 006: 1599 / 3002 loss=2.511, ppl=5.7, wps=5908.9, ups=0.09, wpb=64912, bsz=128, num_updates=16513, lr=9.98759e-05, gnorm=1.954, loss_scale=32, train_wall=11, gb_free=2.8, wall=190208
2021-06-20 23:29:05 | INFO | train_inner | epoch 006: 1600 / 3002 loss=2.523, ppl=5.75, wps=5715.8, ups=0.09, wpb=64769, bsz=128, num_updates=16514, lr=9.98759e-05, gnorm=1.933, loss_scale=32, train_wall=11, gb_free=2.8, wall=190219
2021-06-20 23:29:16 | INFO | train_inner | epoch 006: 1601 / 3002 loss=2.535, ppl=5.8, wps=5849.6, ups=0.09, wpb=64921, bsz=128, num_updates=16515, lr=9.98759e-05, gnorm=1.896, loss_scale=32, train_wall=11, gb_free=2.8, wall=190230
2021-06-20 23:29:27 | INFO | train_inner | epoch 006: 1602 / 3002 loss=2.617, ppl=6.14, wps=5846.1, ups=0.09, wpb=64839, bsz=128, num_updates=16516, lr=9.98759e-05, gnorm=1.896, loss_scale=32, train_wall=11, gb_free=2.8, wall=190241
2021-06-20 23:29:38 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0
2021-06-20 23:29:49 | INFO | train_inner | epoch 006: 1604 / 3002 loss=2.328, ppl=5.02, wps=2928.2, ups=0.05, wpb=64830, bsz=128, num_updates=16517, lr=9.98759e-05, gnorm=1.899, loss_scale=16, train_wall=21, gb_free=2.8, wall=190263
2021-06-20 23:30:00 | INFO | train_inner | epoch 006: 1605 / 3002 loss=2.659, ppl=6.32, wps=5889, ups=0.09, wpb=64765, bsz=128, num_updates=16518, lr=9.98758e-05, gnorm=1.938, loss_scale=16, train_wall=11, gb_free=2.8, wall=190274
2021-06-20 23:30:11 | INFO | train_inner | epoch 006: 1606 / 3002 loss=2.503, ppl=5.67, wps=5862.2, ups=0.09, wpb=64803, bsz=128, num_updates=16519, lr=9.98758e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=190286
2021-06-20 23:30:22 | INFO | train_inner | epoch 006: 1607 / 3002 loss=2.37, ppl=5.17, wps=5912.1, ups=0.09, wpb=64894, bsz=128, num_updates=16520, lr=9.98758e-05, gnorm=1.901, loss_scale=16, train_wall=11, gb_free=2.8, wall=190296
2021-06-20 23:30:33 | INFO | train_inner | epoch 006: 1608 / 3002 loss=2.331, ppl=5.03, wps=5817.5, ups=0.09, wpb=64838, bsz=128, num_updates=16521, lr=9.98758e-05, gnorm=1.963, loss_scale=16, train_wall=11, gb_free=2.8, wall=190308
2021-06-20 23:30:44 | INFO | train_inner | epoch 006: 1609 / 3002 loss=2.555, ppl=5.88, wps=5797.1, ups=0.09, wpb=64781, bsz=128, num_updates=16522, lr=9.98758e-05, gnorm=2.128, loss_scale=16, train_wall=11, gb_free=2.8, wall=190319
2021-06-20 23:30:56 | INFO | train_inner | epoch 006: 1610 / 3002 loss=2.521, ppl=5.74, wps=5801.4, ups=0.09, wpb=64741, bsz=128, num_updates=16523, lr=9.98758e-05, gnorm=1.908, loss_scale=16, train_wall=11, gb_free=2.8, wall=190330
2021-06-20 23:31:07 | INFO | train_inner | epoch 006: 1611 / 3002 loss=2.433, ppl=5.4, wps=5911.3, ups=0.09, wpb=64871, bsz=128, num_updates=16524, lr=9.98758e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=190341
2021-06-20 23:31:18 | INFO | train_inner | epoch 006: 1612 / 3002 loss=2.505, ppl=5.68, wps=5856.9, ups=0.09, wpb=64839, bsz=128, num_updates=16525, lr=9.98758e-05, gnorm=1.919, loss_scale=16, train_wall=11, gb_free=2.8, wall=190352
2021-06-20 23:31:29 | INFO | train_inner | epoch 006: 1613 / 3002 loss=2.416, ppl=5.34, wps=5811.5, ups=0.09, wpb=64910, bsz=128, num_updates=16526, lr=9.98758e-05, gnorm=1.866, loss_scale=16, train_wall=11, gb_free=2.8, wall=190363
2021-06-20 23:31:40 | INFO | train_inner | epoch 006: 1614 / 3002 loss=2.452, ppl=5.47, wps=5959.8, ups=0.09, wpb=64866, bsz=128, num_updates=16527, lr=9.98758e-05, gnorm=1.866, loss_scale=16, train_wall=10, gb_free=2.8, wall=190374
2021-06-20 23:31:51 | INFO | train_inner | epoch 006: 1615 / 3002 loss=2.403, ppl=5.29, wps=5836, ups=0.09, wpb=64848, bsz=128, num_updates=16528, lr=9.98758e-05, gnorm=1.897, loss_scale=16, train_wall=11, gb_free=2.8, wall=190385
2021-06-20 23:32:02 | INFO | train_inner | epoch 006: 1616 / 3002 loss=2.574, ppl=5.95, wps=5753.7, ups=0.09, wpb=64821, bsz=128, num_updates=16529, lr=9.98758e-05, gnorm=1.83, loss_scale=16, train_wall=11, gb_free=2.8, wall=190396
2021-06-20 23:32:13 | INFO | train_inner | epoch 006: 1617 / 3002 loss=2.368, ppl=5.16, wps=5843.5, ups=0.09, wpb=64862, bsz=128, num_updates=16530, lr=9.98758e-05, gnorm=2.016, loss_scale=16, train_wall=11, gb_free=2.8, wall=190408
2021-06-20 23:32:24 | INFO | train_inner | epoch 006: 1618 / 3002 loss=2.349, ppl=5.1, wps=5875.6, ups=0.09, wpb=64780, bsz=128, num_updates=16531, lr=9.98757e-05, gnorm=1.868, loss_scale=16, train_wall=11, gb_free=2.8, wall=190419
2021-06-20 23:32:35 | INFO | train_inner | epoch 006: 1619 / 3002 loss=2.42, ppl=5.35, wps=5828.1, ups=0.09, wpb=64845, bsz=128, num_updates=16532, lr=9.98757e-05, gnorm=1.875, loss_scale=16, train_wall=11, gb_free=2.8, wall=190430
2021-06-20 23:32:46 | INFO | train_inner | epoch 006: 1620 / 3002 loss=2.393, ppl=5.25, wps=5940.9, ups=0.09, wpb=64881, bsz=128, num_updates=16533, lr=9.98757e-05, gnorm=1.906, loss_scale=16, train_wall=10, gb_free=2.8, wall=190441
2021-06-20 23:32:57 | INFO | train_inner | epoch 006: 1621 / 3002 loss=2.468, ppl=5.53, wps=5860.2, ups=0.09, wpb=64880, bsz=128, num_updates=16534, lr=9.98757e-05, gnorm=2.076, loss_scale=16, train_wall=11, gb_free=2.8, wall=190452
2021-06-20 23:33:09 | INFO | train_inner | epoch 006: 1622 / 3002 loss=2.507, ppl=5.69, wps=5792.3, ups=0.09, wpb=64867, bsz=128, num_updates=16535, lr=9.98757e-05, gnorm=2.05, loss_scale=16, train_wall=11, gb_free=2.8, wall=190463
2021-06-20 23:33:20 | INFO | train_inner | epoch 006: 1623 / 3002 loss=2.386, ppl=5.23, wps=5739.8, ups=0.09, wpb=64799, bsz=128, num_updates=16536, lr=9.98757e-05, gnorm=1.911, loss_scale=16, train_wall=11, gb_free=2.8, wall=190474
2021-06-20 23:33:31 | INFO | train_inner | epoch 006: 1624 / 3002 loss=2.561, ppl=5.9, wps=5811.5, ups=0.09, wpb=64746, bsz=128, num_updates=16537, lr=9.98757e-05, gnorm=1.9, loss_scale=16, train_wall=11, gb_free=2.8, wall=190485
2021-06-20 23:33:42 | INFO | train_inner | epoch 006: 1625 / 3002 loss=2.401, ppl=5.28, wps=5729.5, ups=0.09, wpb=64756, bsz=128, num_updates=16538, lr=9.98757e-05, gnorm=3.747, loss_scale=16, train_wall=11, gb_free=2.8, wall=190497
2021-06-20 23:33:54 | INFO | train_inner | epoch 006: 1626 / 3002 loss=2.402, ppl=5.29, wps=5737.9, ups=0.09, wpb=64743, bsz=128, num_updates=16539, lr=9.98757e-05, gnorm=2.048, loss_scale=16, train_wall=11, gb_free=2.8, wall=190508
2021-06-20 23:34:05 | INFO | train_inner | epoch 006: 1627 / 3002 loss=2.405, ppl=5.3, wps=5825, ups=0.09, wpb=64803, bsz=128, num_updates=16540, lr=9.98757e-05, gnorm=1.904, loss_scale=16, train_wall=11, gb_free=2.8, wall=190519
2021-06-20 23:34:16 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0
2021-06-20 23:34:27 | INFO | train_inner | epoch 006: 1629 / 3002 loss=2.576, ppl=5.96, wps=2897.3, ups=0.04, wpb=64877, bsz=128, num_updates=16541, lr=9.98757e-05, gnorm=1.955, loss_scale=8, train_wall=21, gb_free=2.8, wall=190541
2021-06-20 23:34:38 | INFO | train_inner | epoch 006: 1630 / 3002 loss=2.416, ppl=5.34, wps=5850.6, ups=0.09, wpb=64866, bsz=128, num_updates=16542, lr=9.98757e-05, gnorm=1.891, loss_scale=8, train_wall=11, gb_free=2.8, wall=190553
2021-06-20 23:34:49 | INFO | train_inner | epoch 006: 1631 / 3002 loss=2.413, ppl=5.33, wps=5888.2, ups=0.09, wpb=64870, bsz=128, num_updates=16543, lr=9.98756e-05, gnorm=1.958, loss_scale=8, train_wall=11, gb_free=2.8, wall=190564
2021-06-20 23:35:00 | INFO | train_inner | epoch 006: 1632 / 3002 loss=2.435, ppl=5.41, wps=5801.5, ups=0.09, wpb=64836, bsz=128, num_updates=16544, lr=9.98756e-05, gnorm=1.861, loss_scale=8, train_wall=11, gb_free=2.8, wall=190575
2021-06-20 23:35:12 | INFO | train_inner | epoch 006: 1633 / 3002 loss=2.583, ppl=5.99, wps=5794.3, ups=0.09, wpb=64845, bsz=128, num_updates=16545, lr=9.98756e-05, gnorm=1.986, loss_scale=8, train_wall=11, gb_free=2.8, wall=190586
2021-06-20 23:35:23 | INFO | train_inner | epoch 006: 1634 / 3002 loss=2.532, ppl=5.78, wps=5801.7, ups=0.09, wpb=64793, bsz=128, num_updates=16546, lr=9.98756e-05, gnorm=1.858, loss_scale=8, train_wall=11, gb_free=2.8, wall=190597
2021-06-20 23:35:34 | INFO | train_inner | epoch 006: 1635 / 3002 loss=2.49, ppl=5.62, wps=5900.5, ups=0.09, wpb=64808, bsz=128, num_updates=16547, lr=9.98756e-05, gnorm=1.946, loss_scale=8, train_wall=11, gb_free=2.8, wall=190608
2021-06-20 23:35:45 | INFO | train_inner | epoch 006: 1636 / 3002 loss=2.506, ppl=5.68, wps=5876, ups=0.09, wpb=64841, bsz=128, num_updates=16548, lr=9.98756e-05, gnorm=2.004, loss_scale=8, train_wall=11, gb_free=2.8, wall=190619
2021-06-20 23:35:56 | INFO | train_inner | epoch 006: 1637 / 3002 loss=2.543, ppl=5.83, wps=5783.3, ups=0.09, wpb=64840, bsz=128, num_updates=16549, lr=9.98756e-05, gnorm=1.957, loss_scale=8, train_wall=11, gb_free=2.8, wall=190630
2021-06-20 23:36:07 | INFO | train_inner | epoch 006: 1638 / 3002 loss=2.459, ppl=5.5, wps=5997.5, ups=0.09, wpb=64835, bsz=128, num_updates=16550, lr=9.98756e-05, gnorm=1.96, loss_scale=8, train_wall=10, gb_free=2.8, wall=190641
2021-06-20 23:36:18 | INFO | train_inner | epoch 006: 1639 / 3002 loss=2.489, ppl=5.61, wps=5815.9, ups=0.09, wpb=64815, bsz=128, num_updates=16551, lr=9.98756e-05, gnorm=2.031, loss_scale=8, train_wall=11, gb_free=2.8, wall=190652
2021-06-20 23:36:29 | INFO | train_inner | epoch 006: 1640 / 3002 loss=2.539, ppl=5.81, wps=5902.9, ups=0.09, wpb=64849, bsz=128, num_updates=16552, lr=9.98756e-05, gnorm=1.94, loss_scale=8, train_wall=11, gb_free=2.8, wall=190663
2021-06-20 23:36:40 | INFO | train_inner | epoch 006: 1641 / 3002 loss=2.46, ppl=5.5, wps=5728.4, ups=0.09, wpb=64801, bsz=128, num_updates=16553, lr=9.98756e-05, gnorm=1.989, loss_scale=8, train_wall=11, gb_free=2.8, wall=190675
2021-06-20 23:36:51 | INFO | train_inner | epoch 006: 1642 / 3002 loss=2.567, ppl=5.93, wps=5911.8, ups=0.09, wpb=64874, bsz=128, num_updates=16554, lr=9.98756e-05, gnorm=1.954, loss_scale=8, train_wall=11, gb_free=2.8, wall=190686
2021-06-20 23:37:02 | INFO | train_inner | epoch 006: 1643 / 3002 loss=2.314, ppl=4.97, wps=5746, ups=0.09, wpb=64775, bsz=128, num_updates=16555, lr=9.98756e-05, gnorm=1.869, loss_scale=8, train_wall=11, gb_free=2.8, wall=190697
2021-06-20 23:37:14 | INFO | train_inner | epoch 006: 1644 / 3002 loss=2.496, ppl=5.64, wps=5771.8, ups=0.09, wpb=64853, bsz=128, num_updates=16556, lr=9.98755e-05, gnorm=1.831, loss_scale=8, train_wall=11, gb_free=2.8, wall=190708
2021-06-20 23:37:25 | INFO | train_inner | epoch 006: 1645 / 3002 loss=2.517, ppl=5.72, wps=5886.6, ups=0.09, wpb=64873, bsz=128, num_updates=16557, lr=9.98755e-05, gnorm=1.96, loss_scale=8, train_wall=11, gb_free=2.8, wall=190719
2021-06-20 23:37:36 | INFO | train_inner | epoch 006: 1646 / 3002 loss=2.434, ppl=5.4, wps=5848.6, ups=0.09, wpb=64793, bsz=128, num_updates=16558, lr=9.98755e-05, gnorm=1.859, loss_scale=8, train_wall=11, gb_free=2.8, wall=190730
2021-06-20 23:37:47 | INFO | train_inner | epoch 006: 1647 / 3002 loss=2.585, ppl=6, wps=5840.7, ups=0.09, wpb=64783, bsz=128, num_updates=16559, lr=9.98755e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=190741
2021-06-20 23:37:58 | INFO | train_inner | epoch 006: 1648 / 3002 loss=2.377, ppl=5.2, wps=5812.6, ups=0.09, wpb=64806, bsz=128, num_updates=16560, lr=9.98755e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=190752
2021-06-20 23:38:09 | INFO | train_inner | epoch 006: 1649 / 3002 loss=2.468, ppl=5.53, wps=5891.8, ups=0.09, wpb=64820, bsz=128, num_updates=16561, lr=9.98755e-05, gnorm=2.02, loss_scale=8, train_wall=11, gb_free=2.8, wall=190763
2021-06-20 23:38:20 | INFO | train_inner | epoch 006: 1650 / 3002 loss=2.394, ppl=5.26, wps=5907.4, ups=0.09, wpb=64860, bsz=128, num_updates=16562, lr=9.98755e-05, gnorm=2.038, loss_scale=8, train_wall=11, gb_free=2.8, wall=190774
2021-06-20 23:38:31 | INFO | train_inner | epoch 006: 1651 / 3002 loss=2.5, ppl=5.65, wps=5873.2, ups=0.09, wpb=64818, bsz=128, num_updates=16563, lr=9.98755e-05, gnorm=1.942, loss_scale=8, train_wall=11, gb_free=2.8, wall=190785
2021-06-20 23:38:42 | INFO | train_inner | epoch 006: 1652 / 3002 loss=2.487, ppl=5.61, wps=5898.3, ups=0.09, wpb=64791, bsz=128, num_updates=16564, lr=9.98755e-05, gnorm=1.919, loss_scale=8, train_wall=11, gb_free=2.8, wall=190796
2021-06-20 23:38:53 | INFO | train_inner | epoch 006: 1653 / 3002 loss=2.538, ppl=5.81, wps=5801.4, ups=0.09, wpb=64786, bsz=128, num_updates=16565, lr=9.98755e-05, gnorm=1.937, loss_scale=8, train_wall=11, gb_free=2.8, wall=190808
2021-06-20 23:39:04 | INFO | train_inner | epoch 006: 1654 / 3002 loss=2.408, ppl=5.31, wps=5853.4, ups=0.09, wpb=64750, bsz=128, num_updates=16566, lr=9.98755e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=190819
2021-06-20 23:39:16 | INFO | train_inner | epoch 006: 1655 / 3002 loss=2.559, ppl=5.89, wps=5724.8, ups=0.09, wpb=64770, bsz=128, num_updates=16567, lr=9.98755e-05, gnorm=1.959, loss_scale=8, train_wall=11, gb_free=2.8, wall=190830
2021-06-20 23:39:27 | INFO | train_inner | epoch 006: 1656 / 3002 loss=2.358, ppl=5.13, wps=5822, ups=0.09, wpb=64830, bsz=128, num_updates=16568, lr=9.98754e-05, gnorm=1.924, loss_scale=8, train_wall=11, gb_free=2.8, wall=190841
2021-06-20 23:39:38 | INFO | train_inner | epoch 006: 1657 / 3002 loss=2.453, ppl=5.48, wps=5869.7, ups=0.09, wpb=64821, bsz=128, num_updates=16569, lr=9.98754e-05, gnorm=2.083, loss_scale=8, train_wall=11, gb_free=2.8, wall=190852
2021-06-20 23:39:49 | INFO | train_inner | epoch 006: 1658 / 3002 loss=2.445, ppl=5.44, wps=5811.6, ups=0.09, wpb=64828, bsz=128, num_updates=16570, lr=9.98754e-05, gnorm=1.951, loss_scale=8, train_wall=11, gb_free=2.8, wall=190863
2021-06-20 23:40:00 | INFO | train_inner | epoch 006: 1659 / 3002 loss=2.511, ppl=5.7, wps=5911.6, ups=0.09, wpb=64770, bsz=128, num_updates=16571, lr=9.98754e-05, gnorm=1.923, loss_scale=8, train_wall=10, gb_free=2.8, wall=190874
2021-06-20 23:40:11 | INFO | train_inner | epoch 006: 1660 / 3002 loss=2.491, ppl=5.62, wps=5935, ups=0.09, wpb=64843, bsz=128, num_updates=16572, lr=9.98754e-05, gnorm=1.984, loss_scale=8, train_wall=10, gb_free=2.8, wall=190885
2021-06-20 23:40:22 | INFO | train_inner | epoch 006: 1661 / 3002 loss=2.36, ppl=5.13, wps=5935.7, ups=0.09, wpb=64804, bsz=128, num_updates=16573, lr=9.98754e-05, gnorm=1.892, loss_scale=8, train_wall=10, gb_free=2.8, wall=190896
2021-06-20 23:40:33 | INFO | train_inner | epoch 006: 1662 / 3002 loss=2.466, ppl=5.53, wps=5790.6, ups=0.09, wpb=64848, bsz=128, num_updates=16574, lr=9.98754e-05, gnorm=1.929, loss_scale=8, train_wall=11, gb_free=2.8, wall=190907
2021-06-20 23:40:44 | INFO | train_inner | epoch 006: 1663 / 3002 loss=2.502, ppl=5.67, wps=6018.1, ups=0.09, wpb=64884, bsz=128, num_updates=16575, lr=9.98754e-05, gnorm=1.935, loss_scale=8, train_wall=10, gb_free=2.8, wall=190918
2021-06-20 23:40:55 | INFO | train_inner | epoch 006: 1664 / 3002 loss=2.442, ppl=5.43, wps=5981.4, ups=0.09, wpb=64829, bsz=128, num_updates=16576, lr=9.98754e-05, gnorm=1.93, loss_scale=8, train_wall=10, gb_free=2.8, wall=190929
2021-06-20 23:41:06 | INFO | train_inner | epoch 006: 1665 / 3002 loss=2.355, ppl=5.11, wps=5857, ups=0.09, wpb=64766, bsz=128, num_updates=16577, lr=9.98754e-05, gnorm=1.918, loss_scale=8, train_wall=11, gb_free=2.8, wall=190940
2021-06-20 23:41:17 | INFO | train_inner | epoch 006: 1666 / 3002 loss=2.367, ppl=5.16, wps=5916.7, ups=0.09, wpb=64887, bsz=128, num_updates=16578, lr=9.98754e-05, gnorm=1.913, loss_scale=8, train_wall=11, gb_free=2.8, wall=190951
2021-06-20 23:41:27 | INFO | train_inner | epoch 006: 1667 / 3002 loss=2.566, ppl=5.92, wps=5943.4, ups=0.09, wpb=64825, bsz=128, num_updates=16579, lr=9.98754e-05, gnorm=1.953, loss_scale=8, train_wall=10, gb_free=2.8, wall=190962
2021-06-20 23:41:39 | INFO | train_inner | epoch 006: 1668 / 3002 loss=2.693, ppl=6.47, wps=5855.9, ups=0.09, wpb=64830, bsz=128, num_updates=16580, lr=9.98754e-05, gnorm=1.975, loss_scale=8, train_wall=11, gb_free=2.8, wall=190973
2021-06-20 23:41:50 | INFO | train_inner | epoch 006: 1669 / 3002 loss=2.568, ppl=5.93, wps=5731.2, ups=0.09, wpb=64807, bsz=128, num_updates=16581, lr=9.98753e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=190984
2021-06-20 23:42:01 | INFO | train_inner | epoch 006: 1670 / 3002 loss=2.432, ppl=5.4, wps=5732.7, ups=0.09, wpb=64770, bsz=128, num_updates=16582, lr=9.98753e-05, gnorm=2.43, loss_scale=8, train_wall=11, gb_free=2.8, wall=190995
2021-06-20 23:42:12 | INFO | train_inner | epoch 006: 1671 / 3002 loss=2.431, ppl=5.39, wps=5893.5, ups=0.09, wpb=64785, bsz=128, num_updates=16583, lr=9.98753e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=191006
2021-06-20 23:42:23 | INFO | train_inner | epoch 006: 1672 / 3002 loss=2.712, ppl=6.55, wps=5837.1, ups=0.09, wpb=64782, bsz=128, num_updates=16584, lr=9.98753e-05, gnorm=2.048, loss_scale=8, train_wall=11, gb_free=2.8, wall=191018
2021-06-20 23:42:35 | INFO | train_inner | epoch 006: 1673 / 3002 loss=2.459, ppl=5.5, wps=5721.6, ups=0.09, wpb=64901, bsz=128, num_updates=16585, lr=9.98753e-05, gnorm=1.917, loss_scale=8, train_wall=11, gb_free=2.8, wall=191029
2021-06-20 23:42:45 | INFO | train_inner | epoch 006: 1674 / 3002 loss=2.431, ppl=5.39, wps=6029.9, ups=0.09, wpb=64891, bsz=128, num_updates=16586, lr=9.98753e-05, gnorm=1.95, loss_scale=8, train_wall=10, gb_free=2.8, wall=191040
2021-06-20 23:42:56 | INFO | train_inner | epoch 006: 1675 / 3002 loss=2.432, ppl=5.4, wps=5890.1, ups=0.09, wpb=64821, bsz=128, num_updates=16587, lr=9.98753e-05, gnorm=2.032, loss_scale=8, train_wall=11, gb_free=2.8, wall=191051
2021-06-20 23:43:07 | INFO | train_inner | epoch 006: 1676 / 3002 loss=2.444, ppl=5.44, wps=5857.4, ups=0.09, wpb=64881, bsz=128, num_updates=16588, lr=9.98753e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=191062
2021-06-20 23:43:19 | INFO | train_inner | epoch 006: 1677 / 3002 loss=2.446, ppl=5.45, wps=5713.7, ups=0.09, wpb=64847, bsz=128, num_updates=16589, lr=9.98753e-05, gnorm=1.915, loss_scale=8, train_wall=11, gb_free=2.8, wall=191073
2021-06-20 23:43:30 | INFO | train_inner | epoch 006: 1678 / 3002 loss=2.521, ppl=5.74, wps=5823.7, ups=0.09, wpb=64867, bsz=128, num_updates=16590, lr=9.98753e-05, gnorm=1.901, loss_scale=8, train_wall=11, gb_free=2.8, wall=191084
2021-06-20 23:43:41 | INFO | train_inner | epoch 006: 1679 / 3002 loss=2.523, ppl=5.75, wps=5798.7, ups=0.09, wpb=64754, bsz=128, num_updates=16591, lr=9.98753e-05, gnorm=1.932, loss_scale=8, train_wall=11, gb_free=2.8, wall=191095
2021-06-20 23:43:52 | INFO | train_inner | epoch 006: 1680 / 3002 loss=2.629, ppl=6.19, wps=5824.1, ups=0.09, wpb=64854, bsz=128, num_updates=16592, lr=9.98753e-05, gnorm=2.058, loss_scale=8, train_wall=11, gb_free=2.8, wall=191107
2021-06-20 23:44:03 | INFO | train_inner | epoch 006: 1681 / 3002 loss=2.521, ppl=5.74, wps=5810.2, ups=0.09, wpb=64916, bsz=128, num_updates=16593, lr=9.98752e-05, gnorm=1.857, loss_scale=8, train_wall=11, gb_free=2.8, wall=191118
2021-06-20 23:44:15 | INFO | train_inner | epoch 006: 1682 / 3002 loss=2.615, ppl=6.13, wps=5784.5, ups=0.09, wpb=64784, bsz=128, num_updates=16594, lr=9.98752e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=191129
2021-06-20 23:44:25 | INFO | train_inner | epoch 006: 1683 / 3002 loss=2.558, ppl=5.89, wps=5951.2, ups=0.09, wpb=64808, bsz=128, num_updates=16595, lr=9.98752e-05, gnorm=1.927, loss_scale=8, train_wall=10, gb_free=2.8, wall=191140
2021-06-20 23:44:36 | INFO | train_inner | epoch 006: 1684 / 3002 loss=2.415, ppl=5.33, wps=5908.3, ups=0.09, wpb=64933, bsz=128, num_updates=16596, lr=9.98752e-05, gnorm=1.877, loss_scale=8, train_wall=11, gb_free=2.8, wall=191151
2021-06-20 23:44:47 | INFO | train_inner | epoch 006: 1685 / 3002 loss=2.496, ppl=5.64, wps=5895.8, ups=0.09, wpb=64826, bsz=128, num_updates=16597, lr=9.98752e-05, gnorm=1.916, loss_scale=8, train_wall=11, gb_free=2.8, wall=191162
2021-06-20 23:44:58 | INFO | train_inner | epoch 006: 1686 / 3002 loss=2.32, ppl=4.99, wps=5915.8, ups=0.09, wpb=64907, bsz=128, num_updates=16598, lr=9.98752e-05, gnorm=1.813, loss_scale=8, train_wall=11, gb_free=2.8, wall=191173
2021-06-20 23:45:09 | INFO | train_inner | epoch 006: 1687 / 3002 loss=2.478, ppl=5.57, wps=5929.2, ups=0.09, wpb=64858, bsz=128, num_updates=16599, lr=9.98752e-05, gnorm=1.853, loss_scale=8, train_wall=10, gb_free=2.8, wall=191184
2021-06-20 23:45:21 | INFO | train_inner | epoch 006: 1688 / 3002 loss=2.524, ppl=5.75, wps=5806.3, ups=0.09, wpb=64844, bsz=128, num_updates=16600, lr=9.98752e-05, gnorm=2.192, loss_scale=8, train_wall=11, gb_free=2.8, wall=191195
2021-06-20 23:45:32 | INFO | train_inner | epoch 006: 1689 / 3002 loss=2.532, ppl=5.78, wps=5840.4, ups=0.09, wpb=64708, bsz=128, num_updates=16601, lr=9.98752e-05, gnorm=1.887, loss_scale=8, train_wall=11, gb_free=2.8, wall=191206
2021-06-20 23:45:43 | INFO | train_inner | epoch 006: 1690 / 3002 loss=2.429, ppl=5.38, wps=5819.5, ups=0.09, wpb=64840, bsz=128, num_updates=16602, lr=9.98752e-05, gnorm=1.925, loss_scale=8, train_wall=11, gb_free=2.8, wall=191217
2021-06-20 23:45:54 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-20 23:46:05 | INFO | train_inner | epoch 006: 1692 / 3002 loss=2.463, ppl=5.51, wps=2899.6, ups=0.04, wpb=64815, bsz=128, num_updates=16603, lr=9.98752e-05, gnorm=1.922, loss_scale=4, train_wall=21, gb_free=2.8, wall=191239
2021-06-20 23:46:16 | INFO | train_inner | epoch 006: 1693 / 3002 loss=2.538, ppl=5.81, wps=5808.4, ups=0.09, wpb=64861, bsz=128, num_updates=16604, lr=9.98752e-05, gnorm=1.986, loss_scale=4, train_wall=11, gb_free=2.8, wall=191251
2021-06-20 23:46:27 | INFO | train_inner | epoch 006: 1694 / 3002 loss=2.323, ppl=5, wps=5826.4, ups=0.09, wpb=64868, bsz=128, num_updates=16605, lr=9.98752e-05, gnorm=1.972, loss_scale=4, train_wall=11, gb_free=2.8, wall=191262
2021-06-20 23:46:38 | INFO | train_inner | epoch 006: 1695 / 3002 loss=2.492, ppl=5.63, wps=5850.2, ups=0.09, wpb=64760, bsz=128, num_updates=16606, lr=9.98751e-05, gnorm=1.956, loss_scale=4, train_wall=11, gb_free=2.8, wall=191273
2021-06-20 23:46:50 | INFO | train_inner | epoch 006: 1696 / 3002 loss=2.597, ppl=6.05, wps=5824, ups=0.09, wpb=64831, bsz=128, num_updates=16607, lr=9.98751e-05, gnorm=1.943, loss_scale=4, train_wall=11, gb_free=2.8, wall=191284
2021-06-20 23:47:01 | INFO | train_inner | epoch 006: 1697 / 3002 loss=2.435, ppl=5.41, wps=5823.6, ups=0.09, wpb=64852, bsz=128, num_updates=16608, lr=9.98751e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=191295
2021-06-20 23:47:12 | INFO | train_inner | epoch 006: 1698 / 3002 loss=2.525, ppl=5.75, wps=5936.2, ups=0.09, wpb=64817, bsz=128, num_updates=16609, lr=9.98751e-05, gnorm=1.979, loss_scale=4, train_wall=10, gb_free=2.8, wall=191306
2021-06-20 23:47:23 | INFO | train_inner | epoch 006: 1699 / 3002 loss=2.502, ppl=5.66, wps=5870.7, ups=0.09, wpb=64821, bsz=128, num_updates=16610, lr=9.98751e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=191317
2021-06-20 23:47:34 | INFO | train_inner | epoch 006: 1700 / 3002 loss=2.593, ppl=6.04, wps=5823.8, ups=0.09, wpb=64809, bsz=128, num_updates=16611, lr=9.98751e-05, gnorm=5.179, loss_scale=4, train_wall=11, gb_free=2.8, wall=191328
2021-06-20 23:47:45 | INFO | train_inner | epoch 006: 1701 / 3002 loss=2.573, ppl=5.95, wps=5844.1, ups=0.09, wpb=64841, bsz=128, num_updates=16612, lr=9.98751e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=191339
2021-06-20 23:47:56 | INFO | train_inner | epoch 006: 1702 / 3002 loss=2.429, ppl=5.39, wps=5986.9, ups=0.09, wpb=64907, bsz=128, num_updates=16613, lr=9.98751e-05, gnorm=1.917, loss_scale=4, train_wall=10, gb_free=2.8, wall=191350
2021-06-20 23:48:07 | INFO | train_inner | epoch 006: 1703 / 3002 loss=2.567, ppl=5.93, wps=5799.2, ups=0.09, wpb=64803, bsz=128, num_updates=16614, lr=9.98751e-05, gnorm=2.041, loss_scale=4, train_wall=11, gb_free=2.8, wall=191361
2021-06-20 23:48:18 | INFO | train_inner | epoch 006: 1704 / 3002 loss=2.579, ppl=5.98, wps=5942.4, ups=0.09, wpb=64844, bsz=128, num_updates=16615, lr=9.98751e-05, gnorm=2.044, loss_scale=4, train_wall=10, gb_free=2.8, wall=191372
2021-06-20 23:48:29 | INFO | train_inner | epoch 006: 1705 / 3002 loss=2.471, ppl=5.55, wps=5816.4, ups=0.09, wpb=64771, bsz=128, num_updates=16616, lr=9.98751e-05, gnorm=1.842, loss_scale=4, train_wall=11, gb_free=2.8, wall=191383
2021-06-20 23:48:40 | INFO | train_inner | epoch 006: 1706 / 3002 loss=2.422, ppl=5.36, wps=5868.7, ups=0.09, wpb=64864, bsz=128, num_updates=16617, lr=9.98751e-05, gnorm=1.886, loss_scale=4, train_wall=11, gb_free=2.8, wall=191394
2021-06-20 23:48:51 | INFO | train_inner | epoch 006: 1707 / 3002 loss=2.484, ppl=5.59, wps=6008.6, ups=0.09, wpb=64895, bsz=128, num_updates=16618, lr=9.9875e-05, gnorm=2.017, loss_scale=4, train_wall=10, gb_free=2.8, wall=191405
2021-06-20 23:49:02 | INFO | train_inner | epoch 006: 1708 / 3002 loss=2.338, ppl=5.06, wps=5899.4, ups=0.09, wpb=64878, bsz=128, num_updates=16619, lr=9.9875e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=191416
2021-06-20 23:49:13 | INFO | train_inner | epoch 006: 1709 / 3002 loss=2.387, ppl=5.23, wps=5812, ups=0.09, wpb=64814, bsz=128, num_updates=16620, lr=9.9875e-05, gnorm=2.49, loss_scale=4, train_wall=11, gb_free=2.8, wall=191427
2021-06-20 23:49:24 | INFO | train_inner | epoch 006: 1710 / 3002 loss=2.21, ppl=4.63, wps=5898.8, ups=0.09, wpb=64838, bsz=128, num_updates=16621, lr=9.9875e-05, gnorm=1.904, loss_scale=4, train_wall=11, gb_free=2.8, wall=191438
2021-06-20 23:49:35 | INFO | train_inner | epoch 006: 1711 / 3002 loss=2.507, ppl=5.69, wps=5918.2, ups=0.09, wpb=64870, bsz=128, num_updates=16622, lr=9.9875e-05, gnorm=1.965, loss_scale=4, train_wall=10, gb_free=2.8, wall=191449
2021-06-20 23:49:46 | INFO | train_inner | epoch 006: 1712 / 3002 loss=2.28, ppl=4.86, wps=5893.7, ups=0.09, wpb=64845, bsz=128, num_updates=16623, lr=9.9875e-05, gnorm=1.929, loss_scale=4, train_wall=11, gb_free=2.8, wall=191460
2021-06-20 23:49:57 | INFO | train_inner | epoch 006: 1713 / 3002 loss=2.456, ppl=5.49, wps=5721.8, ups=0.09, wpb=64870, bsz=128, num_updates=16624, lr=9.9875e-05, gnorm=2.021, loss_scale=4, train_wall=11, gb_free=2.8, wall=191472
2021-06-20 23:50:08 | INFO | train_inner | epoch 006: 1714 / 3002 loss=2.496, ppl=5.64, wps=5836.3, ups=0.09, wpb=64807, bsz=128, num_updates=16625, lr=9.9875e-05, gnorm=1.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=191483
2021-06-20 23:50:20 | INFO | train_inner | epoch 006: 1715 / 3002 loss=2.423, ppl=5.36, wps=5791.6, ups=0.09, wpb=64811, bsz=128, num_updates=16626, lr=9.9875e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=191494
2021-06-20 23:50:31 | INFO | train_inner | epoch 006: 1716 / 3002 loss=2.463, ppl=5.52, wps=5815.4, ups=0.09, wpb=64829, bsz=128, num_updates=16627, lr=9.9875e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=191505
2021-06-20 23:50:42 | INFO | train_inner | epoch 006: 1717 / 3002 loss=2.48, ppl=5.58, wps=5916.7, ups=0.09, wpb=64878, bsz=128, num_updates=16628, lr=9.9875e-05, gnorm=1.918, loss_scale=4, train_wall=10, gb_free=2.8, wall=191516
2021-06-20 23:50:53 | INFO | train_inner | epoch 006: 1718 / 3002 loss=2.489, ppl=5.61, wps=5843.8, ups=0.09, wpb=64838, bsz=128, num_updates=16629, lr=9.9875e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=191527
2021-06-20 23:51:04 | INFO | train_inner | epoch 006: 1719 / 3002 loss=2.537, ppl=5.8, wps=5760.9, ups=0.09, wpb=64779, bsz=128, num_updates=16630, lr=9.98749e-05, gnorm=2.356, loss_scale=4, train_wall=11, gb_free=2.8, wall=191538
2021-06-20 23:51:15 | INFO | train_inner | epoch 006: 1720 / 3002 loss=2.407, ppl=5.3, wps=5908.9, ups=0.09, wpb=64911, bsz=128, num_updates=16631, lr=9.98749e-05, gnorm=1.891, loss_scale=4, train_wall=11, gb_free=2.8, wall=191549
2021-06-20 23:51:26 | INFO | train_inner | epoch 006: 1721 / 3002 loss=2.539, ppl=5.81, wps=5823.3, ups=0.09, wpb=64837, bsz=128, num_updates=16632, lr=9.98749e-05, gnorm=1.903, loss_scale=4, train_wall=11, gb_free=2.8, wall=191561
2021-06-20 23:51:37 | INFO | train_inner | epoch 006: 1722 / 3002 loss=2.356, ppl=5.12, wps=5923.9, ups=0.09, wpb=64875, bsz=128, num_updates=16633, lr=9.98749e-05, gnorm=1.891, loss_scale=4, train_wall=11, gb_free=2.8, wall=191571
2021-06-20 23:51:48 | INFO | train_inner | epoch 006: 1723 / 3002 loss=2.517, ppl=5.72, wps=5812.7, ups=0.09, wpb=64824, bsz=128, num_updates=16634, lr=9.98749e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=191583
2021-06-20 23:52:00 | INFO | train_inner | epoch 006: 1724 / 3002 loss=2.407, ppl=5.3, wps=5770.9, ups=0.09, wpb=64803, bsz=128, num_updates=16635, lr=9.98749e-05, gnorm=1.859, loss_scale=4, train_wall=11, gb_free=2.8, wall=191594
2021-06-20 23:52:11 | INFO | train_inner | epoch 006: 1725 / 3002 loss=2.44, ppl=5.43, wps=5800, ups=0.09, wpb=64809, bsz=128, num_updates=16636, lr=9.98749e-05, gnorm=1.898, loss_scale=4, train_wall=11, gb_free=2.8, wall=191605
2021-06-20 23:52:22 | INFO | train_inner | epoch 006: 1726 / 3002 loss=2.486, ppl=5.6, wps=5793, ups=0.09, wpb=64859, bsz=128, num_updates=16637, lr=9.98749e-05, gnorm=1.886, loss_scale=4, train_wall=11, gb_free=2.8, wall=191616
2021-06-20 23:52:33 | INFO | train_inner | epoch 006: 1727 / 3002 loss=2.427, ppl=5.38, wps=5800.7, ups=0.09, wpb=64912, bsz=128, num_updates=16638, lr=9.98749e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=191627
2021-06-20 23:52:44 | INFO | train_inner | epoch 006: 1728 / 3002 loss=2.497, ppl=5.64, wps=5854.1, ups=0.09, wpb=64815, bsz=128, num_updates=16639, lr=9.98749e-05, gnorm=1.907, loss_scale=4, train_wall=11, gb_free=2.8, wall=191638
2021-06-20 23:52:55 | INFO | train_inner | epoch 006: 1729 / 3002 loss=2.394, ppl=5.26, wps=5913.9, ups=0.09, wpb=64787, bsz=128, num_updates=16640, lr=9.98749e-05, gnorm=1.925, loss_scale=4, train_wall=11, gb_free=2.8, wall=191649
2021-06-20 23:53:06 | INFO | train_inner | epoch 006: 1730 / 3002 loss=2.473, ppl=5.55, wps=5811.9, ups=0.09, wpb=64871, bsz=128, num_updates=16641, lr=9.98749e-05, gnorm=1.911, loss_scale=4, train_wall=11, gb_free=2.8, wall=191661
2021-06-20 23:53:17 | INFO | train_inner | epoch 006: 1731 / 3002 loss=2.451, ppl=5.47, wps=5836, ups=0.09, wpb=64884, bsz=128, num_updates=16642, lr=9.98749e-05, gnorm=1.882, loss_scale=4, train_wall=11, gb_free=2.8, wall=191672
2021-06-20 23:53:29 | INFO | train_inner | epoch 006: 1732 / 3002 loss=2.409, ppl=5.31, wps=5789.8, ups=0.09, wpb=64893, bsz=128, num_updates=16643, lr=9.98748e-05, gnorm=1.938, loss_scale=4, train_wall=11, gb_free=2.8, wall=191683
2021-06-20 23:53:40 | INFO | train_inner | epoch 006: 1733 / 3002 loss=2.501, ppl=5.66, wps=5925.1, ups=0.09, wpb=64837, bsz=128, num_updates=16644, lr=9.98748e-05, gnorm=1.923, loss_scale=4, train_wall=10, gb_free=2.8, wall=191694
2021-06-20 23:53:51 | INFO | train_inner | epoch 006: 1734 / 3002 loss=2.324, ppl=5.01, wps=5879.1, ups=0.09, wpb=64904, bsz=128, num_updates=16645, lr=9.98748e-05, gnorm=1.914, loss_scale=4, train_wall=11, gb_free=2.8, wall=191705
2021-06-20 23:54:02 | INFO | train_inner | epoch 006: 1735 / 3002 loss=2.361, ppl=5.14, wps=5871.6, ups=0.09, wpb=64810, bsz=128, num_updates=16646, lr=9.98748e-05, gnorm=1.869, loss_scale=4, train_wall=11, gb_free=2.8, wall=191716
2021-06-20 23:54:13 | INFO | train_inner | epoch 006: 1736 / 3002 loss=2.522, ppl=5.74, wps=5863.8, ups=0.09, wpb=64833, bsz=128, num_updates=16647, lr=9.98748e-05, gnorm=1.93, loss_scale=4, train_wall=11, gb_free=2.8, wall=191727
2021-06-20 23:54:24 | INFO | train_inner | epoch 006: 1737 / 3002 loss=2.441, ppl=5.43, wps=5820.6, ups=0.09, wpb=64794, bsz=128, num_updates=16648, lr=9.98748e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=191738
2021-06-20 23:54:35 | INFO | train_inner | epoch 006: 1738 / 3002 loss=2.441, ppl=5.43, wps=5956.5, ups=0.09, wpb=64807, bsz=128, num_updates=16649, lr=9.98748e-05, gnorm=1.918, loss_scale=4, train_wall=10, gb_free=2.8, wall=191749
2021-06-20 23:54:46 | INFO | train_inner | epoch 006: 1739 / 3002 loss=2.404, ppl=5.29, wps=5787.9, ups=0.09, wpb=64806, bsz=128, num_updates=16650, lr=9.98748e-05, gnorm=1.832, loss_scale=4, train_wall=11, gb_free=2.8, wall=191760
2021-06-20 23:54:57 | INFO | train_inner | epoch 006: 1740 / 3002 loss=2.497, ppl=5.65, wps=5929.6, ups=0.09, wpb=64770, bsz=128, num_updates=16651, lr=9.98748e-05, gnorm=1.974, loss_scale=4, train_wall=10, gb_free=2.8, wall=191771
2021-06-20 23:55:08 | INFO | train_inner | epoch 006: 1741 / 3002 loss=2.342, ppl=5.07, wps=5899.5, ups=0.09, wpb=64890, bsz=128, num_updates=16652, lr=9.98748e-05, gnorm=1.923, loss_scale=4, train_wall=11, gb_free=2.8, wall=191782
2021-06-20 23:55:19 | INFO | train_inner | epoch 006: 1742 / 3002 loss=2.556, ppl=5.88, wps=5831.3, ups=0.09, wpb=64824, bsz=128, num_updates=16653, lr=9.98748e-05, gnorm=1.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=191793
2021-06-20 23:55:30 | INFO | train_inner | epoch 006: 1743 / 3002 loss=2.516, ppl=5.72, wps=6009.4, ups=0.09, wpb=64901, bsz=128, num_updates=16654, lr=9.98748e-05, gnorm=1.92, loss_scale=4, train_wall=10, gb_free=2.8, wall=191804
2021-06-20 23:55:41 | INFO | train_inner | epoch 006: 1744 / 3002 loss=2.507, ppl=5.69, wps=5857.6, ups=0.09, wpb=64840, bsz=128, num_updates=16655, lr=9.98747e-05, gnorm=2.028, loss_scale=4, train_wall=11, gb_free=2.8, wall=191815
2021-06-20 23:55:52 | INFO | train_inner | epoch 006: 1745 / 3002 loss=2.513, ppl=5.71, wps=5854.9, ups=0.09, wpb=64828, bsz=128, num_updates=16656, lr=9.98747e-05, gnorm=1.86, loss_scale=4, train_wall=11, gb_free=2.8, wall=191826
2021-06-20 23:56:03 | INFO | train_inner | epoch 006: 1746 / 3002 loss=2.535, ppl=5.79, wps=5760, ups=0.09, wpb=64895, bsz=128, num_updates=16657, lr=9.98747e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=191837
2021-06-20 23:56:14 | INFO | train_inner | epoch 006: 1747 / 3002 loss=2.519, ppl=5.73, wps=5867.1, ups=0.09, wpb=64816, bsz=128, num_updates=16658, lr=9.98747e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=191849
2021-06-20 23:56:25 | INFO | train_inner | epoch 006: 1748 / 3002 loss=2.51, ppl=5.69, wps=5973.7, ups=0.09, wpb=64861, bsz=128, num_updates=16659, lr=9.98747e-05, gnorm=1.875, loss_scale=4, train_wall=10, gb_free=2.8, wall=191859
2021-06-20 23:56:36 | INFO | train_inner | epoch 006: 1749 / 3002 loss=2.445, ppl=5.44, wps=5943.7, ups=0.09, wpb=64832, bsz=128, num_updates=16660, lr=9.98747e-05, gnorm=1.837, loss_scale=4, train_wall=10, gb_free=2.8, wall=191870
2021-06-20 23:56:47 | INFO | train_inner | epoch 006: 1750 / 3002 loss=2.372, ppl=5.18, wps=5857.8, ups=0.09, wpb=64788, bsz=128, num_updates=16661, lr=9.98747e-05, gnorm=1.857, loss_scale=4, train_wall=11, gb_free=2.8, wall=191881
2021-06-20 23:56:58 | INFO | train_inner | epoch 006: 1751 / 3002 loss=2.451, ppl=5.47, wps=5825.6, ups=0.09, wpb=64855, bsz=128, num_updates=16662, lr=9.98747e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=191892
2021-06-20 23:57:09 | INFO | train_inner | epoch 006: 1752 / 3002 loss=2.629, ppl=6.18, wps=5811.9, ups=0.09, wpb=64931, bsz=128, num_updates=16663, lr=9.98747e-05, gnorm=1.891, loss_scale=4, train_wall=11, gb_free=2.8, wall=191904
2021-06-20 23:57:20 | INFO | train_inner | epoch 006: 1753 / 3002 loss=2.398, ppl=5.27, wps=5899.8, ups=0.09, wpb=64801, bsz=128, num_updates=16664, lr=9.98747e-05, gnorm=1.885, loss_scale=4, train_wall=11, gb_free=2.8, wall=191915
2021-06-20 23:57:31 | INFO | train_inner | epoch 006: 1754 / 3002 loss=2.545, ppl=5.84, wps=5891.1, ups=0.09, wpb=64870, bsz=128, num_updates=16665, lr=9.98747e-05, gnorm=1.899, loss_scale=4, train_wall=11, gb_free=2.8, wall=191926
2021-06-20 23:57:42 | INFO | train_inner | epoch 006: 1755 / 3002 loss=2.494, ppl=5.63, wps=5879.3, ups=0.09, wpb=64756, bsz=128, num_updates=16666, lr=9.98747e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=191937
2021-06-20 23:57:54 | INFO | train_inner | epoch 006: 1756 / 3002 loss=2.486, ppl=5.6, wps=5747.9, ups=0.09, wpb=64806, bsz=128, num_updates=16667, lr=9.98747e-05, gnorm=1.877, loss_scale=4, train_wall=11, gb_free=2.8, wall=191948
2021-06-20 23:58:05 | INFO | train_inner | epoch 006: 1757 / 3002 loss=2.468, ppl=5.53, wps=5895.3, ups=0.09, wpb=64847, bsz=128, num_updates=16668, lr=9.98746e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=191959
2021-06-20 23:58:16 | INFO | train_inner | epoch 006: 1758 / 3002 loss=2.63, ppl=6.19, wps=5923.8, ups=0.09, wpb=64791, bsz=128, num_updates=16669, lr=9.98746e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=191970
2021-06-20 23:58:27 | INFO | train_inner | epoch 006: 1759 / 3002 loss=2.402, ppl=5.29, wps=5780.7, ups=0.09, wpb=64771, bsz=128, num_updates=16670, lr=9.98746e-05, gnorm=1.897, loss_scale=4, train_wall=11, gb_free=2.8, wall=191981
2021-06-20 23:58:38 | INFO | train_inner | epoch 006: 1760 / 3002 loss=2.284, ppl=4.87, wps=5815.7, ups=0.09, wpb=64750, bsz=128, num_updates=16671, lr=9.98746e-05, gnorm=1.836, loss_scale=4, train_wall=11, gb_free=2.8, wall=191992
2021-06-20 23:58:49 | INFO | train_inner | epoch 006: 1761 / 3002 loss=2.536, ppl=5.8, wps=5792.7, ups=0.09, wpb=64839, bsz=128, num_updates=16672, lr=9.98746e-05, gnorm=1.861, loss_scale=4, train_wall=11, gb_free=2.8, wall=192003
2021-06-20 23:59:00 | INFO | train_inner | epoch 006: 1762 / 3002 loss=2.501, ppl=5.66, wps=5889, ups=0.09, wpb=64822, bsz=128, num_updates=16673, lr=9.98746e-05, gnorm=2.006, loss_scale=4, train_wall=11, gb_free=2.8, wall=192014
2021-06-20 23:59:11 | INFO | train_inner | epoch 006: 1763 / 3002 loss=2.482, ppl=5.59, wps=5789.2, ups=0.09, wpb=64840, bsz=128, num_updates=16674, lr=9.98746e-05, gnorm=1.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=192026
2021-06-20 23:59:22 | INFO | train_inner | epoch 006: 1764 / 3002 loss=2.502, ppl=5.67, wps=5884.6, ups=0.09, wpb=64782, bsz=128, num_updates=16675, lr=9.98746e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=192037
2021-06-20 23:59:33 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-20 23:59:44 | INFO | train_inner | epoch 006: 1766 / 3002 loss=2.506, ppl=5.68, wps=2937.9, ups=0.05, wpb=64833, bsz=128, num_updates=16676, lr=9.98746e-05, gnorm=2.041, loss_scale=2, train_wall=21, gb_free=2.8, wall=192059
2021-06-20 23:59:55 | INFO | train_inner | epoch 006: 1767 / 3002 loss=2.366, ppl=5.16, wps=5826.7, ups=0.09, wpb=64893, bsz=128, num_updates=16677, lr=9.98746e-05, gnorm=1.869, loss_scale=2, train_wall=11, gb_free=2.8, wall=192070
2021-06-21 00:00:07 | INFO | train_inner | epoch 006: 1768 / 3002 loss=2.505, ppl=5.68, wps=5788, ups=0.09, wpb=64733, bsz=128, num_updates=16678, lr=9.98746e-05, gnorm=1.851, loss_scale=2, train_wall=11, gb_free=2.8, wall=192081
2021-06-21 00:00:18 | INFO | train_inner | epoch 006: 1769 / 3002 loss=2.443, ppl=5.44, wps=5835.3, ups=0.09, wpb=64864, bsz=128, num_updates=16679, lr=9.98746e-05, gnorm=1.833, loss_scale=2, train_wall=11, gb_free=2.8, wall=192092
2021-06-21 00:00:29 | INFO | train_inner | epoch 006: 1770 / 3002 loss=2.381, ppl=5.21, wps=5843.9, ups=0.09, wpb=64831, bsz=128, num_updates=16680, lr=9.98745e-05, gnorm=1.864, loss_scale=2, train_wall=11, gb_free=2.8, wall=192103
2021-06-21 00:00:40 | INFO | train_inner | epoch 006: 1771 / 3002 loss=2.516, ppl=5.72, wps=6028.2, ups=0.09, wpb=64815, bsz=128, num_updates=16681, lr=9.98745e-05, gnorm=1.968, loss_scale=2, train_wall=10, gb_free=2.8, wall=192114
2021-06-21 00:00:51 | INFO | train_inner | epoch 006: 1772 / 3002 loss=2.374, ppl=5.18, wps=5790.1, ups=0.09, wpb=64776, bsz=128, num_updates=16682, lr=9.98745e-05, gnorm=1.856, loss_scale=2, train_wall=11, gb_free=2.8, wall=192125
2021-06-21 00:01:02 | INFO | train_inner | epoch 006: 1773 / 3002 loss=2.504, ppl=5.67, wps=5796.8, ups=0.09, wpb=64815, bsz=128, num_updates=16683, lr=9.98745e-05, gnorm=1.941, loss_scale=2, train_wall=11, gb_free=2.8, wall=192136
2021-06-21 00:01:13 | INFO | train_inner | epoch 006: 1774 / 3002 loss=2.465, ppl=5.52, wps=5911.5, ups=0.09, wpb=64860, bsz=128, num_updates=16684, lr=9.98745e-05, gnorm=1.923, loss_scale=2, train_wall=10, gb_free=2.8, wall=192147
2021-06-21 00:01:24 | INFO | train_inner | epoch 006: 1775 / 3002 loss=2.45, ppl=5.47, wps=5818.8, ups=0.09, wpb=64850, bsz=128, num_updates=16685, lr=9.98745e-05, gnorm=1.917, loss_scale=2, train_wall=11, gb_free=2.8, wall=192158
2021-06-21 00:01:35 | INFO | train_inner | epoch 006: 1776 / 3002 loss=2.437, ppl=5.41, wps=5944.6, ups=0.09, wpb=64765, bsz=128, num_updates=16686, lr=9.98745e-05, gnorm=2.002, loss_scale=2, train_wall=10, gb_free=2.8, wall=192169
2021-06-21 00:01:46 | INFO | train_inner | epoch 006: 1777 / 3002 loss=2.5, ppl=5.66, wps=5872, ups=0.09, wpb=64768, bsz=128, num_updates=16687, lr=9.98745e-05, gnorm=1.919, loss_scale=2, train_wall=11, gb_free=2.8, wall=192180
2021-06-21 00:01:57 | INFO | train_inner | epoch 006: 1778 / 3002 loss=2.443, ppl=5.44, wps=5903, ups=0.09, wpb=64856, bsz=128, num_updates=16688, lr=9.98745e-05, gnorm=1.985, loss_scale=2, train_wall=11, gb_free=2.8, wall=192191
2021-06-21 00:02:08 | INFO | train_inner | epoch 006: 1779 / 3002 loss=2.449, ppl=5.46, wps=5887.3, ups=0.09, wpb=64854, bsz=128, num_updates=16689, lr=9.98745e-05, gnorm=1.998, loss_scale=2, train_wall=11, gb_free=2.8, wall=192202
2021-06-21 00:02:19 | INFO | train_inner | epoch 006: 1780 / 3002 loss=2.413, ppl=5.33, wps=5822.8, ups=0.09, wpb=64888, bsz=128, num_updates=16690, lr=9.98745e-05, gnorm=1.954, loss_scale=2, train_wall=11, gb_free=2.8, wall=192214
2021-06-21 00:02:30 | INFO | train_inner | epoch 006: 1781 / 3002 loss=2.314, ppl=4.97, wps=5943, ups=0.09, wpb=64843, bsz=128, num_updates=16691, lr=9.98745e-05, gnorm=1.912, loss_scale=2, train_wall=10, gb_free=2.8, wall=192224
2021-06-21 00:02:41 | INFO | train_inner | epoch 006: 1782 / 3002 loss=2.528, ppl=5.77, wps=5873.9, ups=0.09, wpb=64838, bsz=128, num_updates=16692, lr=9.98745e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=192235
2021-06-21 00:02:52 | INFO | train_inner | epoch 006: 1783 / 3002 loss=2.453, ppl=5.48, wps=5936.9, ups=0.09, wpb=64796, bsz=128, num_updates=16693, lr=9.98744e-05, gnorm=1.908, loss_scale=2, train_wall=10, gb_free=2.8, wall=192246
2021-06-21 00:03:03 | INFO | train_inner | epoch 006: 1784 / 3002 loss=2.281, ppl=4.86, wps=5844.7, ups=0.09, wpb=64858, bsz=128, num_updates=16694, lr=9.98744e-05, gnorm=1.97, loss_scale=2, train_wall=11, gb_free=2.8, wall=192257
2021-06-21 00:03:14 | INFO | train_inner | epoch 006: 1785 / 3002 loss=2.398, ppl=5.27, wps=5796.9, ups=0.09, wpb=64784, bsz=128, num_updates=16695, lr=9.98744e-05, gnorm=1.898, loss_scale=2, train_wall=11, gb_free=2.8, wall=192269
2021-06-21 00:03:25 | INFO | train_inner | epoch 006: 1786 / 3002 loss=2.419, ppl=5.35, wps=5935.7, ups=0.09, wpb=64871, bsz=128, num_updates=16696, lr=9.98744e-05, gnorm=1.909, loss_scale=2, train_wall=10, gb_free=2.8, wall=192280
2021-06-21 00:03:37 | INFO | train_inner | epoch 006: 1787 / 3002 loss=2.429, ppl=5.38, wps=5733.9, ups=0.09, wpb=64826, bsz=128, num_updates=16697, lr=9.98744e-05, gnorm=1.851, loss_scale=2, train_wall=11, gb_free=2.8, wall=192291
2021-06-21 00:03:48 | INFO | train_inner | epoch 006: 1788 / 3002 loss=2.559, ppl=5.89, wps=5889.1, ups=0.09, wpb=64823, bsz=128, num_updates=16698, lr=9.98744e-05, gnorm=2.132, loss_scale=2, train_wall=11, gb_free=2.8, wall=192302
2021-06-21 00:03:58 | INFO | train_inner | epoch 006: 1789 / 3002 loss=2.436, ppl=5.41, wps=6025.1, ups=0.09, wpb=64822, bsz=128, num_updates=16699, lr=9.98744e-05, gnorm=1.834, loss_scale=2, train_wall=10, gb_free=2.8, wall=192313
2021-06-21 00:04:09 | INFO | train_inner | epoch 006: 1790 / 3002 loss=2.334, ppl=5.04, wps=5839.4, ups=0.09, wpb=64811, bsz=128, num_updates=16700, lr=9.98744e-05, gnorm=1.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=192324
2021-06-21 00:04:20 | INFO | train_inner | epoch 006: 1791 / 3002 loss=2.305, ppl=4.94, wps=5905.6, ups=0.09, wpb=64831, bsz=128, num_updates=16701, lr=9.98744e-05, gnorm=1.889, loss_scale=2, train_wall=11, gb_free=2.8, wall=192335
2021-06-21 00:04:32 | INFO | train_inner | epoch 006: 1792 / 3002 loss=2.561, ppl=5.9, wps=5762.7, ups=0.09, wpb=64851, bsz=128, num_updates=16702, lr=9.98744e-05, gnorm=1.958, loss_scale=2, train_wall=11, gb_free=2.8, wall=192346
2021-06-21 00:04:43 | INFO | train_inner | epoch 006: 1793 / 3002 loss=2.47, ppl=5.54, wps=5784.1, ups=0.09, wpb=64833, bsz=128, num_updates=16703, lr=9.98744e-05, gnorm=2.048, loss_scale=2, train_wall=11, gb_free=2.8, wall=192357
2021-06-21 00:04:54 | INFO | train_inner | epoch 006: 1794 / 3002 loss=2.395, ppl=5.26, wps=5836.2, ups=0.09, wpb=64840, bsz=128, num_updates=16704, lr=9.98744e-05, gnorm=1.88, loss_scale=2, train_wall=11, gb_free=2.8, wall=192368
2021-06-21 00:05:05 | INFO | train_inner | epoch 006: 1795 / 3002 loss=2.345, ppl=5.08, wps=5952.7, ups=0.09, wpb=64839, bsz=128, num_updates=16705, lr=9.98743e-05, gnorm=1.889, loss_scale=2, train_wall=10, gb_free=2.8, wall=192379
2021-06-21 00:05:16 | INFO | train_inner | epoch 006: 1796 / 3002 loss=2.37, ppl=5.17, wps=5734.7, ups=0.09, wpb=64894, bsz=128, num_updates=16706, lr=9.98743e-05, gnorm=1.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=192391
2021-06-21 00:05:27 | INFO | train_inner | epoch 006: 1797 / 3002 loss=2.469, ppl=5.54, wps=5765.8, ups=0.09, wpb=64901, bsz=128, num_updates=16707, lr=9.98743e-05, gnorm=1.907, loss_scale=2, train_wall=11, gb_free=2.8, wall=192402
2021-06-21 00:05:39 | INFO | train_inner | epoch 006: 1798 / 3002 loss=2.402, ppl=5.29, wps=5802.4, ups=0.09, wpb=64835, bsz=128, num_updates=16708, lr=9.98743e-05, gnorm=1.915, loss_scale=2, train_wall=11, gb_free=2.8, wall=192413
2021-06-21 00:05:50 | INFO | train_inner | epoch 006: 1799 / 3002 loss=2.464, ppl=5.52, wps=5887.5, ups=0.09, wpb=64793, bsz=128, num_updates=16709, lr=9.98743e-05, gnorm=3.214, loss_scale=2, train_wall=11, gb_free=2.8, wall=192424
2021-06-21 00:06:01 | INFO | train_inner | epoch 006: 1800 / 3002 loss=2.369, ppl=5.17, wps=5800, ups=0.09, wpb=64813, bsz=128, num_updates=16710, lr=9.98743e-05, gnorm=1.877, loss_scale=2, train_wall=11, gb_free=2.8, wall=192435
2021-06-21 00:06:12 | INFO | train_inner | epoch 006: 1801 / 3002 loss=2.536, ppl=5.8, wps=5813.3, ups=0.09, wpb=64783, bsz=128, num_updates=16711, lr=9.98743e-05, gnorm=1.851, loss_scale=2, train_wall=11, gb_free=2.8, wall=192446
2021-06-21 00:06:23 | INFO | train_inner | epoch 006: 1802 / 3002 loss=2.509, ppl=5.69, wps=5818.2, ups=0.09, wpb=64811, bsz=128, num_updates=16712, lr=9.98743e-05, gnorm=1.892, loss_scale=2, train_wall=11, gb_free=2.8, wall=192457
2021-06-21 00:06:34 | INFO | train_inner | epoch 006: 1803 / 3002 loss=2.474, ppl=5.55, wps=5849.6, ups=0.09, wpb=64794, bsz=128, num_updates=16713, lr=9.98743e-05, gnorm=1.92, loss_scale=2, train_wall=11, gb_free=2.8, wall=192468
2021-06-21 00:06:45 | INFO | train_inner | epoch 006: 1804 / 3002 loss=2.284, ppl=4.87, wps=5941.9, ups=0.09, wpb=64865, bsz=128, num_updates=16714, lr=9.98743e-05, gnorm=1.944, loss_scale=2, train_wall=10, gb_free=2.8, wall=192479
2021-06-21 00:06:56 | INFO | train_inner | epoch 006: 1805 / 3002 loss=2.494, ppl=5.63, wps=5885.1, ups=0.09, wpb=64821, bsz=128, num_updates=16715, lr=9.98743e-05, gnorm=4.336, loss_scale=2, train_wall=11, gb_free=2.8, wall=192490
2021-06-21 00:07:07 | INFO | train_inner | epoch 006: 1806 / 3002 loss=2.441, ppl=5.43, wps=5834.5, ups=0.09, wpb=64814, bsz=128, num_updates=16716, lr=9.98743e-05, gnorm=1.968, loss_scale=2, train_wall=11, gb_free=2.8, wall=192502
2021-06-21 00:07:18 | INFO | train_inner | epoch 006: 1807 / 3002 loss=2.493, ppl=5.63, wps=5852.4, ups=0.09, wpb=64819, bsz=128, num_updates=16717, lr=9.98743e-05, gnorm=1.941, loss_scale=2, train_wall=11, gb_free=2.8, wall=192513
2021-06-21 00:07:29 | INFO | train_inner | epoch 006: 1808 / 3002 loss=2.472, ppl=5.55, wps=5917.8, ups=0.09, wpb=64778, bsz=128, num_updates=16718, lr=9.98742e-05, gnorm=1.966, loss_scale=2, train_wall=10, gb_free=2.8, wall=192524
2021-06-21 00:07:40 | INFO | train_inner | epoch 006: 1809 / 3002 loss=2.385, ppl=5.22, wps=5801.7, ups=0.09, wpb=64834, bsz=128, num_updates=16719, lr=9.98742e-05, gnorm=1.851, loss_scale=2, train_wall=11, gb_free=2.8, wall=192535
2021-06-21 00:07:52 | INFO | train_inner | epoch 006: 1810 / 3002 loss=2.379, ppl=5.2, wps=5828.8, ups=0.09, wpb=64862, bsz=128, num_updates=16720, lr=9.98742e-05, gnorm=1.869, loss_scale=2, train_wall=11, gb_free=2.8, wall=192546
2021-06-21 00:08:03 | INFO | train_inner | epoch 006: 1811 / 3002 loss=2.522, ppl=5.75, wps=5791.5, ups=0.09, wpb=64844, bsz=128, num_updates=16721, lr=9.98742e-05, gnorm=1.932, loss_scale=2, train_wall=11, gb_free=2.8, wall=192557
2021-06-21 00:08:14 | INFO | train_inner | epoch 006: 1812 / 3002 loss=2.51, ppl=5.7, wps=5821, ups=0.09, wpb=64830, bsz=128, num_updates=16722, lr=9.98742e-05, gnorm=1.926, loss_scale=2, train_wall=11, gb_free=2.8, wall=192568
2021-06-21 00:08:25 | INFO | train_inner | epoch 006: 1813 / 3002 loss=2.573, ppl=5.95, wps=5813.7, ups=0.09, wpb=64811, bsz=128, num_updates=16723, lr=9.98742e-05, gnorm=1.911, loss_scale=2, train_wall=11, gb_free=2.8, wall=192579
2021-06-21 00:08:36 | INFO | train_inner | epoch 006: 1814 / 3002 loss=2.367, ppl=5.16, wps=6078.3, ups=0.09, wpb=64799, bsz=128, num_updates=16724, lr=9.98742e-05, gnorm=1.832, loss_scale=2, train_wall=10, gb_free=2.8, wall=192590
2021-06-21 00:08:47 | INFO | train_inner | epoch 006: 1815 / 3002 loss=2.566, ppl=5.92, wps=5874.2, ups=0.09, wpb=64852, bsz=128, num_updates=16725, lr=9.98742e-05, gnorm=1.926, loss_scale=2, train_wall=11, gb_free=2.8, wall=192601
2021-06-21 00:08:58 | INFO | train_inner | epoch 006: 1816 / 3002 loss=2.698, ppl=6.49, wps=5876.5, ups=0.09, wpb=64791, bsz=128, num_updates=16726, lr=9.98742e-05, gnorm=1.916, loss_scale=2, train_wall=11, gb_free=2.8, wall=192612
2021-06-21 00:09:09 | INFO | train_inner | epoch 006: 1817 / 3002 loss=2.495, ppl=5.64, wps=5751.7, ups=0.09, wpb=64914, bsz=128, num_updates=16727, lr=9.98742e-05, gnorm=1.891, loss_scale=2, train_wall=11, gb_free=2.8, wall=192623
2021-06-21 00:09:20 | INFO | train_inner | epoch 006: 1818 / 3002 loss=2.427, ppl=5.38, wps=5799, ups=0.09, wpb=64805, bsz=128, num_updates=16728, lr=9.98742e-05, gnorm=1.903, loss_scale=2, train_wall=11, gb_free=2.8, wall=192635
2021-06-21 00:09:31 | INFO | train_inner | epoch 006: 1819 / 3002 loss=2.637, ppl=6.22, wps=5894, ups=0.09, wpb=64897, bsz=128, num_updates=16729, lr=9.98742e-05, gnorm=1.993, loss_scale=2, train_wall=11, gb_free=2.8, wall=192646
2021-06-21 00:09:42 | INFO | train_inner | epoch 006: 1820 / 3002 loss=2.366, ppl=5.15, wps=5783.7, ups=0.09, wpb=64891, bsz=128, num_updates=16730, lr=9.98741e-05, gnorm=1.869, loss_scale=2, train_wall=11, gb_free=2.8, wall=192657
2021-06-21 00:09:54 | INFO | train_inner | epoch 006: 1821 / 3002 loss=2.581, ppl=5.98, wps=5832.6, ups=0.09, wpb=64757, bsz=128, num_updates=16731, lr=9.98741e-05, gnorm=2.492, loss_scale=2, train_wall=11, gb_free=2.8, wall=192668
2021-06-21 00:10:04 | INFO | train_inner | epoch 006: 1822 / 3002 loss=2.435, ppl=5.41, wps=5915.8, ups=0.09, wpb=64805, bsz=128, num_updates=16732, lr=9.98741e-05, gnorm=1.989, loss_scale=2, train_wall=11, gb_free=2.8, wall=192679
2021-06-21 00:10:16 | INFO | train_inner | epoch 006: 1823 / 3002 loss=2.223, ppl=4.67, wps=5796.3, ups=0.09, wpb=64831, bsz=128, num_updates=16733, lr=9.98741e-05, gnorm=1.903, loss_scale=2, train_wall=11, gb_free=2.8, wall=192690
2021-06-21 00:10:27 | INFO | train_inner | epoch 006: 1824 / 3002 loss=2.421, ppl=5.36, wps=5963.9, ups=0.09, wpb=64893, bsz=128, num_updates=16734, lr=9.98741e-05, gnorm=2.145, loss_scale=2, train_wall=10, gb_free=2.8, wall=192701
2021-06-21 00:10:38 | INFO | train_inner | epoch 006: 1825 / 3002 loss=2.45, ppl=5.46, wps=5892.5, ups=0.09, wpb=64865, bsz=128, num_updates=16735, lr=9.98741e-05, gnorm=1.968, loss_scale=2, train_wall=11, gb_free=2.8, wall=192712
2021-06-21 00:10:49 | INFO | train_inner | epoch 006: 1826 / 3002 loss=2.489, ppl=5.61, wps=5781.3, ups=0.09, wpb=64875, bsz=128, num_updates=16736, lr=9.98741e-05, gnorm=2.016, loss_scale=2, train_wall=11, gb_free=2.8, wall=192723
2021-06-21 00:11:00 | INFO | train_inner | epoch 006: 1827 / 3002 loss=2.539, ppl=5.81, wps=5947.4, ups=0.09, wpb=64840, bsz=128, num_updates=16737, lr=9.98741e-05, gnorm=2.02, loss_scale=2, train_wall=10, gb_free=2.8, wall=192734
2021-06-21 00:11:11 | INFO | train_inner | epoch 006: 1828 / 3002 loss=2.375, ppl=5.19, wps=5713.2, ups=0.09, wpb=64816, bsz=128, num_updates=16738, lr=9.98741e-05, gnorm=1.843, loss_scale=2, train_wall=11, gb_free=2.8, wall=192745
2021-06-21 00:11:22 | INFO | train_inner | epoch 006: 1829 / 3002 loss=2.469, ppl=5.54, wps=5950.8, ups=0.09, wpb=64758, bsz=128, num_updates=16739, lr=9.98741e-05, gnorm=2.035, loss_scale=2, train_wall=10, gb_free=2.8, wall=192756
2021-06-21 00:11:33 | INFO | train_inner | epoch 006: 1830 / 3002 loss=2.425, ppl=5.37, wps=5962, ups=0.09, wpb=64810, bsz=128, num_updates=16740, lr=9.98741e-05, gnorm=1.894, loss_scale=2, train_wall=10, gb_free=2.8, wall=192767
2021-06-21 00:11:44 | INFO | train_inner | epoch 006: 1831 / 3002 loss=2.528, ppl=5.77, wps=5874.5, ups=0.09, wpb=64827, bsz=128, num_updates=16741, lr=9.98741e-05, gnorm=1.863, loss_scale=2, train_wall=11, gb_free=2.8, wall=192778
2021-06-21 00:11:55 | INFO | train_inner | epoch 006: 1832 / 3002 loss=2.546, ppl=5.84, wps=5850, ups=0.09, wpb=64743, bsz=128, num_updates=16742, lr=9.98741e-05, gnorm=2.61, loss_scale=2, train_wall=11, gb_free=2.8, wall=192789
2021-06-21 00:12:06 | INFO | train_inner | epoch 006: 1833 / 3002 loss=2.497, ppl=5.65, wps=5841, ups=0.09, wpb=64815, bsz=128, num_updates=16743, lr=9.9874e-05, gnorm=1.91, loss_scale=2, train_wall=11, gb_free=2.8, wall=192800
2021-06-21 00:12:17 | INFO | train_inner | epoch 006: 1834 / 3002 loss=2.547, ppl=5.84, wps=5807.9, ups=0.09, wpb=64821, bsz=128, num_updates=16744, lr=9.9874e-05, gnorm=1.867, loss_scale=2, train_wall=11, gb_free=2.8, wall=192811
2021-06-21 00:12:28 | INFO | train_inner | epoch 006: 1835 / 3002 loss=2.471, ppl=5.54, wps=5843.6, ups=0.09, wpb=64779, bsz=128, num_updates=16745, lr=9.9874e-05, gnorm=1.827, loss_scale=2, train_wall=11, gb_free=2.8, wall=192823
2021-06-21 00:12:39 | INFO | train_inner | epoch 006: 1836 / 3002 loss=2.362, ppl=5.14, wps=5821.6, ups=0.09, wpb=64831, bsz=128, num_updates=16746, lr=9.9874e-05, gnorm=1.904, loss_scale=2, train_wall=11, gb_free=2.8, wall=192834
2021-06-21 00:12:50 | INFO | train_inner | epoch 006: 1837 / 3002 loss=2.512, ppl=5.7, wps=5899.2, ups=0.09, wpb=64838, bsz=128, num_updates=16747, lr=9.9874e-05, gnorm=2.095, loss_scale=2, train_wall=11, gb_free=2.8, wall=192845
2021-06-21 00:13:01 | INFO | train_inner | epoch 006: 1838 / 3002 loss=2.563, ppl=5.91, wps=5886.8, ups=0.09, wpb=64774, bsz=128, num_updates=16748, lr=9.9874e-05, gnorm=1.98, loss_scale=2, train_wall=11, gb_free=2.8, wall=192856
2021-06-21 00:13:13 | INFO | train_inner | epoch 006: 1839 / 3002 loss=2.348, ppl=5.09, wps=5773.6, ups=0.09, wpb=64867, bsz=128, num_updates=16749, lr=9.9874e-05, gnorm=1.967, loss_scale=2, train_wall=11, gb_free=2.8, wall=192867
2021-06-21 00:13:24 | INFO | train_inner | epoch 006: 1840 / 3002 loss=2.448, ppl=5.46, wps=5880.1, ups=0.09, wpb=64830, bsz=128, num_updates=16750, lr=9.9874e-05, gnorm=7.481, loss_scale=2, train_wall=11, gb_free=2.8, wall=192878
2021-06-21 00:13:34 | INFO | train_inner | epoch 006: 1841 / 3002 loss=2.437, ppl=5.42, wps=5958.2, ups=0.09, wpb=64790, bsz=128, num_updates=16751, lr=9.9874e-05, gnorm=1.942, loss_scale=2, train_wall=10, gb_free=2.8, wall=192889
2021-06-21 00:13:45 | INFO | train_inner | epoch 006: 1842 / 3002 loss=2.508, ppl=5.69, wps=5927.2, ups=0.09, wpb=64818, bsz=128, num_updates=16752, lr=9.9874e-05, gnorm=2.358, loss_scale=2, train_wall=10, gb_free=2.8, wall=192900
2021-06-21 00:13:57 | INFO | train_inner | epoch 006: 1843 / 3002 loss=2.421, ppl=5.35, wps=5797.5, ups=0.09, wpb=64904, bsz=128, num_updates=16753, lr=9.9874e-05, gnorm=1.88, loss_scale=2, train_wall=11, gb_free=2.8, wall=192911
2021-06-21 00:14:08 | INFO | train_inner | epoch 006: 1844 / 3002 loss=2.497, ppl=5.64, wps=5842.5, ups=0.09, wpb=64760, bsz=128, num_updates=16754, lr=9.9874e-05, gnorm=3.611, loss_scale=2, train_wall=11, gb_free=2.8, wall=192922
2021-06-21 00:14:19 | INFO | train_inner | epoch 006: 1845 / 3002 loss=2.361, ppl=5.14, wps=5775.4, ups=0.09, wpb=64826, bsz=128, num_updates=16755, lr=9.98739e-05, gnorm=2.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=192933
2021-06-21 00:14:30 | INFO | train_inner | epoch 006: 1846 / 3002 loss=2.535, ppl=5.8, wps=5796.1, ups=0.09, wpb=64714, bsz=128, num_updates=16756, lr=9.98739e-05, gnorm=2.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=192944
2021-06-21 00:14:41 | INFO | train_inner | epoch 006: 1847 / 3002 loss=2.461, ppl=5.5, wps=5759.3, ups=0.09, wpb=64761, bsz=128, num_updates=16757, lr=9.98739e-05, gnorm=2.305, loss_scale=2, train_wall=11, gb_free=2.8, wall=192956
2021-06-21 00:14:53 | INFO | train_inner | epoch 006: 1848 / 3002 loss=2.493, ppl=5.63, wps=5791.3, ups=0.09, wpb=64857, bsz=128, num_updates=16758, lr=9.98739e-05, gnorm=1.963, loss_scale=2, train_wall=11, gb_free=2.8, wall=192967
2021-06-21 00:15:04 | INFO | train_inner | epoch 006: 1849 / 3002 loss=2.454, ppl=5.48, wps=5866.9, ups=0.09, wpb=64832, bsz=128, num_updates=16759, lr=9.98739e-05, gnorm=1.889, loss_scale=2, train_wall=11, gb_free=2.8, wall=192978
2021-06-21 00:15:15 | INFO | train_inner | epoch 006: 1850 / 3002 loss=2.459, ppl=5.5, wps=5891, ups=0.09, wpb=64805, bsz=128, num_updates=16760, lr=9.98739e-05, gnorm=2.241, loss_scale=2, train_wall=11, gb_free=2.8, wall=192989
2021-06-21 00:15:26 | INFO | train_inner | epoch 006: 1851 / 3002 loss=2.428, ppl=5.38, wps=5885.6, ups=0.09, wpb=64861, bsz=128, num_updates=16761, lr=9.98739e-05, gnorm=1.867, loss_scale=2, train_wall=11, gb_free=2.8, wall=193000
2021-06-21 00:15:37 | INFO | train_inner | epoch 006: 1852 / 3002 loss=2.425, ppl=5.37, wps=5902.7, ups=0.09, wpb=64761, bsz=128, num_updates=16762, lr=9.98739e-05, gnorm=1.902, loss_scale=2, train_wall=11, gb_free=2.8, wall=193011
2021-06-21 00:15:48 | INFO | train_inner | epoch 006: 1853 / 3002 loss=2.388, ppl=5.23, wps=5883.8, ups=0.09, wpb=64857, bsz=128, num_updates=16763, lr=9.98739e-05, gnorm=1.869, loss_scale=2, train_wall=11, gb_free=2.8, wall=193022
2021-06-21 00:15:59 | INFO | train_inner | epoch 006: 1854 / 3002 loss=2.549, ppl=5.85, wps=5790.6, ups=0.09, wpb=64802, bsz=128, num_updates=16764, lr=9.98739e-05, gnorm=1.911, loss_scale=2, train_wall=11, gb_free=2.8, wall=193033
2021-06-21 00:16:10 | INFO | train_inner | epoch 006: 1855 / 3002 loss=2.495, ppl=5.64, wps=5783.6, ups=0.09, wpb=64857, bsz=128, num_updates=16765, lr=9.98739e-05, gnorm=1.962, loss_scale=2, train_wall=11, gb_free=2.8, wall=193044
2021-06-21 00:16:21 | INFO | train_inner | epoch 006: 1856 / 3002 loss=2.403, ppl=5.29, wps=5880.9, ups=0.09, wpb=64921, bsz=128, num_updates=16766, lr=9.98739e-05, gnorm=1.99, loss_scale=2, train_wall=11, gb_free=2.8, wall=193055
2021-06-21 00:16:32 | INFO | train_inner | epoch 006: 1857 / 3002 loss=2.49, ppl=5.62, wps=5862.2, ups=0.09, wpb=64916, bsz=128, num_updates=16767, lr=9.98739e-05, gnorm=2.027, loss_scale=2, train_wall=11, gb_free=2.8, wall=193066
2021-06-21 00:16:43 | INFO | train_inner | epoch 006: 1858 / 3002 loss=2.412, ppl=5.32, wps=5905.6, ups=0.09, wpb=64894, bsz=128, num_updates=16768, lr=9.98738e-05, gnorm=2.419, loss_scale=2, train_wall=11, gb_free=2.8, wall=193077
2021-06-21 00:16:54 | INFO | train_inner | epoch 006: 1859 / 3002 loss=2.478, ppl=5.57, wps=5814.8, ups=0.09, wpb=64837, bsz=128, num_updates=16769, lr=9.98738e-05, gnorm=1.88, loss_scale=2, train_wall=11, gb_free=2.8, wall=193089
2021-06-21 00:17:06 | INFO | train_inner | epoch 006: 1860 / 3002 loss=2.437, ppl=5.41, wps=5762.8, ups=0.09, wpb=64826, bsz=128, num_updates=16770, lr=9.98738e-05, gnorm=1.857, loss_scale=2, train_wall=11, gb_free=2.8, wall=193100
2021-06-21 00:17:17 | INFO | train_inner | epoch 006: 1861 / 3002 loss=2.476, ppl=5.56, wps=5795.5, ups=0.09, wpb=64772, bsz=128, num_updates=16771, lr=9.98738e-05, gnorm=1.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=193111
2021-06-21 00:17:28 | INFO | train_inner | epoch 006: 1862 / 3002 loss=2.235, ppl=4.71, wps=5912.3, ups=0.09, wpb=64844, bsz=128, num_updates=16772, lr=9.98738e-05, gnorm=1.866, loss_scale=2, train_wall=11, gb_free=2.8, wall=193122
2021-06-21 00:17:39 | INFO | train_inner | epoch 006: 1863 / 3002 loss=2.495, ppl=5.64, wps=5792.2, ups=0.09, wpb=64699, bsz=128, num_updates=16773, lr=9.98738e-05, gnorm=1.943, loss_scale=2, train_wall=11, gb_free=2.8, wall=193133
2021-06-21 00:17:50 | INFO | train_inner | epoch 006: 1864 / 3002 loss=2.541, ppl=5.82, wps=5855.4, ups=0.09, wpb=64826, bsz=128, num_updates=16774, lr=9.98738e-05, gnorm=1.906, loss_scale=2, train_wall=11, gb_free=2.8, wall=193144
2021-06-21 00:18:01 | INFO | train_inner | epoch 006: 1865 / 3002 loss=2.541, ppl=5.82, wps=5774.5, ups=0.09, wpb=64863, bsz=128, num_updates=16775, lr=9.98738e-05, gnorm=1.982, loss_scale=2, train_wall=11, gb_free=2.8, wall=193155
2021-06-21 00:18:12 | INFO | train_inner | epoch 006: 1866 / 3002 loss=2.421, ppl=5.35, wps=5828.7, ups=0.09, wpb=64877, bsz=128, num_updates=16776, lr=9.98738e-05, gnorm=2.02, loss_scale=2, train_wall=11, gb_free=2.8, wall=193167
2021-06-21 00:18:23 | INFO | train_inner | epoch 006: 1867 / 3002 loss=2.467, ppl=5.53, wps=5818.8, ups=0.09, wpb=64813, bsz=128, num_updates=16777, lr=9.98738e-05, gnorm=3.704, loss_scale=2, train_wall=11, gb_free=2.8, wall=193178
2021-06-21 00:18:34 | INFO | train_inner | epoch 006: 1868 / 3002 loss=2.388, ppl=5.23, wps=5892.2, ups=0.09, wpb=64903, bsz=128, num_updates=16778, lr=9.98738e-05, gnorm=1.92, loss_scale=2, train_wall=11, gb_free=2.8, wall=193189
2021-06-21 00:18:45 | INFO | train_inner | epoch 006: 1869 / 3002 loss=2.404, ppl=5.29, wps=5900.1, ups=0.09, wpb=64855, bsz=128, num_updates=16779, lr=9.98738e-05, gnorm=2.006, loss_scale=2, train_wall=11, gb_free=2.8, wall=193200
2021-06-21 00:18:57 | INFO | train_inner | epoch 006: 1870 / 3002 loss=2.469, ppl=5.54, wps=5793, ups=0.09, wpb=64808, bsz=128, num_updates=16780, lr=9.98737e-05, gnorm=1.894, loss_scale=2, train_wall=11, gb_free=2.8, wall=193211
2021-06-21 00:19:08 | INFO | train_inner | epoch 006: 1871 / 3002 loss=2.515, ppl=5.72, wps=5826.9, ups=0.09, wpb=64801, bsz=128, num_updates=16781, lr=9.98737e-05, gnorm=4.936, loss_scale=2, train_wall=11, gb_free=2.8, wall=193222
2021-06-21 00:19:19 | INFO | train_inner | epoch 006: 1872 / 3002 loss=2.481, ppl=5.58, wps=5874.2, ups=0.09, wpb=64848, bsz=128, num_updates=16782, lr=9.98737e-05, gnorm=1.922, loss_scale=2, train_wall=11, gb_free=2.8, wall=193233
2021-06-21 00:19:30 | INFO | train_inner | epoch 006: 1873 / 3002 loss=2.506, ppl=5.68, wps=5852.6, ups=0.09, wpb=64832, bsz=128, num_updates=16783, lr=9.98737e-05, gnorm=2.613, loss_scale=2, train_wall=11, gb_free=2.8, wall=193244
2021-06-21 00:19:41 | INFO | train_inner | epoch 006: 1874 / 3002 loss=2.379, ppl=5.2, wps=5960.1, ups=0.09, wpb=64871, bsz=128, num_updates=16784, lr=9.98737e-05, gnorm=2.122, loss_scale=2, train_wall=10, gb_free=2.8, wall=193255
2021-06-21 00:19:52 | INFO | train_inner | epoch 006: 1875 / 3002 loss=2.483, ppl=5.59, wps=6010.8, ups=0.09, wpb=64789, bsz=128, num_updates=16785, lr=9.98737e-05, gnorm=1.904, loss_scale=2, train_wall=10, gb_free=2.8, wall=193266
2021-06-21 00:20:02 | INFO | train_inner | epoch 006: 1876 / 3002 loss=2.538, ppl=5.81, wps=5953.8, ups=0.09, wpb=64766, bsz=128, num_updates=16786, lr=9.98737e-05, gnorm=3.252, loss_scale=2, train_wall=10, gb_free=2.8, wall=193277
2021-06-21 00:20:13 | INFO | train_inner | epoch 006: 1877 / 3002 loss=2.446, ppl=5.45, wps=5875.8, ups=0.09, wpb=64832, bsz=128, num_updates=16787, lr=9.98737e-05, gnorm=2.02, loss_scale=2, train_wall=11, gb_free=2.8, wall=193288
2021-06-21 00:20:25 | INFO | train_inner | epoch 006: 1878 / 3002 loss=2.284, ppl=4.87, wps=5810.8, ups=0.09, wpb=64796, bsz=128, num_updates=16788, lr=9.98737e-05, gnorm=1.872, loss_scale=2, train_wall=11, gb_free=2.8, wall=193299
2021-06-21 00:20:36 | INFO | train_inner | epoch 006: 1879 / 3002 loss=2.55, ppl=5.86, wps=5846.1, ups=0.09, wpb=64803, bsz=128, num_updates=16789, lr=9.98737e-05, gnorm=2.161, loss_scale=2, train_wall=11, gb_free=2.8, wall=193310
2021-06-21 00:20:47 | INFO | train_inner | epoch 006: 1880 / 3002 loss=2.451, ppl=5.47, wps=5832.2, ups=0.09, wpb=64782, bsz=128, num_updates=16790, lr=9.98737e-05, gnorm=1.918, loss_scale=2, train_wall=11, gb_free=2.8, wall=193321
2021-06-21 00:20:58 | INFO | train_inner | epoch 006: 1881 / 3002 loss=2.415, ppl=5.33, wps=5861.5, ups=0.09, wpb=64826, bsz=128, num_updates=16791, lr=9.98737e-05, gnorm=1.968, loss_scale=2, train_wall=11, gb_free=2.8, wall=193332
2021-06-21 00:21:09 | INFO | train_inner | epoch 006: 1882 / 3002 loss=2.552, ppl=5.87, wps=5799.9, ups=0.09, wpb=64729, bsz=128, num_updates=16792, lr=9.98737e-05, gnorm=2.019, loss_scale=2, train_wall=11, gb_free=2.8, wall=193343
2021-06-21 00:21:20 | INFO | train_inner | epoch 006: 1883 / 3002 loss=2.417, ppl=5.34, wps=5852.4, ups=0.09, wpb=64887, bsz=128, num_updates=16793, lr=9.98736e-05, gnorm=1.882, loss_scale=2, train_wall=11, gb_free=2.8, wall=193354
2021-06-21 00:21:31 | INFO | train_inner | epoch 006: 1884 / 3002 loss=2.431, ppl=5.39, wps=5984.1, ups=0.09, wpb=64861, bsz=128, num_updates=16794, lr=9.98736e-05, gnorm=1.944, loss_scale=2, train_wall=10, gb_free=2.8, wall=193365
2021-06-21 00:21:42 | INFO | train_inner | epoch 006: 1885 / 3002 loss=2.506, ppl=5.68, wps=5875.8, ups=0.09, wpb=64940, bsz=128, num_updates=16795, lr=9.98736e-05, gnorm=1.904, loss_scale=2, train_wall=11, gb_free=2.8, wall=193376
2021-06-21 00:21:53 | INFO | train_inner | epoch 006: 1886 / 3002 loss=2.595, ppl=6.04, wps=5818.3, ups=0.09, wpb=64844, bsz=128, num_updates=16796, lr=9.98736e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=193387
2021-06-21 00:22:04 | INFO | train_inner | epoch 006: 1887 / 3002 loss=2.435, ppl=5.41, wps=5841.1, ups=0.09, wpb=64786, bsz=128, num_updates=16797, lr=9.98736e-05, gnorm=1.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=193399
2021-06-21 00:22:15 | INFO | train_inner | epoch 006: 1888 / 3002 loss=2.525, ppl=5.75, wps=5787.3, ups=0.09, wpb=64827, bsz=128, num_updates=16798, lr=9.98736e-05, gnorm=1.891, loss_scale=2, train_wall=11, gb_free=2.8, wall=193410
2021-06-21 00:22:26 | INFO | train_inner | epoch 006: 1889 / 3002 loss=2.64, ppl=6.23, wps=5885.9, ups=0.09, wpb=64832, bsz=128, num_updates=16799, lr=9.98736e-05, gnorm=1.95, loss_scale=2, train_wall=11, gb_free=2.8, wall=193421
2021-06-21 00:22:37 | INFO | train_inner | epoch 006: 1890 / 3002 loss=2.434, ppl=5.4, wps=5909.8, ups=0.09, wpb=64867, bsz=128, num_updates=16800, lr=9.98736e-05, gnorm=1.894, loss_scale=2, train_wall=11, gb_free=2.8, wall=193432
2021-06-21 00:22:49 | INFO | train_inner | epoch 006: 1891 / 3002 loss=2.43, ppl=5.39, wps=5818.3, ups=0.09, wpb=64911, bsz=128, num_updates=16801, lr=9.98736e-05, gnorm=1.914, loss_scale=2, train_wall=11, gb_free=2.8, wall=193443
2021-06-21 00:22:59 | INFO | train_inner | epoch 006: 1892 / 3002 loss=2.339, ppl=5.06, wps=5935.8, ups=0.09, wpb=64834, bsz=128, num_updates=16802, lr=9.98736e-05, gnorm=1.857, loss_scale=2, train_wall=10, gb_free=2.8, wall=193454
2021-06-21 00:23:11 | INFO | train_inner | epoch 006: 1893 / 3002 loss=2.518, ppl=5.73, wps=5855.3, ups=0.09, wpb=64863, bsz=128, num_updates=16803, lr=9.98736e-05, gnorm=2.043, loss_scale=4, train_wall=11, gb_free=2.8, wall=193465
2021-06-21 00:23:22 | INFO | train_inner | epoch 006: 1894 / 3002 loss=2.478, ppl=5.57, wps=5884.1, ups=0.09, wpb=64863, bsz=128, num_updates=16804, lr=9.98736e-05, gnorm=3.896, loss_scale=4, train_wall=11, gb_free=2.8, wall=193476
2021-06-21 00:23:33 | INFO | train_inner | epoch 006: 1895 / 3002 loss=2.46, ppl=5.5, wps=5880.2, ups=0.09, wpb=64882, bsz=128, num_updates=16805, lr=9.98735e-05, gnorm=1.884, loss_scale=4, train_wall=11, gb_free=2.8, wall=193487
2021-06-21 00:23:43 | INFO | train_inner | epoch 006: 1896 / 3002 loss=2.296, ppl=4.91, wps=5959.9, ups=0.09, wpb=64779, bsz=128, num_updates=16806, lr=9.98735e-05, gnorm=1.888, loss_scale=4, train_wall=10, gb_free=2.8, wall=193498
2021-06-21 00:23:55 | INFO | train_inner | epoch 006: 1897 / 3002 loss=2.441, ppl=5.43, wps=5869.1, ups=0.09, wpb=64828, bsz=128, num_updates=16807, lr=9.98735e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=193509
2021-06-21 00:24:05 | INFO | train_inner | epoch 006: 1898 / 3002 loss=2.536, ppl=5.8, wps=5937.2, ups=0.09, wpb=64828, bsz=128, num_updates=16808, lr=9.98735e-05, gnorm=2.003, loss_scale=4, train_wall=10, gb_free=2.8, wall=193520
2021-06-21 00:24:16 | INFO | train_inner | epoch 006: 1899 / 3002 loss=2.464, ppl=5.52, wps=5964.3, ups=0.09, wpb=64916, bsz=128, num_updates=16809, lr=9.98735e-05, gnorm=1.932, loss_scale=4, train_wall=10, gb_free=2.8, wall=193531
2021-06-21 00:24:27 | INFO | train_inner | epoch 006: 1900 / 3002 loss=2.53, ppl=5.78, wps=5895.1, ups=0.09, wpb=64855, bsz=128, num_updates=16810, lr=9.98735e-05, gnorm=1.879, loss_scale=4, train_wall=11, gb_free=2.8, wall=193542
2021-06-21 00:24:38 | INFO | train_inner | epoch 006: 1901 / 3002 loss=2.427, ppl=5.38, wps=5880.3, ups=0.09, wpb=64870, bsz=128, num_updates=16811, lr=9.98735e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=193553
2021-06-21 00:24:50 | INFO | train_inner | epoch 006: 1902 / 3002 loss=2.403, ppl=5.29, wps=5733.9, ups=0.09, wpb=64794, bsz=128, num_updates=16812, lr=9.98735e-05, gnorm=2.035, loss_scale=4, train_wall=11, gb_free=2.8, wall=193564
2021-06-21 00:25:01 | INFO | train_inner | epoch 006: 1903 / 3002 loss=2.588, ppl=6.01, wps=5840.5, ups=0.09, wpb=64763, bsz=128, num_updates=16813, lr=9.98735e-05, gnorm=2, loss_scale=4, train_wall=11, gb_free=2.8, wall=193575
2021-06-21 00:25:12 | INFO | train_inner | epoch 006: 1904 / 3002 loss=2.337, ppl=5.05, wps=5904.5, ups=0.09, wpb=64837, bsz=128, num_updates=16814, lr=9.98735e-05, gnorm=1.94, loss_scale=4, train_wall=11, gb_free=2.8, wall=193586
2021-06-21 00:25:23 | INFO | train_inner | epoch 006: 1905 / 3002 loss=2.556, ppl=5.88, wps=5869.8, ups=0.09, wpb=64788, bsz=128, num_updates=16815, lr=9.98735e-05, gnorm=1.995, loss_scale=4, train_wall=11, gb_free=2.8, wall=193597
2021-06-21 00:25:34 | INFO | train_inner | epoch 006: 1906 / 3002 loss=2.426, ppl=5.37, wps=5825, ups=0.09, wpb=64850, bsz=128, num_updates=16816, lr=9.98735e-05, gnorm=2, loss_scale=4, train_wall=11, gb_free=2.8, wall=193608
2021-06-21 00:25:45 | INFO | train_inner | epoch 006: 1907 / 3002 loss=2.5, ppl=5.66, wps=5792.2, ups=0.09, wpb=64744, bsz=128, num_updates=16817, lr=9.98735e-05, gnorm=1.907, loss_scale=4, train_wall=11, gb_free=2.8, wall=193619
2021-06-21 00:25:56 | INFO | train_inner | epoch 006: 1908 / 3002 loss=2.485, ppl=5.6, wps=5911.1, ups=0.09, wpb=64837, bsz=128, num_updates=16818, lr=9.98734e-05, gnorm=1.94, loss_scale=4, train_wall=10, gb_free=2.8, wall=193630
2021-06-21 00:26:07 | INFO | train_inner | epoch 006: 1909 / 3002 loss=2.482, ppl=5.59, wps=5931.9, ups=0.09, wpb=64894, bsz=128, num_updates=16819, lr=9.98734e-05, gnorm=1.942, loss_scale=4, train_wall=10, gb_free=2.8, wall=193641
2021-06-21 00:26:18 | INFO | train_inner | epoch 006: 1910 / 3002 loss=2.414, ppl=5.33, wps=6039.3, ups=0.09, wpb=64814, bsz=128, num_updates=16820, lr=9.98734e-05, gnorm=1.965, loss_scale=4, train_wall=10, gb_free=2.8, wall=193652
2021-06-21 00:26:29 | INFO | train_inner | epoch 006: 1911 / 3002 loss=2.45, ppl=5.46, wps=5939.1, ups=0.09, wpb=64808, bsz=128, num_updates=16821, lr=9.98734e-05, gnorm=2.103, loss_scale=4, train_wall=10, gb_free=2.8, wall=193663
2021-06-21 00:26:40 | INFO | train_inner | epoch 006: 1912 / 3002 loss=2.38, ppl=5.2, wps=5816.3, ups=0.09, wpb=64806, bsz=128, num_updates=16822, lr=9.98734e-05, gnorm=2.572, loss_scale=4, train_wall=11, gb_free=2.8, wall=193674
2021-06-21 00:26:51 | INFO | train_inner | epoch 006: 1913 / 3002 loss=2.438, ppl=5.42, wps=5914.9, ups=0.09, wpb=64809, bsz=128, num_updates=16823, lr=9.98734e-05, gnorm=1.86, loss_scale=4, train_wall=10, gb_free=2.8, wall=193685
2021-06-21 00:27:02 | INFO | train_inner | epoch 006: 1914 / 3002 loss=2.525, ppl=5.76, wps=5839.1, ups=0.09, wpb=64858, bsz=128, num_updates=16824, lr=9.98734e-05, gnorm=2.132, loss_scale=4, train_wall=11, gb_free=2.8, wall=193696
2021-06-21 00:27:13 | INFO | train_inner | epoch 006: 1915 / 3002 loss=2.39, ppl=5.24, wps=5970.5, ups=0.09, wpb=64883, bsz=128, num_updates=16825, lr=9.98734e-05, gnorm=1.972, loss_scale=4, train_wall=10, gb_free=2.8, wall=193707
2021-06-21 00:27:24 | INFO | train_inner | epoch 006: 1916 / 3002 loss=2.49, ppl=5.62, wps=5944.8, ups=0.09, wpb=64877, bsz=128, num_updates=16826, lr=9.98734e-05, gnorm=2.074, loss_scale=4, train_wall=10, gb_free=2.8, wall=193718
2021-06-21 00:27:35 | INFO | train_inner | epoch 006: 1917 / 3002 loss=2.406, ppl=5.3, wps=5894.2, ups=0.09, wpb=64769, bsz=128, num_updates=16827, lr=9.98734e-05, gnorm=1.95, loss_scale=4, train_wall=11, gb_free=2.8, wall=193729
2021-06-21 00:27:46 | INFO | train_inner | epoch 006: 1918 / 3002 loss=2.561, ppl=5.9, wps=5918.1, ups=0.09, wpb=64819, bsz=128, num_updates=16828, lr=9.98734e-05, gnorm=1.908, loss_scale=4, train_wall=10, gb_free=2.8, wall=193740
2021-06-21 00:27:57 | INFO | train_inner | epoch 006: 1919 / 3002 loss=2.415, ppl=5.33, wps=5822.3, ups=0.09, wpb=64815, bsz=128, num_updates=16829, lr=9.98734e-05, gnorm=1.959, loss_scale=4, train_wall=11, gb_free=2.8, wall=193751
2021-06-21 00:28:08 | INFO | train_inner | epoch 006: 1920 / 3002 loss=2.422, ppl=5.36, wps=5808.1, ups=0.09, wpb=64832, bsz=128, num_updates=16830, lr=9.98733e-05, gnorm=1.95, loss_scale=4, train_wall=11, gb_free=2.8, wall=193762
2021-06-21 00:28:19 | INFO | train_inner | epoch 006: 1921 / 3002 loss=2.416, ppl=5.34, wps=5857.5, ups=0.09, wpb=64895, bsz=128, num_updates=16831, lr=9.98733e-05, gnorm=3.794, loss_scale=4, train_wall=11, gb_free=2.8, wall=193773
2021-06-21 00:28:30 | INFO | train_inner | epoch 006: 1922 / 3002 loss=2.448, ppl=5.46, wps=5845.5, ups=0.09, wpb=64829, bsz=128, num_updates=16832, lr=9.98733e-05, gnorm=1.893, loss_scale=4, train_wall=11, gb_free=2.8, wall=193784
2021-06-21 00:28:41 | INFO | train_inner | epoch 006: 1923 / 3002 loss=2.435, ppl=5.41, wps=5967.4, ups=0.09, wpb=64861, bsz=128, num_updates=16833, lr=9.98733e-05, gnorm=1.997, loss_scale=4, train_wall=10, gb_free=2.8, wall=193795
2021-06-21 00:28:52 | INFO | train_inner | epoch 006: 1924 / 3002 loss=2.473, ppl=5.55, wps=5820.7, ups=0.09, wpb=64843, bsz=128, num_updates=16834, lr=9.98733e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=193806
2021-06-21 00:29:03 | INFO | train_inner | epoch 006: 1925 / 3002 loss=2.508, ppl=5.69, wps=5885, ups=0.09, wpb=64771, bsz=128, num_updates=16835, lr=9.98733e-05, gnorm=1.99, loss_scale=4, train_wall=11, gb_free=2.8, wall=193817
2021-06-21 00:29:14 | INFO | train_inner | epoch 006: 1926 / 3002 loss=2.471, ppl=5.55, wps=5884.6, ups=0.09, wpb=64835, bsz=128, num_updates=16836, lr=9.98733e-05, gnorm=1.837, loss_scale=4, train_wall=11, gb_free=2.8, wall=193828
2021-06-21 00:29:25 | INFO | train_inner | epoch 006: 1927 / 3002 loss=2.614, ppl=6.12, wps=5803.7, ups=0.09, wpb=64804, bsz=128, num_updates=16837, lr=9.98733e-05, gnorm=1.966, loss_scale=4, train_wall=11, gb_free=2.8, wall=193840
2021-06-21 00:29:36 | INFO | train_inner | epoch 006: 1928 / 3002 loss=2.477, ppl=5.57, wps=5908.4, ups=0.09, wpb=64927, bsz=128, num_updates=16838, lr=9.98733e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=193851
2021-06-21 00:29:47 | INFO | train_inner | epoch 006: 1929 / 3002 loss=2.529, ppl=5.77, wps=5780.8, ups=0.09, wpb=64848, bsz=128, num_updates=16839, lr=9.98733e-05, gnorm=2.053, loss_scale=4, train_wall=11, gb_free=2.8, wall=193862
2021-06-21 00:29:58 | INFO | train_inner | epoch 006: 1930 / 3002 loss=2.392, ppl=5.25, wps=5905.5, ups=0.09, wpb=64729, bsz=128, num_updates=16840, lr=9.98733e-05, gnorm=1.89, loss_scale=4, train_wall=10, gb_free=2.8, wall=193873
2021-06-21 00:30:10 | INFO | train_inner | epoch 006: 1931 / 3002 loss=2.348, ppl=5.09, wps=5786.1, ups=0.09, wpb=64848, bsz=128, num_updates=16841, lr=9.98733e-05, gnorm=2.747, loss_scale=4, train_wall=11, gb_free=2.8, wall=193884
2021-06-21 00:30:21 | INFO | train_inner | epoch 006: 1932 / 3002 loss=2.351, ppl=5.1, wps=5718.4, ups=0.09, wpb=64813, bsz=128, num_updates=16842, lr=9.98733e-05, gnorm=1.948, loss_scale=4, train_wall=11, gb_free=2.8, wall=193895
2021-06-21 00:30:32 | INFO | train_inner | epoch 006: 1933 / 3002 loss=2.564, ppl=5.91, wps=5836, ups=0.09, wpb=64899, bsz=128, num_updates=16843, lr=9.98732e-05, gnorm=1.906, loss_scale=4, train_wall=11, gb_free=2.8, wall=193906
2021-06-21 00:30:43 | INFO | train_inner | epoch 006: 1934 / 3002 loss=2.424, ppl=5.37, wps=5903.5, ups=0.09, wpb=64861, bsz=128, num_updates=16844, lr=9.98732e-05, gnorm=5.873, loss_scale=4, train_wall=11, gb_free=2.8, wall=193917
2021-06-21 00:30:54 | INFO | train_inner | epoch 006: 1935 / 3002 loss=2.35, ppl=5.1, wps=5871.1, ups=0.09, wpb=64867, bsz=128, num_updates=16845, lr=9.98732e-05, gnorm=1.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=193928
2021-06-21 00:31:05 | INFO | train_inner | epoch 006: 1936 / 3002 loss=2.527, ppl=5.76, wps=5860.1, ups=0.09, wpb=64875, bsz=128, num_updates=16846, lr=9.98732e-05, gnorm=1.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=193940
2021-06-21 00:31:16 | INFO | train_inner | epoch 006: 1937 / 3002 loss=2.344, ppl=5.08, wps=5881, ups=0.09, wpb=64891, bsz=128, num_updates=16847, lr=9.98732e-05, gnorm=1.876, loss_scale=4, train_wall=11, gb_free=2.8, wall=193951
2021-06-21 00:31:27 | INFO | train_inner | epoch 006: 1938 / 3002 loss=2.412, ppl=5.32, wps=5857.3, ups=0.09, wpb=64790, bsz=128, num_updates=16848, lr=9.98732e-05, gnorm=1.902, loss_scale=4, train_wall=11, gb_free=2.8, wall=193962
2021-06-21 00:31:38 | INFO | train_inner | epoch 006: 1939 / 3002 loss=2.445, ppl=5.45, wps=5883.2, ups=0.09, wpb=64790, bsz=128, num_updates=16849, lr=9.98732e-05, gnorm=1.888, loss_scale=4, train_wall=11, gb_free=2.8, wall=193973
2021-06-21 00:31:49 | INFO | train_inner | epoch 006: 1940 / 3002 loss=2.4, ppl=5.28, wps=5972.5, ups=0.09, wpb=64859, bsz=128, num_updates=16850, lr=9.98732e-05, gnorm=1.896, loss_scale=4, train_wall=10, gb_free=2.8, wall=193983
2021-06-21 00:32:00 | INFO | train_inner | epoch 006: 1941 / 3002 loss=2.403, ppl=5.29, wps=5948.8, ups=0.09, wpb=64797, bsz=128, num_updates=16851, lr=9.98732e-05, gnorm=1.98, loss_scale=4, train_wall=10, gb_free=2.8, wall=193994
2021-06-21 00:32:11 | INFO | train_inner | epoch 006: 1942 / 3002 loss=2.509, ppl=5.69, wps=5832.1, ups=0.09, wpb=64857, bsz=128, num_updates=16852, lr=9.98732e-05, gnorm=2.582, loss_scale=4, train_wall=11, gb_free=2.8, wall=194005
2021-06-21 00:32:22 | INFO | train_inner | epoch 006: 1943 / 3002 loss=2.42, ppl=5.35, wps=5811, ups=0.09, wpb=64810, bsz=128, num_updates=16853, lr=9.98732e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=194017
2021-06-21 00:32:33 | INFO | train_inner | epoch 006: 1944 / 3002 loss=2.54, ppl=5.82, wps=5845.8, ups=0.09, wpb=64894, bsz=128, num_updates=16854, lr=9.98732e-05, gnorm=1.939, loss_scale=4, train_wall=11, gb_free=2.8, wall=194028
2021-06-21 00:32:44 | INFO | train_inner | epoch 006: 1945 / 3002 loss=2.397, ppl=5.27, wps=5891.7, ups=0.09, wpb=64838, bsz=128, num_updates=16855, lr=9.98731e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=194039
2021-06-21 00:32:55 | INFO | train_inner | epoch 006: 1946 / 3002 loss=2.466, ppl=5.52, wps=5881.8, ups=0.09, wpb=64845, bsz=128, num_updates=16856, lr=9.98731e-05, gnorm=2.33, loss_scale=4, train_wall=11, gb_free=2.8, wall=194050
2021-06-21 00:33:07 | INFO | train_inner | epoch 006: 1947 / 3002 loss=2.366, ppl=5.15, wps=5864.7, ups=0.09, wpb=64898, bsz=128, num_updates=16857, lr=9.98731e-05, gnorm=1.905, loss_scale=4, train_wall=11, gb_free=2.8, wall=194061
2021-06-21 00:33:18 | INFO | train_inner | epoch 006: 1948 / 3002 loss=2.517, ppl=5.72, wps=5828.8, ups=0.09, wpb=64817, bsz=128, num_updates=16858, lr=9.98731e-05, gnorm=2.111, loss_scale=4, train_wall=11, gb_free=2.8, wall=194072
2021-06-21 00:33:29 | INFO | train_inner | epoch 006: 1949 / 3002 loss=2.418, ppl=5.35, wps=5821.5, ups=0.09, wpb=64863, bsz=128, num_updates=16859, lr=9.98731e-05, gnorm=4.359, loss_scale=4, train_wall=11, gb_free=2.8, wall=194083
2021-06-21 00:33:40 | INFO | train_inner | epoch 006: 1950 / 3002 loss=2.319, ppl=4.99, wps=5805.9, ups=0.09, wpb=64847, bsz=128, num_updates=16860, lr=9.98731e-05, gnorm=1.899, loss_scale=4, train_wall=11, gb_free=2.8, wall=194094
2021-06-21 00:33:51 | INFO | train_inner | epoch 006: 1951 / 3002 loss=2.504, ppl=5.67, wps=5730.7, ups=0.09, wpb=64825, bsz=128, num_updates=16861, lr=9.98731e-05, gnorm=1.864, loss_scale=4, train_wall=11, gb_free=2.8, wall=194106
2021-06-21 00:34:02 | INFO | train_inner | epoch 006: 1952 / 3002 loss=2.619, ppl=6.14, wps=5768.7, ups=0.09, wpb=64808, bsz=128, num_updates=16862, lr=9.98731e-05, gnorm=2.409, loss_scale=4, train_wall=11, gb_free=2.8, wall=194117
2021-06-21 00:34:14 | INFO | train_inner | epoch 006: 1953 / 3002 loss=2.455, ppl=5.48, wps=5773.7, ups=0.09, wpb=64758, bsz=128, num_updates=16863, lr=9.98731e-05, gnorm=2.154, loss_scale=4, train_wall=11, gb_free=2.8, wall=194128
2021-06-21 00:34:25 | INFO | train_inner | epoch 006: 1954 / 3002 loss=2.413, ppl=5.33, wps=5849.7, ups=0.09, wpb=64841, bsz=128, num_updates=16864, lr=9.98731e-05, gnorm=1.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=194139
2021-06-21 00:34:36 | INFO | train_inner | epoch 006: 1955 / 3002 loss=2.365, ppl=5.15, wps=5827.2, ups=0.09, wpb=64729, bsz=128, num_updates=16865, lr=9.98731e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=194150
2021-06-21 00:34:47 | INFO | train_inner | epoch 006: 1956 / 3002 loss=2.474, ppl=5.56, wps=5862, ups=0.09, wpb=64764, bsz=128, num_updates=16866, lr=9.98731e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=194161
2021-06-21 00:34:58 | INFO | train_inner | epoch 006: 1957 / 3002 loss=2.539, ppl=5.81, wps=5869, ups=0.09, wpb=64767, bsz=128, num_updates=16867, lr=9.98731e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=194172
2021-06-21 00:35:09 | INFO | train_inner | epoch 006: 1958 / 3002 loss=2.457, ppl=5.49, wps=5891.2, ups=0.09, wpb=64901, bsz=128, num_updates=16868, lr=9.9873e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=194183
2021-06-21 00:35:20 | INFO | train_inner | epoch 006: 1959 / 3002 loss=2.47, ppl=5.54, wps=5867.4, ups=0.09, wpb=64836, bsz=128, num_updates=16869, lr=9.9873e-05, gnorm=2.047, loss_scale=4, train_wall=11, gb_free=2.8, wall=194194
2021-06-21 00:35:31 | INFO | train_inner | epoch 006: 1960 / 3002 loss=2.401, ppl=5.28, wps=5914.7, ups=0.09, wpb=64867, bsz=128, num_updates=16870, lr=9.9873e-05, gnorm=2.069, loss_scale=4, train_wall=11, gb_free=2.8, wall=194205
2021-06-21 00:35:42 | INFO | train_inner | epoch 006: 1961 / 3002 loss=2.271, ppl=4.83, wps=5807.8, ups=0.09, wpb=64905, bsz=128, num_updates=16871, lr=9.9873e-05, gnorm=2.501, loss_scale=4, train_wall=11, gb_free=2.8, wall=194217
2021-06-21 00:35:53 | INFO | train_inner | epoch 006: 1962 / 3002 loss=2.457, ppl=5.49, wps=5844.4, ups=0.09, wpb=64833, bsz=128, num_updates=16872, lr=9.9873e-05, gnorm=3.688, loss_scale=4, train_wall=11, gb_free=2.8, wall=194228
2021-06-21 00:36:05 | INFO | train_inner | epoch 006: 1963 / 3002 loss=2.584, ppl=6, wps=5750.6, ups=0.09, wpb=64817, bsz=128, num_updates=16873, lr=9.9873e-05, gnorm=2.145, loss_scale=4, train_wall=11, gb_free=2.8, wall=194239
2021-06-21 00:36:16 | INFO | train_inner | epoch 006: 1964 / 3002 loss=2.464, ppl=5.52, wps=5827.8, ups=0.09, wpb=64777, bsz=128, num_updates=16874, lr=9.9873e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=194250
2021-06-21 00:36:27 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-21 00:36:38 | INFO | train_inner | epoch 006: 1966 / 3002 loss=2.455, ppl=5.48, wps=2928.9, ups=0.05, wpb=64861, bsz=128, num_updates=16875, lr=9.9873e-05, gnorm=3.118, loss_scale=2, train_wall=21, gb_free=2.8, wall=194272
2021-06-21 00:36:49 | INFO | train_inner | epoch 006: 1967 / 3002 loss=2.497, ppl=5.64, wps=5752.6, ups=0.09, wpb=64824, bsz=128, num_updates=16876, lr=9.9873e-05, gnorm=2.66, loss_scale=2, train_wall=11, gb_free=2.8, wall=194283
2021-06-21 00:37:00 | INFO | train_inner | epoch 006: 1968 / 3002 loss=2.583, ppl=5.99, wps=5906.8, ups=0.09, wpb=64729, bsz=128, num_updates=16877, lr=9.9873e-05, gnorm=2.32, loss_scale=2, train_wall=10, gb_free=2.8, wall=194294
2021-06-21 00:37:11 | INFO | train_inner | epoch 006: 1969 / 3002 loss=2.511, ppl=5.7, wps=5735.9, ups=0.09, wpb=64811, bsz=128, num_updates=16878, lr=9.9873e-05, gnorm=2.054, loss_scale=2, train_wall=11, gb_free=2.8, wall=194306
2021-06-21 00:37:22 | INFO | train_inner | epoch 006: 1970 / 3002 loss=2.38, ppl=5.21, wps=5983.1, ups=0.09, wpb=64911, bsz=128, num_updates=16879, lr=9.9873e-05, gnorm=4.079, loss_scale=2, train_wall=10, gb_free=2.8, wall=194317
2021-06-21 00:37:33 | INFO | train_inner | epoch 006: 1971 / 3002 loss=2.53, ppl=5.78, wps=5805.1, ups=0.09, wpb=64796, bsz=128, num_updates=16880, lr=9.98729e-05, gnorm=2.043, loss_scale=2, train_wall=11, gb_free=2.8, wall=194328
2021-06-21 00:37:44 | INFO | train_inner | epoch 006: 1972 / 3002 loss=2.318, ppl=4.98, wps=5856.9, ups=0.09, wpb=64949, bsz=128, num_updates=16881, lr=9.98729e-05, gnorm=2.404, loss_scale=2, train_wall=11, gb_free=2.8, wall=194339
2021-06-21 00:37:55 | INFO | train_inner | epoch 006: 1973 / 3002 loss=2.393, ppl=5.25, wps=5910.7, ups=0.09, wpb=64782, bsz=128, num_updates=16882, lr=9.98729e-05, gnorm=2.113, loss_scale=2, train_wall=11, gb_free=2.8, wall=194350
2021-06-21 00:38:06 | INFO | train_inner | epoch 006: 1974 / 3002 loss=2.432, ppl=5.4, wps=5844.4, ups=0.09, wpb=64839, bsz=128, num_updates=16883, lr=9.98729e-05, gnorm=2.024, loss_scale=2, train_wall=11, gb_free=2.8, wall=194361
2021-06-21 00:38:18 | INFO | train_inner | epoch 006: 1975 / 3002 loss=2.522, ppl=5.74, wps=5732.3, ups=0.09, wpb=64865, bsz=128, num_updates=16884, lr=9.98729e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=194372
2021-06-21 00:38:29 | INFO | train_inner | epoch 006: 1976 / 3002 loss=2.431, ppl=5.39, wps=5836.2, ups=0.09, wpb=64803, bsz=128, num_updates=16885, lr=9.98729e-05, gnorm=2.018, loss_scale=2, train_wall=11, gb_free=2.8, wall=194383
2021-06-21 00:38:40 | INFO | train_inner | epoch 006: 1977 / 3002 loss=2.338, ppl=5.06, wps=5793.1, ups=0.09, wpb=64858, bsz=128, num_updates=16886, lr=9.98729e-05, gnorm=3.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=194394
2021-06-21 00:38:51 | INFO | train_inner | epoch 006: 1978 / 3002 loss=2.567, ppl=5.92, wps=5831.5, ups=0.09, wpb=64838, bsz=128, num_updates=16887, lr=9.98729e-05, gnorm=2.195, loss_scale=2, train_wall=11, gb_free=2.8, wall=194406
2021-06-21 00:39:02 | INFO | train_inner | epoch 006: 1979 / 3002 loss=2.492, ppl=5.63, wps=5858.6, ups=0.09, wpb=64743, bsz=128, num_updates=16888, lr=9.98729e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=194417
2021-06-21 00:39:13 | INFO | train_inner | epoch 006: 1980 / 3002 loss=2.418, ppl=5.35, wps=5829.3, ups=0.09, wpb=64789, bsz=128, num_updates=16889, lr=9.98729e-05, gnorm=2.02, loss_scale=2, train_wall=11, gb_free=2.8, wall=194428
2021-06-21 00:39:24 | INFO | train_inner | epoch 006: 1981 / 3002 loss=2.532, ppl=5.78, wps=5911.2, ups=0.09, wpb=64813, bsz=128, num_updates=16890, lr=9.98729e-05, gnorm=2.041, loss_scale=2, train_wall=11, gb_free=2.8, wall=194439
2021-06-21 00:39:35 | INFO | train_inner | epoch 006: 1982 / 3002 loss=2.457, ppl=5.49, wps=5882, ups=0.09, wpb=64894, bsz=128, num_updates=16891, lr=9.98729e-05, gnorm=2.122, loss_scale=2, train_wall=11, gb_free=2.8, wall=194450
2021-06-21 00:39:47 | INFO | train_inner | epoch 006: 1983 / 3002 loss=2.445, ppl=5.45, wps=5797.3, ups=0.09, wpb=64883, bsz=128, num_updates=16892, lr=9.98729e-05, gnorm=1.978, loss_scale=2, train_wall=11, gb_free=2.8, wall=194461
2021-06-21 00:39:58 | INFO | train_inner | epoch 006: 1984 / 3002 loss=2.429, ppl=5.39, wps=5851.7, ups=0.09, wpb=64867, bsz=128, num_updates=16893, lr=9.98728e-05, gnorm=2.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=194472
2021-06-21 00:40:09 | INFO | train_inner | epoch 006: 1985 / 3002 loss=2.467, ppl=5.53, wps=5893.6, ups=0.09, wpb=64884, bsz=128, num_updates=16894, lr=9.98728e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=194483
2021-06-21 00:40:20 | INFO | train_inner | epoch 006: 1986 / 3002 loss=2.46, ppl=5.5, wps=5870.5, ups=0.09, wpb=64896, bsz=128, num_updates=16895, lr=9.98728e-05, gnorm=2.14, loss_scale=2, train_wall=11, gb_free=2.8, wall=194494
2021-06-21 00:40:31 | INFO | train_inner | epoch 006: 1987 / 3002 loss=2.594, ppl=6.04, wps=5871.8, ups=0.09, wpb=64898, bsz=128, num_updates=16896, lr=9.98728e-05, gnorm=2.041, loss_scale=2, train_wall=11, gb_free=2.8, wall=194505
2021-06-21 00:40:42 | INFO | train_inner | epoch 006: 1988 / 3002 loss=2.332, ppl=5.03, wps=5786.4, ups=0.09, wpb=64810, bsz=128, num_updates=16897, lr=9.98728e-05, gnorm=2.055, loss_scale=2, train_wall=11, gb_free=2.8, wall=194516
2021-06-21 00:40:53 | INFO | train_inner | epoch 006: 1989 / 3002 loss=2.58, ppl=5.98, wps=5855.3, ups=0.09, wpb=64816, bsz=128, num_updates=16898, lr=9.98728e-05, gnorm=2.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=194527
2021-06-21 00:41:04 | INFO | train_inner | epoch 006: 1990 / 3002 loss=2.447, ppl=5.45, wps=5926.6, ups=0.09, wpb=64865, bsz=128, num_updates=16899, lr=9.98728e-05, gnorm=2.029, loss_scale=2, train_wall=10, gb_free=2.8, wall=194538
2021-06-21 00:41:15 | INFO | train_inner | epoch 006: 1991 / 3002 loss=2.511, ppl=5.7, wps=5758.7, ups=0.09, wpb=64834, bsz=128, num_updates=16900, lr=9.98728e-05, gnorm=3.423, loss_scale=2, train_wall=11, gb_free=2.8, wall=194550
2021-06-21 00:41:26 | INFO | train_inner | epoch 006: 1992 / 3002 loss=2.411, ppl=5.32, wps=5900.4, ups=0.09, wpb=64875, bsz=128, num_updates=16901, lr=9.98728e-05, gnorm=2.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=194561
2021-06-21 00:41:37 | INFO | train_inner | epoch 006: 1993 / 3002 loss=2.488, ppl=5.61, wps=6011.7, ups=0.09, wpb=64807, bsz=128, num_updates=16902, lr=9.98728e-05, gnorm=2.05, loss_scale=2, train_wall=10, gb_free=2.8, wall=194571
2021-06-21 00:41:48 | INFO | train_inner | epoch 006: 1994 / 3002 loss=2.44, ppl=5.43, wps=5931.9, ups=0.09, wpb=64785, bsz=128, num_updates=16903, lr=9.98728e-05, gnorm=2.052, loss_scale=2, train_wall=10, gb_free=2.8, wall=194582
2021-06-21 00:41:59 | INFO | train_inner | epoch 006: 1995 / 3002 loss=2.513, ppl=5.71, wps=5903.6, ups=0.09, wpb=64774, bsz=128, num_updates=16904, lr=9.98728e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=194593
2021-06-21 00:42:10 | INFO | train_inner | epoch 006: 1996 / 3002 loss=2.515, ppl=5.71, wps=5955.9, ups=0.09, wpb=64827, bsz=128, num_updates=16905, lr=9.98727e-05, gnorm=2.02, loss_scale=2, train_wall=10, gb_free=2.8, wall=194604
2021-06-21 00:42:21 | INFO | train_inner | epoch 006: 1997 / 3002 loss=2.445, ppl=5.45, wps=5722.6, ups=0.09, wpb=64800, bsz=128, num_updates=16906, lr=9.98727e-05, gnorm=1.943, loss_scale=2, train_wall=11, gb_free=2.8, wall=194615
2021-06-21 00:42:32 | INFO | train_inner | epoch 006: 1998 / 3002 loss=2.534, ppl=5.79, wps=5788, ups=0.09, wpb=64834, bsz=128, num_updates=16907, lr=9.98727e-05, gnorm=1.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=194627
2021-06-21 00:42:43 | INFO | train_inner | epoch 006: 1999 / 3002 loss=2.392, ppl=5.25, wps=5831.8, ups=0.09, wpb=64769, bsz=128, num_updates=16908, lr=9.98727e-05, gnorm=2.053, loss_scale=2, train_wall=11, gb_free=2.8, wall=194638
2021-06-21 00:42:55 | INFO | train_inner | epoch 006: 2000 / 3002 loss=2.495, ppl=5.64, wps=5778.1, ups=0.09, wpb=64955, bsz=128, num_updates=16909, lr=9.98727e-05, gnorm=2.009, loss_scale=2, train_wall=11, gb_free=2.8, wall=194649
2021-06-21 00:43:06 | INFO | train_inner | epoch 006: 2001 / 3002 loss=2.715, ppl=6.56, wps=5858.6, ups=0.09, wpb=64833, bsz=128, num_updates=16910, lr=9.98727e-05, gnorm=2.056, loss_scale=2, train_wall=11, gb_free=2.8, wall=194660
2021-06-21 00:43:17 | INFO | train_inner | epoch 006: 2002 / 3002 loss=2.412, ppl=5.32, wps=5840.4, ups=0.09, wpb=64844, bsz=128, num_updates=16911, lr=9.98727e-05, gnorm=2.08, loss_scale=2, train_wall=11, gb_free=2.8, wall=194671
2021-06-21 00:43:28 | INFO | train_inner | epoch 006: 2003 / 3002 loss=2.601, ppl=6.07, wps=5817.1, ups=0.09, wpb=64781, bsz=128, num_updates=16912, lr=9.98727e-05, gnorm=2.015, loss_scale=2, train_wall=11, gb_free=2.8, wall=194682
2021-06-21 00:43:39 | INFO | train_inner | epoch 006: 2004 / 3002 loss=2.365, ppl=5.15, wps=5875, ups=0.09, wpb=64852, bsz=128, num_updates=16913, lr=9.98727e-05, gnorm=1.961, loss_scale=2, train_wall=11, gb_free=2.8, wall=194693
2021-06-21 00:43:50 | INFO | train_inner | epoch 006: 2005 / 3002 loss=2.581, ppl=5.99, wps=5827.6, ups=0.09, wpb=64785, bsz=128, num_updates=16914, lr=9.98727e-05, gnorm=2.029, loss_scale=2, train_wall=11, gb_free=2.8, wall=194704
2021-06-21 00:44:01 | INFO | train_inner | epoch 006: 2006 / 3002 loss=2.429, ppl=5.39, wps=5882, ups=0.09, wpb=64813, bsz=128, num_updates=16915, lr=9.98727e-05, gnorm=2.058, loss_scale=2, train_wall=11, gb_free=2.8, wall=194716
2021-06-21 00:44:12 | INFO | train_inner | epoch 006: 2007 / 3002 loss=2.665, ppl=6.34, wps=5909.2, ups=0.09, wpb=64809, bsz=128, num_updates=16916, lr=9.98727e-05, gnorm=2.582, loss_scale=2, train_wall=11, gb_free=2.8, wall=194726
2021-06-21 00:44:23 | INFO | train_inner | epoch 006: 2008 / 3002 loss=2.516, ppl=5.72, wps=5832.7, ups=0.09, wpb=64844, bsz=128, num_updates=16917, lr=9.98727e-05, gnorm=1.993, loss_scale=2, train_wall=11, gb_free=2.8, wall=194738
2021-06-21 00:44:34 | INFO | train_inner | epoch 006: 2009 / 3002 loss=2.563, ppl=5.91, wps=5798.3, ups=0.09, wpb=64838, bsz=128, num_updates=16918, lr=9.98726e-05, gnorm=1.962, loss_scale=2, train_wall=11, gb_free=2.8, wall=194749
2021-06-21 00:44:46 | INFO | train_inner | epoch 006: 2010 / 3002 loss=2.445, ppl=5.45, wps=5849.9, ups=0.09, wpb=64852, bsz=128, num_updates=16919, lr=9.98726e-05, gnorm=5.413, loss_scale=2, train_wall=11, gb_free=2.8, wall=194760
2021-06-21 00:44:57 | INFO | train_inner | epoch 006: 2011 / 3002 loss=2.422, ppl=5.36, wps=5838.4, ups=0.09, wpb=64873, bsz=128, num_updates=16920, lr=9.98726e-05, gnorm=2.203, loss_scale=2, train_wall=11, gb_free=2.8, wall=194771
2021-06-21 00:45:08 | INFO | train_inner | epoch 006: 2012 / 3002 loss=2.596, ppl=6.05, wps=5944.2, ups=0.09, wpb=64872, bsz=128, num_updates=16921, lr=9.98726e-05, gnorm=2.031, loss_scale=2, train_wall=10, gb_free=2.8, wall=194782
2021-06-21 00:45:19 | INFO | train_inner | epoch 006: 2013 / 3002 loss=2.461, ppl=5.51, wps=5875.7, ups=0.09, wpb=64899, bsz=128, num_updates=16922, lr=9.98726e-05, gnorm=2.114, loss_scale=2, train_wall=11, gb_free=2.8, wall=194793
2021-06-21 00:45:30 | INFO | train_inner | epoch 006: 2014 / 3002 loss=2.473, ppl=5.55, wps=5826.7, ups=0.09, wpb=64850, bsz=128, num_updates=16923, lr=9.98726e-05, gnorm=2.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=194804
2021-06-21 00:45:41 | INFO | train_inner | epoch 006: 2015 / 3002 loss=2.563, ppl=5.91, wps=5872.2, ups=0.09, wpb=64677, bsz=128, num_updates=16924, lr=9.98726e-05, gnorm=2.045, loss_scale=2, train_wall=11, gb_free=2.8, wall=194815
2021-06-21 00:45:52 | INFO | train_inner | epoch 006: 2016 / 3002 loss=2.531, ppl=5.78, wps=5978.4, ups=0.09, wpb=64858, bsz=128, num_updates=16925, lr=9.98726e-05, gnorm=2.035, loss_scale=2, train_wall=10, gb_free=2.8, wall=194826
2021-06-21 00:46:03 | INFO | train_inner | epoch 006: 2017 / 3002 loss=2.411, ppl=5.32, wps=5757.7, ups=0.09, wpb=64793, bsz=128, num_updates=16926, lr=9.98726e-05, gnorm=1.921, loss_scale=2, train_wall=11, gb_free=2.8, wall=194837
2021-06-21 00:46:14 | INFO | train_inner | epoch 006: 2018 / 3002 loss=2.495, ppl=5.64, wps=5875.8, ups=0.09, wpb=64858, bsz=128, num_updates=16927, lr=9.98726e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=194848
2021-06-21 00:46:25 | INFO | train_inner | epoch 006: 2019 / 3002 loss=2.645, ppl=6.25, wps=5838.3, ups=0.09, wpb=64753, bsz=128, num_updates=16928, lr=9.98726e-05, gnorm=2.054, loss_scale=2, train_wall=11, gb_free=2.8, wall=194859
2021-06-21 00:46:36 | INFO | train_inner | epoch 006: 2020 / 3002 loss=2.483, ppl=5.59, wps=6016.9, ups=0.09, wpb=64837, bsz=128, num_updates=16929, lr=9.98726e-05, gnorm=1.99, loss_scale=2, train_wall=10, gb_free=2.8, wall=194870
2021-06-21 00:46:47 | INFO | train_inner | epoch 006: 2021 / 3002 loss=2.584, ppl=6, wps=6020.1, ups=0.09, wpb=64839, bsz=128, num_updates=16930, lr=9.98725e-05, gnorm=2.013, loss_scale=2, train_wall=10, gb_free=2.8, wall=194881
2021-06-21 00:46:58 | INFO | train_inner | epoch 006: 2022 / 3002 loss=2.404, ppl=5.29, wps=5846.4, ups=0.09, wpb=64759, bsz=128, num_updates=16931, lr=9.98725e-05, gnorm=1.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=194892
2021-06-21 00:47:09 | INFO | train_inner | epoch 006: 2023 / 3002 loss=2.476, ppl=5.56, wps=5769.7, ups=0.09, wpb=64820, bsz=128, num_updates=16932, lr=9.98725e-05, gnorm=2.042, loss_scale=2, train_wall=11, gb_free=2.8, wall=194903
2021-06-21 00:47:20 | INFO | train_inner | epoch 006: 2024 / 3002 loss=2.413, ppl=5.32, wps=5803.9, ups=0.09, wpb=64808, bsz=128, num_updates=16933, lr=9.98725e-05, gnorm=2.617, loss_scale=2, train_wall=11, gb_free=2.8, wall=194914
2021-06-21 00:47:31 | INFO | train_inner | epoch 006: 2025 / 3002 loss=2.601, ppl=6.07, wps=5870.8, ups=0.09, wpb=64807, bsz=128, num_updates=16934, lr=9.98725e-05, gnorm=2.042, loss_scale=2, train_wall=11, gb_free=2.8, wall=194925
2021-06-21 00:47:42 | INFO | train_inner | epoch 006: 2026 / 3002 loss=2.388, ppl=5.23, wps=5876.6, ups=0.09, wpb=64868, bsz=128, num_updates=16935, lr=9.98725e-05, gnorm=2.001, loss_scale=2, train_wall=11, gb_free=2.8, wall=194936
2021-06-21 00:47:53 | INFO | train_inner | epoch 006: 2027 / 3002 loss=2.458, ppl=5.49, wps=5877.3, ups=0.09, wpb=64855, bsz=128, num_updates=16936, lr=9.98725e-05, gnorm=2.142, loss_scale=2, train_wall=11, gb_free=2.8, wall=194947
2021-06-21 00:48:04 | INFO | train_inner | epoch 006: 2028 / 3002 loss=2.358, ppl=5.12, wps=5876.2, ups=0.09, wpb=64921, bsz=128, num_updates=16937, lr=9.98725e-05, gnorm=1.951, loss_scale=2, train_wall=11, gb_free=2.8, wall=194958
2021-06-21 00:48:15 | INFO | train_inner | epoch 006: 2029 / 3002 loss=2.351, ppl=5.1, wps=5902.1, ups=0.09, wpb=64846, bsz=128, num_updates=16938, lr=9.98725e-05, gnorm=2.018, loss_scale=2, train_wall=11, gb_free=2.8, wall=194969
2021-06-21 00:48:26 | INFO | train_inner | epoch 006: 2030 / 3002 loss=2.53, ppl=5.78, wps=6057.6, ups=0.09, wpb=64825, bsz=128, num_updates=16939, lr=9.98725e-05, gnorm=1.971, loss_scale=2, train_wall=10, gb_free=2.8, wall=194980
2021-06-21 00:48:37 | INFO | train_inner | epoch 006: 2031 / 3002 loss=2.612, ppl=6.11, wps=5904.6, ups=0.09, wpb=64771, bsz=128, num_updates=16940, lr=9.98725e-05, gnorm=2.095, loss_scale=2, train_wall=11, gb_free=2.8, wall=194991
2021-06-21 00:48:48 | INFO | train_inner | epoch 006: 2032 / 3002 loss=2.592, ppl=6.03, wps=5803.7, ups=0.09, wpb=64781, bsz=128, num_updates=16941, lr=9.98725e-05, gnorm=2.042, loss_scale=2, train_wall=11, gb_free=2.8, wall=195002
2021-06-21 00:48:59 | INFO | train_inner | epoch 006: 2033 / 3002 loss=2.536, ppl=5.8, wps=5845.7, ups=0.09, wpb=64734, bsz=128, num_updates=16942, lr=9.98725e-05, gnorm=1.965, loss_scale=2, train_wall=11, gb_free=2.8, wall=195013
2021-06-21 00:49:10 | INFO | train_inner | epoch 006: 2034 / 3002 loss=2.558, ppl=5.89, wps=5812.3, ups=0.09, wpb=64830, bsz=128, num_updates=16943, lr=9.98724e-05, gnorm=1.924, loss_scale=2, train_wall=11, gb_free=2.8, wall=195025
2021-06-21 00:49:21 | INFO | train_inner | epoch 006: 2035 / 3002 loss=2.66, ppl=6.32, wps=5936.4, ups=0.09, wpb=64808, bsz=128, num_updates=16944, lr=9.98724e-05, gnorm=2.084, loss_scale=2, train_wall=10, gb_free=2.8, wall=195035
2021-06-21 00:49:32 | INFO | train_inner | epoch 006: 2036 / 3002 loss=2.477, ppl=5.57, wps=5839.3, ups=0.09, wpb=64811, bsz=128, num_updates=16945, lr=9.98724e-05, gnorm=1.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=195047
2021-06-21 00:49:43 | INFO | train_inner | epoch 006: 2037 / 3002 loss=2.496, ppl=5.64, wps=5920.5, ups=0.09, wpb=64819, bsz=128, num_updates=16946, lr=9.98724e-05, gnorm=2.204, loss_scale=2, train_wall=11, gb_free=2.8, wall=195058
2021-06-21 00:49:54 | INFO | train_inner | epoch 006: 2038 / 3002 loss=2.484, ppl=5.6, wps=5881.3, ups=0.09, wpb=64842, bsz=128, num_updates=16947, lr=9.98724e-05, gnorm=44.939, loss_scale=2, train_wall=11, gb_free=2.8, wall=195069
2021-06-21 00:50:05 | INFO | train_inner | epoch 006: 2039 / 3002 loss=2.549, ppl=5.85, wps=5855.6, ups=0.09, wpb=64818, bsz=128, num_updates=16948, lr=9.98724e-05, gnorm=1.938, loss_scale=2, train_wall=11, gb_free=2.8, wall=195080
2021-06-21 00:50:16 | INFO | train_inner | epoch 006: 2040 / 3002 loss=2.41, ppl=5.31, wps=5903.7, ups=0.09, wpb=64838, bsz=128, num_updates=16949, lr=9.98724e-05, gnorm=1.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=195091
2021-06-21 00:50:27 | INFO | train_inner | epoch 006: 2041 / 3002 loss=2.516, ppl=5.72, wps=5878.9, ups=0.09, wpb=64819, bsz=128, num_updates=16950, lr=9.98724e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=195102
2021-06-21 00:50:39 | INFO | train_inner | epoch 006: 2042 / 3002 loss=2.486, ppl=5.6, wps=5738.5, ups=0.09, wpb=64835, bsz=128, num_updates=16951, lr=9.98724e-05, gnorm=2.01, loss_scale=2, train_wall=11, gb_free=2.8, wall=195113
2021-06-21 00:50:50 | INFO | train_inner | epoch 006: 2043 / 3002 loss=2.526, ppl=5.76, wps=5936.2, ups=0.09, wpb=64880, bsz=128, num_updates=16952, lr=9.98724e-05, gnorm=1.986, loss_scale=2, train_wall=10, gb_free=2.8, wall=195124
2021-06-21 00:51:00 | INFO | train_inner | epoch 006: 2044 / 3002 loss=2.49, ppl=5.62, wps=5948.2, ups=0.09, wpb=64924, bsz=128, num_updates=16953, lr=9.98724e-05, gnorm=2.012, loss_scale=2, train_wall=10, gb_free=2.8, wall=195135
2021-06-21 00:51:11 | INFO | train_inner | epoch 006: 2045 / 3002 loss=2.584, ppl=5.99, wps=5901.5, ups=0.09, wpb=64775, bsz=128, num_updates=16954, lr=9.98724e-05, gnorm=2.39, loss_scale=2, train_wall=11, gb_free=2.8, wall=195146
2021-06-21 00:51:22 | INFO | train_inner | epoch 006: 2046 / 3002 loss=2.372, ppl=5.18, wps=5925.9, ups=0.09, wpb=64884, bsz=128, num_updates=16955, lr=9.98723e-05, gnorm=2.034, loss_scale=2, train_wall=11, gb_free=2.8, wall=195157
2021-06-21 00:51:33 | INFO | train_inner | epoch 006: 2047 / 3002 loss=2.508, ppl=5.69, wps=5836.9, ups=0.09, wpb=64830, bsz=128, num_updates=16956, lr=9.98723e-05, gnorm=2.015, loss_scale=2, train_wall=11, gb_free=2.8, wall=195168
2021-06-21 00:51:45 | INFO | train_inner | epoch 006: 2048 / 3002 loss=2.395, ppl=5.26, wps=5795.5, ups=0.09, wpb=64778, bsz=128, num_updates=16957, lr=9.98723e-05, gnorm=2.054, loss_scale=2, train_wall=11, gb_free=2.8, wall=195179
2021-06-21 00:51:56 | INFO | train_inner | epoch 006: 2049 / 3002 loss=2.563, ppl=5.91, wps=5852.6, ups=0.09, wpb=64855, bsz=128, num_updates=16958, lr=9.98723e-05, gnorm=3.843, loss_scale=2, train_wall=11, gb_free=2.8, wall=195190
2021-06-21 00:52:07 | INFO | train_inner | epoch 006: 2050 / 3002 loss=2.463, ppl=5.51, wps=5774.6, ups=0.09, wpb=64856, bsz=128, num_updates=16959, lr=9.98723e-05, gnorm=2.038, loss_scale=2, train_wall=11, gb_free=2.8, wall=195201
2021-06-21 00:52:18 | INFO | train_inner | epoch 006: 2051 / 3002 loss=2.565, ppl=5.92, wps=5825.3, ups=0.09, wpb=64840, bsz=128, num_updates=16960, lr=9.98723e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=195212
2021-06-21 00:52:29 | INFO | train_inner | epoch 006: 2052 / 3002 loss=2.477, ppl=5.57, wps=5947.4, ups=0.09, wpb=64897, bsz=128, num_updates=16961, lr=9.98723e-05, gnorm=2.003, loss_scale=2, train_wall=10, gb_free=2.8, wall=195223
2021-06-21 00:52:40 | INFO | train_inner | epoch 006: 2053 / 3002 loss=2.463, ppl=5.51, wps=5863.9, ups=0.09, wpb=64806, bsz=128, num_updates=16962, lr=9.98723e-05, gnorm=2.328, loss_scale=2, train_wall=11, gb_free=2.8, wall=195234
2021-06-21 00:52:51 | INFO | train_inner | epoch 006: 2054 / 3002 loss=2.499, ppl=5.65, wps=5809.2, ups=0.09, wpb=64754, bsz=128, num_updates=16963, lr=9.98723e-05, gnorm=2.616, loss_scale=2, train_wall=11, gb_free=2.8, wall=195246
2021-06-21 00:53:02 | INFO | train_inner | epoch 006: 2055 / 3002 loss=2.372, ppl=5.18, wps=5799.6, ups=0.09, wpb=64876, bsz=128, num_updates=16964, lr=9.98723e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=195257
2021-06-21 00:53:13 | INFO | train_inner | epoch 006: 2056 / 3002 loss=2.381, ppl=5.21, wps=5870, ups=0.09, wpb=64816, bsz=128, num_updates=16965, lr=9.98723e-05, gnorm=9.084, loss_scale=2, train_wall=11, gb_free=2.8, wall=195268
2021-06-21 00:53:25 | INFO | train_inner | epoch 006: 2057 / 3002 loss=2.462, ppl=5.51, wps=5832.8, ups=0.09, wpb=64826, bsz=128, num_updates=16966, lr=9.98723e-05, gnorm=2.516, loss_scale=2, train_wall=11, gb_free=2.8, wall=195279
2021-06-21 00:53:35 | INFO | train_inner | epoch 006: 2058 / 3002 loss=2.423, ppl=5.36, wps=5957.9, ups=0.09, wpb=64819, bsz=128, num_updates=16967, lr=9.98723e-05, gnorm=2.075, loss_scale=2, train_wall=10, gb_free=2.8, wall=195290
2021-06-21 00:53:46 | INFO | train_inner | epoch 006: 2059 / 3002 loss=2.395, ppl=5.26, wps=5943.1, ups=0.09, wpb=64843, bsz=128, num_updates=16968, lr=9.98722e-05, gnorm=2.891, loss_scale=2, train_wall=10, gb_free=2.8, wall=195301
2021-06-21 00:53:58 | INFO | train_inner | epoch 006: 2060 / 3002 loss=2.656, ppl=6.3, wps=5725.5, ups=0.09, wpb=64781, bsz=128, num_updates=16969, lr=9.98722e-05, gnorm=2.884, loss_scale=2, train_wall=11, gb_free=2.8, wall=195312
2021-06-21 00:54:09 | INFO | train_inner | epoch 006: 2061 / 3002 loss=2.537, ppl=5.81, wps=5805.4, ups=0.09, wpb=64847, bsz=128, num_updates=16970, lr=9.98722e-05, gnorm=2.293, loss_scale=2, train_wall=11, gb_free=2.8, wall=195323
2021-06-21 00:54:20 | INFO | train_inner | epoch 006: 2062 / 3002 loss=2.485, ppl=5.6, wps=5842.3, ups=0.09, wpb=64890, bsz=128, num_updates=16971, lr=9.98722e-05, gnorm=2.145, loss_scale=2, train_wall=11, gb_free=2.8, wall=195334
2021-06-21 00:54:31 | INFO | train_inner | epoch 006: 2063 / 3002 loss=2.41, ppl=5.31, wps=5854.1, ups=0.09, wpb=64836, bsz=128, num_updates=16972, lr=9.98722e-05, gnorm=2.308, loss_scale=2, train_wall=11, gb_free=2.8, wall=195345
2021-06-21 00:54:42 | INFO | train_inner | epoch 006: 2064 / 3002 loss=2.44, ppl=5.43, wps=5803.4, ups=0.09, wpb=64795, bsz=128, num_updates=16973, lr=9.98722e-05, gnorm=2.446, loss_scale=2, train_wall=11, gb_free=2.8, wall=195356
2021-06-21 00:54:53 | INFO | train_inner | epoch 006: 2065 / 3002 loss=2.594, ppl=6.04, wps=5888.6, ups=0.09, wpb=64796, bsz=128, num_updates=16974, lr=9.98722e-05, gnorm=2.214, loss_scale=2, train_wall=11, gb_free=2.8, wall=195367
2021-06-21 00:55:04 | INFO | train_inner | epoch 006: 2066 / 3002 loss=2.502, ppl=5.67, wps=5730.2, ups=0.09, wpb=64914, bsz=128, num_updates=16975, lr=9.98722e-05, gnorm=2.398, loss_scale=2, train_wall=11, gb_free=2.8, wall=195379
2021-06-21 00:55:15 | INFO | train_inner | epoch 006: 2067 / 3002 loss=2.474, ppl=5.55, wps=5943, ups=0.09, wpb=64753, bsz=128, num_updates=16976, lr=9.98722e-05, gnorm=2.372, loss_scale=2, train_wall=10, gb_free=2.8, wall=195390
2021-06-21 00:55:26 | INFO | train_inner | epoch 006: 2068 / 3002 loss=2.477, ppl=5.57, wps=5836.2, ups=0.09, wpb=64794, bsz=128, num_updates=16977, lr=9.98722e-05, gnorm=2.18, loss_scale=2, train_wall=11, gb_free=2.8, wall=195401
2021-06-21 00:55:38 | INFO | train_inner | epoch 006: 2069 / 3002 loss=2.516, ppl=5.72, wps=5857, ups=0.09, wpb=64811, bsz=128, num_updates=16978, lr=9.98722e-05, gnorm=21.403, loss_scale=2, train_wall=11, gb_free=2.8, wall=195412
2021-06-21 00:55:49 | INFO | train_inner | epoch 006: 2070 / 3002 loss=2.451, ppl=5.47, wps=5834.3, ups=0.09, wpb=64788, bsz=128, num_updates=16979, lr=9.98722e-05, gnorm=2.199, loss_scale=2, train_wall=11, gb_free=2.8, wall=195423
2021-06-21 00:56:00 | INFO | train_inner | epoch 006: 2071 / 3002 loss=2.473, ppl=5.55, wps=5919.6, ups=0.09, wpb=64768, bsz=128, num_updates=16980, lr=9.98721e-05, gnorm=2.098, loss_scale=2, train_wall=10, gb_free=2.8, wall=195434
2021-06-21 00:56:11 | INFO | train_inner | epoch 006: 2072 / 3002 loss=2.338, ppl=5.06, wps=5773.4, ups=0.09, wpb=64817, bsz=128, num_updates=16981, lr=9.98721e-05, gnorm=2.778, loss_scale=2, train_wall=11, gb_free=2.8, wall=195445
2021-06-21 00:56:22 | INFO | train_inner | epoch 006: 2073 / 3002 loss=2.507, ppl=5.68, wps=5780.6, ups=0.09, wpb=64745, bsz=128, num_updates=16982, lr=9.98721e-05, gnorm=2.022, loss_scale=2, train_wall=11, gb_free=2.8, wall=195456
2021-06-21 00:56:33 | INFO | train_inner | epoch 006: 2074 / 3002 loss=2.576, ppl=5.96, wps=5933.3, ups=0.09, wpb=64865, bsz=128, num_updates=16983, lr=9.98721e-05, gnorm=2.16, loss_scale=2, train_wall=10, gb_free=2.8, wall=195467
2021-06-21 00:56:44 | INFO | train_inner | epoch 006: 2075 / 3002 loss=2.592, ppl=6.03, wps=6010, ups=0.09, wpb=64867, bsz=128, num_updates=16984, lr=9.98721e-05, gnorm=2.163, loss_scale=2, train_wall=10, gb_free=2.8, wall=195478
2021-06-21 00:56:55 | INFO | train_inner | epoch 006: 2076 / 3002 loss=2.437, ppl=5.41, wps=5863.2, ups=0.09, wpb=64841, bsz=128, num_updates=16985, lr=9.98721e-05, gnorm=2.06, loss_scale=2, train_wall=11, gb_free=2.8, wall=195489
2021-06-21 00:57:06 | INFO | train_inner | epoch 006: 2077 / 3002 loss=2.588, ppl=6.01, wps=5918, ups=0.09, wpb=64772, bsz=128, num_updates=16986, lr=9.98721e-05, gnorm=2.266, loss_scale=2, train_wall=11, gb_free=2.8, wall=195500
2021-06-21 00:57:17 | INFO | train_inner | epoch 006: 2078 / 3002 loss=2.449, ppl=5.46, wps=5853, ups=0.09, wpb=64825, bsz=128, num_updates=16987, lr=9.98721e-05, gnorm=2.126, loss_scale=2, train_wall=11, gb_free=2.8, wall=195511
2021-06-21 00:57:28 | INFO | train_inner | epoch 006: 2079 / 3002 loss=2.428, ppl=5.38, wps=5958.4, ups=0.09, wpb=64924, bsz=128, num_updates=16988, lr=9.98721e-05, gnorm=2.031, loss_scale=2, train_wall=10, gb_free=2.8, wall=195522
2021-06-21 00:57:39 | INFO | train_inner | epoch 006: 2080 / 3002 loss=2.477, ppl=5.57, wps=5823, ups=0.09, wpb=64754, bsz=128, num_updates=16989, lr=9.98721e-05, gnorm=2.086, loss_scale=2, train_wall=11, gb_free=2.8, wall=195533
2021-06-21 00:57:50 | INFO | train_inner | epoch 006: 2081 / 3002 loss=2.472, ppl=5.55, wps=5783.1, ups=0.09, wpb=64822, bsz=128, num_updates=16990, lr=9.98721e-05, gnorm=1.969, loss_scale=2, train_wall=11, gb_free=2.8, wall=195544
2021-06-21 00:58:01 | INFO | train_inner | epoch 006: 2082 / 3002 loss=2.437, ppl=5.42, wps=5824.1, ups=0.09, wpb=64857, bsz=128, num_updates=16991, lr=9.98721e-05, gnorm=2.26, loss_scale=2, train_wall=11, gb_free=2.8, wall=195556
2021-06-21 00:58:12 | INFO | train_inner | epoch 006: 2083 / 3002 loss=2.64, ppl=6.23, wps=5932.1, ups=0.09, wpb=64742, bsz=128, num_updates=16992, lr=9.98721e-05, gnorm=2.341, loss_scale=2, train_wall=10, gb_free=2.8, wall=195566
2021-06-21 00:58:23 | INFO | train_inner | epoch 006: 2084 / 3002 loss=2.603, ppl=6.07, wps=5766.6, ups=0.09, wpb=64820, bsz=128, num_updates=16993, lr=9.9872e-05, gnorm=2.646, loss_scale=2, train_wall=11, gb_free=2.8, wall=195578
2021-06-21 00:58:34 | INFO | train_inner | epoch 006: 2085 / 3002 loss=2.49, ppl=5.62, wps=5848.8, ups=0.09, wpb=64848, bsz=128, num_updates=16994, lr=9.9872e-05, gnorm=1.925, loss_scale=2, train_wall=11, gb_free=2.8, wall=195589
2021-06-21 00:58:45 | INFO | train_inner | epoch 006: 2086 / 3002 loss=2.449, ppl=5.46, wps=5989, ups=0.09, wpb=64923, bsz=128, num_updates=16995, lr=9.9872e-05, gnorm=1.988, loss_scale=2, train_wall=10, gb_free=2.8, wall=195600
2021-06-21 00:58:56 | INFO | train_inner | epoch 006: 2087 / 3002 loss=2.512, ppl=5.7, wps=5819.8, ups=0.09, wpb=64808, bsz=128, num_updates=16996, lr=9.9872e-05, gnorm=2.245, loss_scale=2, train_wall=11, gb_free=2.8, wall=195611
2021-06-21 00:59:07 | INFO | train_inner | epoch 006: 2088 / 3002 loss=2.497, ppl=5.64, wps=5892.9, ups=0.09, wpb=64864, bsz=128, num_updates=16997, lr=9.9872e-05, gnorm=2.035, loss_scale=2, train_wall=11, gb_free=2.8, wall=195622
2021-06-21 00:59:18 | INFO | train_inner | epoch 006: 2089 / 3002 loss=2.405, ppl=5.3, wps=5852.7, ups=0.09, wpb=64783, bsz=128, num_updates=16998, lr=9.9872e-05, gnorm=1.96, loss_scale=2, train_wall=11, gb_free=2.8, wall=195633
2021-06-21 00:59:30 | INFO | train_inner | epoch 006: 2090 / 3002 loss=2.435, ppl=5.41, wps=5714.1, ups=0.09, wpb=64769, bsz=128, num_updates=16999, lr=9.9872e-05, gnorm=1.99, loss_scale=2, train_wall=11, gb_free=2.8, wall=195644
2021-06-21 00:59:41 | INFO | train_inner | epoch 006: 2091 / 3002 loss=2.498, ppl=5.65, wps=5843.6, ups=0.09, wpb=64807, bsz=128, num_updates=17000, lr=9.9872e-05, gnorm=1.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=195655
2021-06-21 00:59:52 | INFO | train_inner | epoch 006: 2092 / 3002 loss=2.491, ppl=5.62, wps=5811.2, ups=0.09, wpb=64751, bsz=128, num_updates=17001, lr=9.9872e-05, gnorm=2.063, loss_scale=2, train_wall=11, gb_free=2.8, wall=195666
2021-06-21 01:00:03 | INFO | train_inner | epoch 006: 2093 / 3002 loss=2.438, ppl=5.42, wps=5869, ups=0.09, wpb=64893, bsz=128, num_updates=17002, lr=9.9872e-05, gnorm=2.101, loss_scale=4, train_wall=11, gb_free=2.8, wall=195677
2021-06-21 01:00:14 | INFO | train_inner | epoch 006: 2094 / 3002 loss=2.401, ppl=5.28, wps=5840.4, ups=0.09, wpb=64862, bsz=128, num_updates=17003, lr=9.9872e-05, gnorm=1.993, loss_scale=4, train_wall=11, gb_free=2.8, wall=195689
2021-06-21 01:00:25 | INFO | train_inner | epoch 006: 2095 / 3002 loss=2.279, ppl=4.85, wps=5965.8, ups=0.09, wpb=64804, bsz=128, num_updates=17004, lr=9.9872e-05, gnorm=3.92, loss_scale=4, train_wall=10, gb_free=2.8, wall=195699
2021-06-21 01:00:36 | INFO | train_inner | epoch 006: 2096 / 3002 loss=2.495, ppl=5.64, wps=5827.3, ups=0.09, wpb=64848, bsz=128, num_updates=17005, lr=9.98719e-05, gnorm=2.022, loss_scale=4, train_wall=11, gb_free=2.8, wall=195711
2021-06-21 01:00:48 | INFO | train_inner | epoch 006: 2097 / 3002 loss=2.45, ppl=5.46, wps=5690.2, ups=0.09, wpb=64868, bsz=128, num_updates=17006, lr=9.98719e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=195722
2021-06-21 01:00:59 | INFO | train_inner | epoch 006: 2098 / 3002 loss=2.478, ppl=5.57, wps=5870.4, ups=0.09, wpb=64819, bsz=128, num_updates=17007, lr=9.98719e-05, gnorm=1.916, loss_scale=4, train_wall=11, gb_free=2.8, wall=195733
2021-06-21 01:01:10 | INFO | train_inner | epoch 006: 2099 / 3002 loss=2.411, ppl=5.32, wps=5769.4, ups=0.09, wpb=64842, bsz=128, num_updates=17008, lr=9.98719e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=195744
2021-06-21 01:01:21 | INFO | train_inner | epoch 006: 2100 / 3002 loss=2.613, ppl=6.12, wps=5910.3, ups=0.09, wpb=64809, bsz=128, num_updates=17009, lr=9.98719e-05, gnorm=1.907, loss_scale=4, train_wall=10, gb_free=2.8, wall=195755
2021-06-21 01:01:32 | INFO | train_inner | epoch 006: 2101 / 3002 loss=2.55, ppl=5.86, wps=5967.3, ups=0.09, wpb=64765, bsz=128, num_updates=17010, lr=9.98719e-05, gnorm=1.998, loss_scale=4, train_wall=10, gb_free=2.8, wall=195766
2021-06-21 01:01:43 | INFO | train_inner | epoch 006: 2102 / 3002 loss=2.515, ppl=5.72, wps=5791.4, ups=0.09, wpb=64803, bsz=128, num_updates=17011, lr=9.98719e-05, gnorm=2.65, loss_scale=4, train_wall=11, gb_free=2.8, wall=195777
2021-06-21 01:01:54 | INFO | train_inner | epoch 006: 2103 / 3002 loss=2.516, ppl=5.72, wps=5815.5, ups=0.09, wpb=64764, bsz=128, num_updates=17012, lr=9.98719e-05, gnorm=2.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=195788
2021-06-21 01:02:05 | INFO | train_inner | epoch 006: 2104 / 3002 loss=2.523, ppl=5.75, wps=5830, ups=0.09, wpb=64784, bsz=128, num_updates=17013, lr=9.98719e-05, gnorm=2.134, loss_scale=4, train_wall=11, gb_free=2.8, wall=195799
2021-06-21 01:02:16 | INFO | train_inner | epoch 006: 2105 / 3002 loss=2.53, ppl=5.78, wps=5886.7, ups=0.09, wpb=64817, bsz=128, num_updates=17014, lr=9.98719e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=195811
2021-06-21 01:02:27 | INFO | train_inner | epoch 006: 2106 / 3002 loss=2.529, ppl=5.77, wps=5844.5, ups=0.09, wpb=64784, bsz=128, num_updates=17015, lr=9.98719e-05, gnorm=1.951, loss_scale=4, train_wall=11, gb_free=2.8, wall=195822
2021-06-21 01:02:38 | INFO | train_inner | epoch 006: 2107 / 3002 loss=2.421, ppl=5.36, wps=5812.2, ups=0.09, wpb=64763, bsz=128, num_updates=17016, lr=9.98719e-05, gnorm=1.9, loss_scale=4, train_wall=11, gb_free=2.8, wall=195833
2021-06-21 01:02:50 | INFO | train_inner | epoch 006: 2108 / 3002 loss=2.63, ppl=6.19, wps=5727.5, ups=0.09, wpb=64822, bsz=128, num_updates=17017, lr=9.98719e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=195844
2021-06-21 01:03:01 | INFO | train_inner | epoch 006: 2109 / 3002 loss=2.333, ppl=5.04, wps=5844.8, ups=0.09, wpb=64865, bsz=128, num_updates=17018, lr=9.98718e-05, gnorm=1.961, loss_scale=4, train_wall=11, gb_free=2.8, wall=195855
2021-06-21 01:03:12 | INFO | train_inner | epoch 006: 2110 / 3002 loss=2.58, ppl=5.98, wps=5823.3, ups=0.09, wpb=64767, bsz=128, num_updates=17019, lr=9.98718e-05, gnorm=1.918, loss_scale=4, train_wall=11, gb_free=2.8, wall=195866
2021-06-21 01:03:23 | INFO | train_inner | epoch 006: 2111 / 3002 loss=2.54, ppl=5.81, wps=5765.1, ups=0.09, wpb=64795, bsz=128, num_updates=17020, lr=9.98718e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=195878
2021-06-21 01:03:34 | INFO | train_inner | epoch 006: 2112 / 3002 loss=2.6, ppl=6.06, wps=5844.5, ups=0.09, wpb=64866, bsz=128, num_updates=17021, lr=9.98718e-05, gnorm=1.921, loss_scale=4, train_wall=11, gb_free=2.8, wall=195889
2021-06-21 01:03:45 | INFO | train_inner | epoch 006: 2113 / 3002 loss=2.649, ppl=6.27, wps=5840.4, ups=0.09, wpb=64838, bsz=128, num_updates=17022, lr=9.98718e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=195900
2021-06-21 01:03:56 | INFO | train_inner | epoch 006: 2114 / 3002 loss=2.459, ppl=5.5, wps=5842.6, ups=0.09, wpb=64768, bsz=128, num_updates=17023, lr=9.98718e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=195911
2021-06-21 01:04:08 | INFO | train_inner | epoch 006: 2115 / 3002 loss=2.455, ppl=5.48, wps=5807.3, ups=0.09, wpb=64849, bsz=128, num_updates=17024, lr=9.98718e-05, gnorm=2.013, loss_scale=4, train_wall=11, gb_free=2.8, wall=195922
2021-06-21 01:04:19 | INFO | train_inner | epoch 006: 2116 / 3002 loss=2.508, ppl=5.69, wps=5918.3, ups=0.09, wpb=64812, bsz=128, num_updates=17025, lr=9.98718e-05, gnorm=1.879, loss_scale=4, train_wall=10, gb_free=2.8, wall=195933
2021-06-21 01:04:30 | INFO | train_inner | epoch 006: 2117 / 3002 loss=2.514, ppl=5.71, wps=5773.1, ups=0.09, wpb=64779, bsz=128, num_updates=17026, lr=9.98718e-05, gnorm=1.964, loss_scale=4, train_wall=11, gb_free=2.8, wall=195944
2021-06-21 01:04:41 | INFO | train_inner | epoch 006: 2118 / 3002 loss=2.48, ppl=5.58, wps=5805.7, ups=0.09, wpb=64779, bsz=128, num_updates=17027, lr=9.98718e-05, gnorm=1.944, loss_scale=4, train_wall=11, gb_free=2.8, wall=195955
2021-06-21 01:04:52 | INFO | train_inner | epoch 006: 2119 / 3002 loss=2.507, ppl=5.68, wps=5923.4, ups=0.09, wpb=64835, bsz=128, num_updates=17028, lr=9.98718e-05, gnorm=1.996, loss_scale=4, train_wall=10, gb_free=2.8, wall=195966
2021-06-21 01:05:03 | INFO | train_inner | epoch 006: 2120 / 3002 loss=2.466, ppl=5.52, wps=5853.9, ups=0.09, wpb=64807, bsz=128, num_updates=17029, lr=9.98718e-05, gnorm=1.917, loss_scale=4, train_wall=11, gb_free=2.8, wall=195977
2021-06-21 01:05:14 | INFO | train_inner | epoch 006: 2121 / 3002 loss=2.61, ppl=6.11, wps=5883.8, ups=0.09, wpb=64821, bsz=128, num_updates=17030, lr=9.98717e-05, gnorm=1.941, loss_scale=4, train_wall=11, gb_free=2.8, wall=195988
2021-06-21 01:05:25 | INFO | train_inner | epoch 006: 2122 / 3002 loss=2.557, ppl=5.88, wps=5852.8, ups=0.09, wpb=64890, bsz=128, num_updates=17031, lr=9.98717e-05, gnorm=1.966, loss_scale=4, train_wall=11, gb_free=2.8, wall=195999
2021-06-21 01:05:36 | INFO | train_inner | epoch 006: 2123 / 3002 loss=2.714, ppl=6.56, wps=5888.4, ups=0.09, wpb=64920, bsz=128, num_updates=17032, lr=9.98717e-05, gnorm=3.036, loss_scale=4, train_wall=11, gb_free=2.8, wall=196010
2021-06-21 01:05:47 | INFO | train_inner | epoch 006: 2124 / 3002 loss=2.554, ppl=5.87, wps=5969.5, ups=0.09, wpb=64859, bsz=128, num_updates=17033, lr=9.98717e-05, gnorm=1.955, loss_scale=4, train_wall=10, gb_free=2.8, wall=196021
2021-06-21 01:05:58 | INFO | train_inner | epoch 006: 2125 / 3002 loss=2.408, ppl=5.31, wps=5760.4, ups=0.09, wpb=64798, bsz=128, num_updates=17034, lr=9.98717e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=196033
2021-06-21 01:06:09 | INFO | train_inner | epoch 006: 2126 / 3002 loss=2.507, ppl=5.69, wps=5828, ups=0.09, wpb=64804, bsz=128, num_updates=17035, lr=9.98717e-05, gnorm=1.97, loss_scale=4, train_wall=11, gb_free=2.8, wall=196044
2021-06-21 01:06:20 | INFO | train_inner | epoch 006: 2127 / 3002 loss=2.383, ppl=5.22, wps=5842.7, ups=0.09, wpb=64801, bsz=128, num_updates=17036, lr=9.98717e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=196055
2021-06-21 01:06:32 | INFO | train_inner | epoch 006: 2128 / 3002 loss=2.494, ppl=5.63, wps=5837.8, ups=0.09, wpb=64773, bsz=128, num_updates=17037, lr=9.98717e-05, gnorm=1.946, loss_scale=4, train_wall=11, gb_free=2.8, wall=196066
2021-06-21 01:06:43 | INFO | train_inner | epoch 006: 2129 / 3002 loss=2.567, ppl=5.93, wps=5906.2, ups=0.09, wpb=64863, bsz=128, num_updates=17038, lr=9.98717e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=196077
2021-06-21 01:06:54 | INFO | train_inner | epoch 006: 2130 / 3002 loss=2.367, ppl=5.16, wps=5808.5, ups=0.09, wpb=64835, bsz=128, num_updates=17039, lr=9.98717e-05, gnorm=1.889, loss_scale=4, train_wall=11, gb_free=2.8, wall=196088
2021-06-21 01:07:05 | INFO | train_inner | epoch 006: 2131 / 3002 loss=2.407, ppl=5.3, wps=5699.1, ups=0.09, wpb=64819, bsz=128, num_updates=17040, lr=9.98717e-05, gnorm=2.033, loss_scale=4, train_wall=11, gb_free=2.8, wall=196099
2021-06-21 01:07:16 | INFO | train_inner | epoch 006: 2132 / 3002 loss=2.597, ppl=6.05, wps=5860.7, ups=0.09, wpb=64765, bsz=128, num_updates=17041, lr=9.98717e-05, gnorm=1.971, loss_scale=4, train_wall=11, gb_free=2.8, wall=196110
2021-06-21 01:07:27 | INFO | train_inner | epoch 006: 2133 / 3002 loss=2.59, ppl=6.02, wps=5848.1, ups=0.09, wpb=64839, bsz=128, num_updates=17042, lr=9.98717e-05, gnorm=2.125, loss_scale=4, train_wall=11, gb_free=2.8, wall=196122
2021-06-21 01:07:38 | INFO | train_inner | epoch 006: 2134 / 3002 loss=2.495, ppl=5.64, wps=5819.1, ups=0.09, wpb=64781, bsz=128, num_updates=17043, lr=9.98716e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=196133
2021-06-21 01:07:49 | INFO | train_inner | epoch 006: 2135 / 3002 loss=2.43, ppl=5.39, wps=5858.2, ups=0.09, wpb=64894, bsz=128, num_updates=17044, lr=9.98716e-05, gnorm=1.927, loss_scale=4, train_wall=11, gb_free=2.8, wall=196144
2021-06-21 01:08:01 | INFO | train_inner | epoch 006: 2136 / 3002 loss=2.435, ppl=5.41, wps=5788.7, ups=0.09, wpb=64937, bsz=128, num_updates=17045, lr=9.98716e-05, gnorm=2.017, loss_scale=4, train_wall=11, gb_free=2.8, wall=196155
2021-06-21 01:08:11 | INFO | train_inner | epoch 006: 2137 / 3002 loss=2.503, ppl=5.67, wps=5956.8, ups=0.09, wpb=64851, bsz=128, num_updates=17046, lr=9.98716e-05, gnorm=1.909, loss_scale=4, train_wall=10, gb_free=2.8, wall=196166
2021-06-21 01:08:22 | INFO | train_inner | epoch 006: 2138 / 3002 loss=2.527, ppl=5.76, wps=6038.9, ups=0.09, wpb=64880, bsz=128, num_updates=17047, lr=9.98716e-05, gnorm=2.051, loss_scale=4, train_wall=10, gb_free=2.8, wall=196177
2021-06-21 01:08:33 | INFO | train_inner | epoch 006: 2139 / 3002 loss=2.608, ppl=6.09, wps=5833.3, ups=0.09, wpb=64826, bsz=128, num_updates=17048, lr=9.98716e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=196188
2021-06-21 01:08:44 | INFO | train_inner | epoch 006: 2140 / 3002 loss=2.485, ppl=5.6, wps=5871.7, ups=0.09, wpb=64848, bsz=128, num_updates=17049, lr=9.98716e-05, gnorm=1.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=196199
2021-06-21 01:08:55 | INFO | train_inner | epoch 006: 2141 / 3002 loss=2.345, ppl=5.08, wps=5918.8, ups=0.09, wpb=64906, bsz=128, num_updates=17050, lr=9.98716e-05, gnorm=2.004, loss_scale=4, train_wall=10, gb_free=2.8, wall=196210
2021-06-21 01:09:07 | INFO | train_inner | epoch 006: 2142 / 3002 loss=2.442, ppl=5.43, wps=5812.1, ups=0.09, wpb=64828, bsz=128, num_updates=17051, lr=9.98716e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=196221
2021-06-21 01:09:18 | INFO | train_inner | epoch 006: 2143 / 3002 loss=2.47, ppl=5.54, wps=5759.7, ups=0.09, wpb=64739, bsz=128, num_updates=17052, lr=9.98716e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=196232
2021-06-21 01:09:29 | INFO | train_inner | epoch 006: 2144 / 3002 loss=2.422, ppl=5.36, wps=5707.8, ups=0.09, wpb=64872, bsz=128, num_updates=17053, lr=9.98716e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=196243
2021-06-21 01:09:40 | INFO | train_inner | epoch 006: 2145 / 3002 loss=2.448, ppl=5.46, wps=5726.6, ups=0.09, wpb=64777, bsz=128, num_updates=17054, lr=9.98716e-05, gnorm=1.906, loss_scale=4, train_wall=11, gb_free=2.8, wall=196255
2021-06-21 01:09:52 | INFO | train_inner | epoch 006: 2146 / 3002 loss=2.682, ppl=6.42, wps=5863.2, ups=0.09, wpb=64948, bsz=128, num_updates=17055, lr=9.98715e-05, gnorm=1.925, loss_scale=4, train_wall=11, gb_free=2.8, wall=196266
2021-06-21 01:10:03 | INFO | train_inner | epoch 006: 2147 / 3002 loss=2.341, ppl=5.07, wps=5780.9, ups=0.09, wpb=64790, bsz=128, num_updates=17056, lr=9.98715e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=196277
2021-06-21 01:10:14 | INFO | train_inner | epoch 006: 2148 / 3002 loss=2.549, ppl=5.85, wps=5783.7, ups=0.09, wpb=64827, bsz=128, num_updates=17057, lr=9.98715e-05, gnorm=2.096, loss_scale=4, train_wall=11, gb_free=2.8, wall=196288
2021-06-21 01:10:25 | INFO | train_inner | epoch 006: 2149 / 3002 loss=2.606, ppl=6.09, wps=5870.1, ups=0.09, wpb=64807, bsz=128, num_updates=17058, lr=9.98715e-05, gnorm=2.075, loss_scale=4, train_wall=11, gb_free=2.8, wall=196299
2021-06-21 01:10:36 | INFO | train_inner | epoch 006: 2150 / 3002 loss=2.593, ppl=6.03, wps=5913.8, ups=0.09, wpb=64823, bsz=128, num_updates=17059, lr=9.98715e-05, gnorm=1.998, loss_scale=4, train_wall=10, gb_free=2.8, wall=196310
2021-06-21 01:10:47 | INFO | train_inner | epoch 006: 2151 / 3002 loss=2.675, ppl=6.39, wps=5713, ups=0.09, wpb=64785, bsz=128, num_updates=17060, lr=9.98715e-05, gnorm=1.974, loss_scale=4, train_wall=11, gb_free=2.8, wall=196322
2021-06-21 01:10:58 | INFO | train_inner | epoch 006: 2152 / 3002 loss=2.494, ppl=5.63, wps=5806, ups=0.09, wpb=64767, bsz=128, num_updates=17061, lr=9.98715e-05, gnorm=2, loss_scale=4, train_wall=11, gb_free=2.8, wall=196333
2021-06-21 01:11:10 | INFO | train_inner | epoch 006: 2153 / 3002 loss=2.533, ppl=5.79, wps=5715.2, ups=0.09, wpb=64874, bsz=128, num_updates=17062, lr=9.98715e-05, gnorm=1.937, loss_scale=4, train_wall=11, gb_free=2.8, wall=196344
2021-06-21 01:11:21 | INFO | train_inner | epoch 006: 2154 / 3002 loss=2.334, ppl=5.04, wps=5867.8, ups=0.09, wpb=64860, bsz=128, num_updates=17063, lr=9.98715e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=196355
2021-06-21 01:11:32 | INFO | train_inner | epoch 006: 2155 / 3002 loss=2.412, ppl=5.32, wps=5778.4, ups=0.09, wpb=64848, bsz=128, num_updates=17064, lr=9.98715e-05, gnorm=1.967, loss_scale=4, train_wall=11, gb_free=2.8, wall=196366
2021-06-21 01:11:43 | INFO | train_inner | epoch 006: 2156 / 3002 loss=2.348, ppl=5.09, wps=5877.6, ups=0.09, wpb=64831, bsz=128, num_updates=17065, lr=9.98715e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=196377
2021-06-21 01:11:54 | INFO | train_inner | epoch 006: 2157 / 3002 loss=2.488, ppl=5.61, wps=5886.5, ups=0.09, wpb=64905, bsz=128, num_updates=17066, lr=9.98715e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=196388
2021-06-21 01:12:05 | INFO | train_inner | epoch 006: 2158 / 3002 loss=2.549, ppl=5.85, wps=5833, ups=0.09, wpb=64795, bsz=128, num_updates=17067, lr=9.98715e-05, gnorm=1.939, loss_scale=4, train_wall=11, gb_free=2.8, wall=196400
2021-06-21 01:12:16 | INFO | train_inner | epoch 006: 2159 / 3002 loss=2.499, ppl=5.65, wps=5937, ups=0.09, wpb=64865, bsz=128, num_updates=17068, lr=9.98714e-05, gnorm=2.001, loss_scale=4, train_wall=10, gb_free=2.8, wall=196410
2021-06-21 01:12:27 | INFO | train_inner | epoch 006: 2160 / 3002 loss=2.594, ppl=6.04, wps=5787.2, ups=0.09, wpb=64762, bsz=128, num_updates=17069, lr=9.98714e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=196422
2021-06-21 01:12:38 | INFO | train_inner | epoch 006: 2161 / 3002 loss=2.559, ppl=5.89, wps=5864.2, ups=0.09, wpb=64856, bsz=128, num_updates=17070, lr=9.98714e-05, gnorm=1.928, loss_scale=4, train_wall=11, gb_free=2.8, wall=196433
2021-06-21 01:12:50 | INFO | train_inner | epoch 006: 2162 / 3002 loss=2.506, ppl=5.68, wps=5823.9, ups=0.09, wpb=64851, bsz=128, num_updates=17071, lr=9.98714e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=196444
2021-06-21 01:13:01 | INFO | train_inner | epoch 006: 2163 / 3002 loss=2.404, ppl=5.29, wps=5860.5, ups=0.09, wpb=64821, bsz=128, num_updates=17072, lr=9.98714e-05, gnorm=1.948, loss_scale=4, train_wall=11, gb_free=2.8, wall=196455
2021-06-21 01:13:12 | INFO | train_inner | epoch 006: 2164 / 3002 loss=2.451, ppl=5.47, wps=5825.2, ups=0.09, wpb=64840, bsz=128, num_updates=17073, lr=9.98714e-05, gnorm=2.098, loss_scale=4, train_wall=11, gb_free=2.8, wall=196466
2021-06-21 01:13:23 | INFO | train_inner | epoch 006: 2165 / 3002 loss=2.484, ppl=5.59, wps=5896.8, ups=0.09, wpb=64807, bsz=128, num_updates=17074, lr=9.98714e-05, gnorm=1.9, loss_scale=4, train_wall=11, gb_free=2.8, wall=196477
2021-06-21 01:13:34 | INFO | train_inner | epoch 006: 2166 / 3002 loss=2.352, ppl=5.11, wps=5872.8, ups=0.09, wpb=64763, bsz=128, num_updates=17075, lr=9.98714e-05, gnorm=1.909, loss_scale=4, train_wall=11, gb_free=2.8, wall=196488
2021-06-21 01:13:45 | INFO | train_inner | epoch 006: 2167 / 3002 loss=2.503, ppl=5.67, wps=5848.4, ups=0.09, wpb=64804, bsz=128, num_updates=17076, lr=9.98714e-05, gnorm=2.008, loss_scale=4, train_wall=11, gb_free=2.8, wall=196499
2021-06-21 01:13:56 | INFO | train_inner | epoch 006: 2168 / 3002 loss=2.38, ppl=5.2, wps=5865.6, ups=0.09, wpb=64774, bsz=128, num_updates=17077, lr=9.98714e-05, gnorm=2.045, loss_scale=4, train_wall=11, gb_free=2.8, wall=196510
2021-06-21 01:14:07 | INFO | train_inner | epoch 006: 2169 / 3002 loss=2.482, ppl=5.59, wps=5769, ups=0.09, wpb=64830, bsz=128, num_updates=17078, lr=9.98714e-05, gnorm=1.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=196521
2021-06-21 01:14:18 | INFO | train_inner | epoch 006: 2170 / 3002 loss=2.456, ppl=5.49, wps=5858.5, ups=0.09, wpb=64805, bsz=128, num_updates=17079, lr=9.98714e-05, gnorm=2.236, loss_scale=4, train_wall=11, gb_free=2.8, wall=196533
2021-06-21 01:14:29 | INFO | train_inner | epoch 006: 2171 / 3002 loss=2.443, ppl=5.44, wps=5849.3, ups=0.09, wpb=64752, bsz=128, num_updates=17080, lr=9.98713e-05, gnorm=2.038, loss_scale=4, train_wall=11, gb_free=2.8, wall=196544
2021-06-21 01:14:40 | INFO | train_inner | epoch 006: 2172 / 3002 loss=2.453, ppl=5.47, wps=5874.6, ups=0.09, wpb=64777, bsz=128, num_updates=17081, lr=9.98713e-05, gnorm=2.003, loss_scale=4, train_wall=11, gb_free=2.8, wall=196555
2021-06-21 01:14:51 | INFO | train_inner | epoch 006: 2173 / 3002 loss=2.37, ppl=5.17, wps=5928.7, ups=0.09, wpb=64802, bsz=128, num_updates=17082, lr=9.98713e-05, gnorm=1.933, loss_scale=4, train_wall=10, gb_free=2.8, wall=196566
2021-06-21 01:15:03 | INFO | train_inner | epoch 006: 2174 / 3002 loss=2.477, ppl=5.57, wps=5731.4, ups=0.09, wpb=64783, bsz=128, num_updates=17083, lr=9.98713e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=196577
2021-06-21 01:15:13 | INFO | train_inner | epoch 006: 2175 / 3002 loss=2.609, ppl=6.1, wps=5906.9, ups=0.09, wpb=64860, bsz=128, num_updates=17084, lr=9.98713e-05, gnorm=1.937, loss_scale=4, train_wall=10, gb_free=2.8, wall=196588
2021-06-21 01:15:24 | INFO | train_inner | epoch 006: 2176 / 3002 loss=2.654, ppl=6.29, wps=5915.6, ups=0.09, wpb=64880, bsz=128, num_updates=17085, lr=9.98713e-05, gnorm=2.049, loss_scale=4, train_wall=10, gb_free=2.8, wall=196599
2021-06-21 01:15:36 | INFO | train_inner | epoch 006: 2177 / 3002 loss=2.403, ppl=5.29, wps=5825.6, ups=0.09, wpb=64815, bsz=128, num_updates=17086, lr=9.98713e-05, gnorm=2.016, loss_scale=4, train_wall=11, gb_free=2.8, wall=196610
2021-06-21 01:15:47 | INFO | train_inner | epoch 006: 2178 / 3002 loss=2.623, ppl=6.16, wps=5788.2, ups=0.09, wpb=64849, bsz=128, num_updates=17087, lr=9.98713e-05, gnorm=2.022, loss_scale=4, train_wall=11, gb_free=2.8, wall=196621
2021-06-21 01:15:58 | INFO | train_inner | epoch 006: 2179 / 3002 loss=2.456, ppl=5.49, wps=5925.3, ups=0.09, wpb=64927, bsz=128, num_updates=17088, lr=9.98713e-05, gnorm=1.983, loss_scale=4, train_wall=11, gb_free=2.8, wall=196632
2021-06-21 01:16:09 | INFO | train_inner | epoch 006: 2180 / 3002 loss=2.311, ppl=4.96, wps=5822.5, ups=0.09, wpb=64837, bsz=128, num_updates=17089, lr=9.98713e-05, gnorm=4.337, loss_scale=4, train_wall=11, gb_free=2.8, wall=196643
2021-06-21 01:16:20 | INFO | train_inner | epoch 006: 2181 / 3002 loss=2.679, ppl=6.4, wps=5863.4, ups=0.09, wpb=64762, bsz=128, num_updates=17090, lr=9.98713e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=196654
2021-06-21 01:16:31 | INFO | train_inner | epoch 006: 2182 / 3002 loss=2.579, ppl=5.97, wps=5775.6, ups=0.09, wpb=64758, bsz=128, num_updates=17091, lr=9.98713e-05, gnorm=1.93, loss_scale=4, train_wall=11, gb_free=2.8, wall=196665
2021-06-21 01:16:42 | INFO | train_inner | epoch 006: 2183 / 3002 loss=2.42, ppl=5.35, wps=5879.1, ups=0.09, wpb=64782, bsz=128, num_updates=17092, lr=9.98713e-05, gnorm=2.019, loss_scale=4, train_wall=11, gb_free=2.8, wall=196676
2021-06-21 01:16:53 | INFO | train_inner | epoch 006: 2184 / 3002 loss=2.335, ppl=5.05, wps=5822.9, ups=0.09, wpb=64806, bsz=128, num_updates=17093, lr=9.98712e-05, gnorm=1.879, loss_scale=4, train_wall=11, gb_free=2.8, wall=196688
2021-06-21 01:17:04 | INFO | train_inner | epoch 006: 2185 / 3002 loss=2.498, ppl=5.65, wps=5918, ups=0.09, wpb=64909, bsz=128, num_updates=17094, lr=9.98712e-05, gnorm=2.008, loss_scale=4, train_wall=11, gb_free=2.8, wall=196699
2021-06-21 01:17:15 | INFO | train_inner | epoch 006: 2186 / 3002 loss=2.49, ppl=5.62, wps=5773.2, ups=0.09, wpb=64798, bsz=128, num_updates=17095, lr=9.98712e-05, gnorm=2.099, loss_scale=4, train_wall=11, gb_free=2.8, wall=196710
2021-06-21 01:17:27 | INFO | train_inner | epoch 006: 2187 / 3002 loss=2.486, ppl=5.6, wps=5790.9, ups=0.09, wpb=64876, bsz=128, num_updates=17096, lr=9.98712e-05, gnorm=2.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=196721
2021-06-21 01:17:38 | INFO | train_inner | epoch 006: 2188 / 3002 loss=2.662, ppl=6.33, wps=5843.1, ups=0.09, wpb=64821, bsz=128, num_updates=17097, lr=9.98712e-05, gnorm=1.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=196732
2021-06-21 01:17:49 | INFO | train_inner | epoch 006: 2189 / 3002 loss=2.525, ppl=5.76, wps=5775.3, ups=0.09, wpb=64880, bsz=128, num_updates=17098, lr=9.98712e-05, gnorm=2.31, loss_scale=4, train_wall=11, gb_free=2.8, wall=196743
2021-06-21 01:18:00 | INFO | train_inner | epoch 006: 2190 / 3002 loss=2.406, ppl=5.3, wps=5844.6, ups=0.09, wpb=64900, bsz=128, num_updates=17099, lr=9.98712e-05, gnorm=2.522, loss_scale=4, train_wall=11, gb_free=2.8, wall=196754
2021-06-21 01:18:11 | INFO | train_inner | epoch 006: 2191 / 3002 loss=2.453, ppl=5.47, wps=5921.9, ups=0.09, wpb=64819, bsz=128, num_updates=17100, lr=9.98712e-05, gnorm=5.847, loss_scale=4, train_wall=10, gb_free=2.8, wall=196765
2021-06-21 01:18:22 | INFO | train_inner | epoch 006: 2192 / 3002 loss=2.474, ppl=5.55, wps=5880, ups=0.09, wpb=64842, bsz=128, num_updates=17101, lr=9.98712e-05, gnorm=2.016, loss_scale=4, train_wall=11, gb_free=2.8, wall=196776
2021-06-21 01:18:33 | INFO | train_inner | epoch 006: 2193 / 3002 loss=2.566, ppl=5.92, wps=5786.2, ups=0.09, wpb=64793, bsz=128, num_updates=17102, lr=9.98712e-05, gnorm=2.162, loss_scale=4, train_wall=11, gb_free=2.8, wall=196788
2021-06-21 01:18:44 | INFO | train_inner | epoch 006: 2194 / 3002 loss=2.363, ppl=5.15, wps=5922.4, ups=0.09, wpb=64904, bsz=128, num_updates=17103, lr=9.98712e-05, gnorm=3.027, loss_scale=4, train_wall=11, gb_free=2.8, wall=196799
2021-06-21 01:18:55 | INFO | train_inner | epoch 006: 2195 / 3002 loss=2.536, ppl=5.8, wps=5798.7, ups=0.09, wpb=64822, bsz=128, num_updates=17104, lr=9.98712e-05, gnorm=3.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=196810
2021-06-21 01:19:07 | INFO | train_inner | epoch 006: 2196 / 3002 loss=2.488, ppl=5.61, wps=5822.8, ups=0.09, wpb=64874, bsz=128, num_updates=17105, lr=9.98711e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=196821
2021-06-21 01:19:17 | INFO | train_inner | epoch 006: 2197 / 3002 loss=2.475, ppl=5.56, wps=5941.8, ups=0.09, wpb=64801, bsz=128, num_updates=17106, lr=9.98711e-05, gnorm=35.871, loss_scale=4, train_wall=10, gb_free=2.8, wall=196832
2021-06-21 01:19:29 | INFO | train_inner | epoch 006: 2198 / 3002 loss=2.372, ppl=5.18, wps=5787.8, ups=0.09, wpb=64929, bsz=128, num_updates=17107, lr=9.98711e-05, gnorm=2.124, loss_scale=4, train_wall=11, gb_free=2.8, wall=196843
2021-06-21 01:19:40 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-21 01:19:51 | INFO | train_inner | epoch 006: 2200 / 3002 loss=2.429, ppl=5.38, wps=2895.4, ups=0.04, wpb=64831, bsz=128, num_updates=17108, lr=9.98711e-05, gnorm=1.952, loss_scale=2, train_wall=21, gb_free=2.8, wall=196865
2021-06-21 01:20:02 | INFO | train_inner | epoch 006: 2201 / 3002 loss=2.453, ppl=5.48, wps=5885.9, ups=0.09, wpb=64818, bsz=128, num_updates=17109, lr=9.98711e-05, gnorm=2.008, loss_scale=2, train_wall=11, gb_free=2.8, wall=196876
2021-06-21 01:20:13 | INFO | train_inner | epoch 006: 2202 / 3002 loss=2.38, ppl=5.21, wps=5827.1, ups=0.09, wpb=64830, bsz=128, num_updates=17110, lr=9.98711e-05, gnorm=1.927, loss_scale=2, train_wall=11, gb_free=2.8, wall=196888
2021-06-21 01:20:24 | INFO | train_inner | epoch 006: 2203 / 3002 loss=2.554, ppl=5.87, wps=5889, ups=0.09, wpb=64914, bsz=128, num_updates=17111, lr=9.98711e-05, gnorm=2.412, loss_scale=2, train_wall=11, gb_free=2.8, wall=196899
2021-06-21 01:20:35 | INFO | train_inner | epoch 006: 2204 / 3002 loss=2.405, ppl=5.29, wps=5959.8, ups=0.09, wpb=64872, bsz=128, num_updates=17112, lr=9.98711e-05, gnorm=1.957, loss_scale=2, train_wall=10, gb_free=2.8, wall=196909
2021-06-21 01:20:46 | INFO | train_inner | epoch 006: 2205 / 3002 loss=2.394, ppl=5.26, wps=5961.5, ups=0.09, wpb=64823, bsz=128, num_updates=17113, lr=9.98711e-05, gnorm=11.636, loss_scale=2, train_wall=10, gb_free=2.8, wall=196920
2021-06-21 01:20:57 | INFO | train_inner | epoch 006: 2206 / 3002 loss=2.629, ppl=6.19, wps=5724, ups=0.09, wpb=64835, bsz=128, num_updates=17114, lr=9.98711e-05, gnorm=2.243, loss_scale=2, train_wall=11, gb_free=2.8, wall=196932
2021-06-21 01:21:08 | INFO | train_inner | epoch 006: 2207 / 3002 loss=2.631, ppl=6.2, wps=5851.5, ups=0.09, wpb=64805, bsz=128, num_updates=17115, lr=9.98711e-05, gnorm=2.36, loss_scale=2, train_wall=11, gb_free=2.8, wall=196943
2021-06-21 01:21:19 | INFO | train_inner | epoch 006: 2208 / 3002 loss=2.341, ppl=5.06, wps=5886.4, ups=0.09, wpb=64870, bsz=128, num_updates=17116, lr=9.98711e-05, gnorm=2.054, loss_scale=2, train_wall=11, gb_free=2.8, wall=196954
2021-06-21 01:21:30 | INFO | train_inner | epoch 006: 2209 / 3002 loss=2.633, ppl=6.2, wps=5967.8, ups=0.09, wpb=64814, bsz=128, num_updates=17117, lr=9.98711e-05, gnorm=2.586, loss_scale=2, train_wall=10, gb_free=2.8, wall=196965
2021-06-21 01:21:41 | INFO | train_inner | epoch 006: 2210 / 3002 loss=2.591, ppl=6.02, wps=5915.4, ups=0.09, wpb=64773, bsz=128, num_updates=17118, lr=9.9871e-05, gnorm=2.13, loss_scale=2, train_wall=11, gb_free=2.8, wall=196976
2021-06-21 01:21:52 | INFO | train_inner | epoch 006: 2211 / 3002 loss=2.471, ppl=5.54, wps=5840.4, ups=0.09, wpb=64823, bsz=128, num_updates=17119, lr=9.9871e-05, gnorm=2.196, loss_scale=2, train_wall=11, gb_free=2.8, wall=196987
2021-06-21 01:22:03 | INFO | train_inner | epoch 006: 2212 / 3002 loss=2.51, ppl=5.7, wps=5926.5, ups=0.09, wpb=64853, bsz=128, num_updates=17120, lr=9.9871e-05, gnorm=2.266, loss_scale=2, train_wall=10, gb_free=2.8, wall=196998
2021-06-21 01:22:14 | INFO | train_inner | epoch 006: 2213 / 3002 loss=2.462, ppl=5.51, wps=5929.5, ups=0.09, wpb=64825, bsz=128, num_updates=17121, lr=9.9871e-05, gnorm=2.258, loss_scale=2, train_wall=10, gb_free=2.8, wall=197009
2021-06-21 01:22:25 | INFO | train_inner | epoch 006: 2214 / 3002 loss=2.507, ppl=5.69, wps=5778, ups=0.09, wpb=64774, bsz=128, num_updates=17122, lr=9.9871e-05, gnorm=2.378, loss_scale=2, train_wall=11, gb_free=2.8, wall=197020
2021-06-21 01:22:37 | INFO | train_inner | epoch 006: 2215 / 3002 loss=2.609, ppl=6.1, wps=5844.3, ups=0.09, wpb=64845, bsz=128, num_updates=17123, lr=9.9871e-05, gnorm=2.379, loss_scale=2, train_wall=11, gb_free=2.8, wall=197031
2021-06-21 01:22:48 | INFO | train_inner | epoch 006: 2216 / 3002 loss=2.317, ppl=4.98, wps=5858.7, ups=0.09, wpb=64919, bsz=128, num_updates=17124, lr=9.9871e-05, gnorm=2.272, loss_scale=2, train_wall=11, gb_free=2.8, wall=197042
2021-06-21 01:22:59 | INFO | train_inner | epoch 006: 2217 / 3002 loss=2.491, ppl=5.62, wps=5865.4, ups=0.09, wpb=64858, bsz=128, num_updates=17125, lr=9.9871e-05, gnorm=2.043, loss_scale=2, train_wall=11, gb_free=2.8, wall=197053
2021-06-21 01:23:10 | INFO | train_inner | epoch 006: 2218 / 3002 loss=2.564, ppl=5.91, wps=5882.6, ups=0.09, wpb=64846, bsz=128, num_updates=17126, lr=9.9871e-05, gnorm=2.049, loss_scale=2, train_wall=11, gb_free=2.8, wall=197064
2021-06-21 01:23:21 | INFO | train_inner | epoch 006: 2219 / 3002 loss=2.669, ppl=6.36, wps=5762.6, ups=0.09, wpb=64830, bsz=128, num_updates=17127, lr=9.9871e-05, gnorm=2.492, loss_scale=2, train_wall=11, gb_free=2.8, wall=197075
2021-06-21 01:23:32 | INFO | train_inner | epoch 006: 2220 / 3002 loss=2.528, ppl=5.77, wps=5922.8, ups=0.09, wpb=64809, bsz=128, num_updates=17128, lr=9.9871e-05, gnorm=2.097, loss_scale=2, train_wall=10, gb_free=2.8, wall=197086
2021-06-21 01:23:43 | INFO | train_inner | epoch 006: 2221 / 3002 loss=2.509, ppl=5.69, wps=5856.6, ups=0.09, wpb=64818, bsz=128, num_updates=17129, lr=9.9871e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=197097
2021-06-21 01:23:54 | INFO | train_inner | epoch 006: 2222 / 3002 loss=2.602, ppl=6.07, wps=5800.4, ups=0.09, wpb=64801, bsz=128, num_updates=17130, lr=9.98709e-05, gnorm=6.487, loss_scale=2, train_wall=11, gb_free=2.8, wall=197108
2021-06-21 01:24:05 | INFO | train_inner | epoch 006: 2223 / 3002 loss=2.521, ppl=5.74, wps=5847.4, ups=0.09, wpb=64813, bsz=128, num_updates=17131, lr=9.98709e-05, gnorm=2.098, loss_scale=2, train_wall=11, gb_free=2.8, wall=197120
2021-06-21 01:24:16 | INFO | train_inner | epoch 006: 2224 / 3002 loss=2.388, ppl=5.23, wps=5884.9, ups=0.09, wpb=64864, bsz=128, num_updates=17132, lr=9.98709e-05, gnorm=1.957, loss_scale=2, train_wall=11, gb_free=2.8, wall=197131
2021-06-21 01:24:27 | INFO | train_inner | epoch 006: 2225 / 3002 loss=2.574, ppl=5.95, wps=5956.4, ups=0.09, wpb=64709, bsz=128, num_updates=17133, lr=9.98709e-05, gnorm=1.978, loss_scale=2, train_wall=10, gb_free=2.8, wall=197141
2021-06-21 01:24:38 | INFO | train_inner | epoch 006: 2226 / 3002 loss=2.446, ppl=5.45, wps=5836.2, ups=0.09, wpb=64859, bsz=128, num_updates=17134, lr=9.98709e-05, gnorm=2.061, loss_scale=2, train_wall=11, gb_free=2.8, wall=197153
2021-06-21 01:24:49 | INFO | train_inner | epoch 006: 2227 / 3002 loss=2.476, ppl=5.57, wps=5812, ups=0.09, wpb=64839, bsz=128, num_updates=17135, lr=9.98709e-05, gnorm=2.05, loss_scale=2, train_wall=11, gb_free=2.8, wall=197164
2021-06-21 01:25:00 | INFO | train_inner | epoch 006: 2228 / 3002 loss=2.328, ppl=5.02, wps=5827.4, ups=0.09, wpb=64841, bsz=128, num_updates=17136, lr=9.98709e-05, gnorm=1.951, loss_scale=2, train_wall=11, gb_free=2.8, wall=197175
2021-06-21 01:25:11 | INFO | train_inner | epoch 006: 2229 / 3002 loss=2.547, ppl=5.84, wps=5918.4, ups=0.09, wpb=64807, bsz=128, num_updates=17137, lr=9.98709e-05, gnorm=2.31, loss_scale=2, train_wall=10, gb_free=2.8, wall=197186
2021-06-21 01:25:22 | INFO | train_inner | epoch 006: 2230 / 3002 loss=2.367, ppl=5.16, wps=5877.8, ups=0.09, wpb=64778, bsz=128, num_updates=17138, lr=9.98709e-05, gnorm=1.941, loss_scale=2, train_wall=11, gb_free=2.8, wall=197197
2021-06-21 01:25:34 | INFO | train_inner | epoch 006: 2231 / 3002 loss=2.4, ppl=5.28, wps=5833.9, ups=0.09, wpb=64788, bsz=128, num_updates=17139, lr=9.98709e-05, gnorm=2.087, loss_scale=2, train_wall=11, gb_free=2.8, wall=197208
2021-06-21 01:25:45 | INFO | train_inner | epoch 006: 2232 / 3002 loss=2.538, ppl=5.81, wps=5835.2, ups=0.09, wpb=64729, bsz=128, num_updates=17140, lr=9.98709e-05, gnorm=2.409, loss_scale=2, train_wall=11, gb_free=2.8, wall=197219
2021-06-21 01:25:56 | INFO | train_inner | epoch 006: 2233 / 3002 loss=2.428, ppl=5.38, wps=5771.3, ups=0.09, wpb=64856, bsz=128, num_updates=17141, lr=9.98709e-05, gnorm=1.958, loss_scale=2, train_wall=11, gb_free=2.8, wall=197230
2021-06-21 01:26:07 | INFO | train_inner | epoch 006: 2234 / 3002 loss=2.512, ppl=5.7, wps=5905.1, ups=0.09, wpb=64866, bsz=128, num_updates=17142, lr=9.98709e-05, gnorm=2.181, loss_scale=2, train_wall=11, gb_free=2.8, wall=197241
2021-06-21 01:26:18 | INFO | train_inner | epoch 006: 2235 / 3002 loss=2.463, ppl=5.51, wps=5816.7, ups=0.09, wpb=64844, bsz=128, num_updates=17143, lr=9.98708e-05, gnorm=1.974, loss_scale=2, train_wall=11, gb_free=2.8, wall=197252
2021-06-21 01:26:29 | INFO | train_inner | epoch 006: 2236 / 3002 loss=2.53, ppl=5.77, wps=5843.1, ups=0.09, wpb=64909, bsz=128, num_updates=17144, lr=9.98708e-05, gnorm=35.888, loss_scale=2, train_wall=11, gb_free=2.8, wall=197263
2021-06-21 01:26:40 | INFO | train_inner | epoch 006: 2237 / 3002 loss=2.458, ppl=5.49, wps=5822.6, ups=0.09, wpb=64927, bsz=128, num_updates=17145, lr=9.98708e-05, gnorm=2.193, loss_scale=2, train_wall=11, gb_free=2.8, wall=197275
2021-06-21 01:26:51 | INFO | train_inner | epoch 006: 2238 / 3002 loss=2.379, ppl=5.2, wps=5870.4, ups=0.09, wpb=64796, bsz=128, num_updates=17146, lr=9.98708e-05, gnorm=4.402, loss_scale=2, train_wall=11, gb_free=2.8, wall=197286
2021-06-21 01:27:02 | INFO | train_inner | epoch 006: 2239 / 3002 loss=2.494, ppl=5.63, wps=5808.5, ups=0.09, wpb=64836, bsz=128, num_updates=17147, lr=9.98708e-05, gnorm=2.901, loss_scale=2, train_wall=11, gb_free=2.8, wall=197297
2021-06-21 01:27:13 | INFO | train_inner | epoch 006: 2240 / 3002 loss=2.499, ppl=5.65, wps=5923.6, ups=0.09, wpb=64819, bsz=128, num_updates=17148, lr=9.98708e-05, gnorm=5.692, loss_scale=2, train_wall=10, gb_free=2.8, wall=197308
2021-06-21 01:27:25 | INFO | train_inner | epoch 006: 2241 / 3002 loss=2.36, ppl=5.14, wps=5847.1, ups=0.09, wpb=64842, bsz=128, num_updates=17149, lr=9.98708e-05, gnorm=3.733, loss_scale=2, train_wall=11, gb_free=2.8, wall=197319
2021-06-21 01:27:36 | INFO | train_inner | epoch 006: 2242 / 3002 loss=2.447, ppl=5.45, wps=5814.4, ups=0.09, wpb=64729, bsz=128, num_updates=17150, lr=9.98708e-05, gnorm=4.58, loss_scale=2, train_wall=11, gb_free=2.8, wall=197330
2021-06-21 01:27:47 | INFO | train_inner | epoch 006: 2243 / 3002 loss=2.48, ppl=5.58, wps=5879.2, ups=0.09, wpb=64821, bsz=128, num_updates=17151, lr=9.98708e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=197341
2021-06-21 01:27:58 | INFO | train_inner | epoch 006: 2244 / 3002 loss=2.511, ppl=5.7, wps=5858.7, ups=0.09, wpb=64848, bsz=128, num_updates=17152, lr=9.98708e-05, gnorm=2.147, loss_scale=2, train_wall=11, gb_free=2.8, wall=197352
2021-06-21 01:28:09 | INFO | train_inner | epoch 006: 2245 / 3002 loss=2.564, ppl=5.91, wps=5764.6, ups=0.09, wpb=64840, bsz=128, num_updates=17153, lr=9.98708e-05, gnorm=2.068, loss_scale=2, train_wall=11, gb_free=2.8, wall=197363
2021-06-21 01:28:20 | INFO | train_inner | epoch 006: 2246 / 3002 loss=2.533, ppl=5.79, wps=5963.3, ups=0.09, wpb=64798, bsz=128, num_updates=17154, lr=9.98708e-05, gnorm=2.025, loss_scale=2, train_wall=10, gb_free=2.8, wall=197374
2021-06-21 01:28:31 | INFO | train_inner | epoch 006: 2247 / 3002 loss=2.396, ppl=5.26, wps=5912.6, ups=0.09, wpb=64820, bsz=128, num_updates=17155, lr=9.98707e-05, gnorm=2.862, loss_scale=2, train_wall=11, gb_free=2.8, wall=197385
2021-06-21 01:28:42 | INFO | train_inner | epoch 006: 2248 / 3002 loss=2.602, ppl=6.07, wps=5879.5, ups=0.09, wpb=64787, bsz=128, num_updates=17156, lr=9.98707e-05, gnorm=2.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=197396
2021-06-21 01:28:53 | INFO | train_inner | epoch 006: 2249 / 3002 loss=2.612, ppl=6.11, wps=5887.2, ups=0.09, wpb=64744, bsz=128, num_updates=17157, lr=9.98707e-05, gnorm=2.014, loss_scale=2, train_wall=11, gb_free=2.8, wall=197407
2021-06-21 01:29:04 | INFO | train_inner | epoch 006: 2250 / 3002 loss=2.489, ppl=5.61, wps=5908.1, ups=0.09, wpb=64947, bsz=128, num_updates=17158, lr=9.98707e-05, gnorm=2.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=197418
2021-06-21 01:29:15 | INFO | train_inner | epoch 006: 2251 / 3002 loss=2.337, ppl=5.05, wps=5901.5, ups=0.09, wpb=64835, bsz=128, num_updates=17159, lr=9.98707e-05, gnorm=2.143, loss_scale=2, train_wall=11, gb_free=2.8, wall=197429
2021-06-21 01:29:26 | INFO | train_inner | epoch 006: 2252 / 3002 loss=2.457, ppl=5.49, wps=5867.4, ups=0.09, wpb=64801, bsz=128, num_updates=17160, lr=9.98707e-05, gnorm=2.16, loss_scale=2, train_wall=11, gb_free=2.8, wall=197440
2021-06-21 01:29:37 | INFO | train_inner | epoch 006: 2253 / 3002 loss=2.497, ppl=5.65, wps=5915.4, ups=0.09, wpb=64833, bsz=128, num_updates=17161, lr=9.98707e-05, gnorm=1.995, loss_scale=2, train_wall=11, gb_free=2.8, wall=197451
2021-06-21 01:29:48 | INFO | train_inner | epoch 006: 2254 / 3002 loss=2.389, ppl=5.24, wps=5848.5, ups=0.09, wpb=64775, bsz=128, num_updates=17162, lr=9.98707e-05, gnorm=2.202, loss_scale=2, train_wall=11, gb_free=2.8, wall=197462
2021-06-21 01:29:59 | INFO | train_inner | epoch 006: 2255 / 3002 loss=2.456, ppl=5.49, wps=5841.4, ups=0.09, wpb=64830, bsz=128, num_updates=17163, lr=9.98707e-05, gnorm=2.417, loss_scale=2, train_wall=11, gb_free=2.8, wall=197473
2021-06-21 01:30:10 | INFO | train_inner | epoch 006: 2256 / 3002 loss=2.504, ppl=5.67, wps=5940.9, ups=0.09, wpb=64748, bsz=128, num_updates=17164, lr=9.98707e-05, gnorm=1.918, loss_scale=2, train_wall=10, gb_free=2.8, wall=197484
2021-06-21 01:30:21 | INFO | train_inner | epoch 006: 2257 / 3002 loss=2.465, ppl=5.52, wps=5712, ups=0.09, wpb=64835, bsz=128, num_updates=17165, lr=9.98707e-05, gnorm=2.223, loss_scale=2, train_wall=11, gb_free=2.8, wall=197496
2021-06-21 01:30:32 | INFO | train_inner | epoch 006: 2258 / 3002 loss=2.395, ppl=5.26, wps=5774.1, ups=0.09, wpb=64850, bsz=128, num_updates=17166, lr=9.98707e-05, gnorm=1.902, loss_scale=2, train_wall=11, gb_free=2.8, wall=197507
2021-06-21 01:30:44 | INFO | train_inner | epoch 006: 2259 / 3002 loss=2.418, ppl=5.34, wps=5857.4, ups=0.09, wpb=64901, bsz=128, num_updates=17167, lr=9.98707e-05, gnorm=2.945, loss_scale=2, train_wall=11, gb_free=2.8, wall=197518
2021-06-21 01:30:54 | INFO | train_inner | epoch 006: 2260 / 3002 loss=2.671, ppl=6.37, wps=5983.7, ups=0.09, wpb=64787, bsz=128, num_updates=17168, lr=9.98706e-05, gnorm=2.101, loss_scale=2, train_wall=10, gb_free=2.8, wall=197529
2021-06-21 01:31:06 | INFO | train_inner | epoch 006: 2261 / 3002 loss=2.471, ppl=5.54, wps=5794.4, ups=0.09, wpb=64764, bsz=128, num_updates=17169, lr=9.98706e-05, gnorm=2.112, loss_scale=2, train_wall=11, gb_free=2.8, wall=197540
2021-06-21 01:31:17 | INFO | train_inner | epoch 006: 2262 / 3002 loss=2.676, ppl=6.39, wps=5779.6, ups=0.09, wpb=64872, bsz=128, num_updates=17170, lr=9.98706e-05, gnorm=2.01, loss_scale=2, train_wall=11, gb_free=2.8, wall=197551
2021-06-21 01:31:28 | INFO | train_inner | epoch 006: 2263 / 3002 loss=2.599, ppl=6.06, wps=5975, ups=0.09, wpb=64898, bsz=128, num_updates=17171, lr=9.98706e-05, gnorm=2.014, loss_scale=2, train_wall=10, gb_free=2.8, wall=197562
2021-06-21 01:31:39 | INFO | train_inner | epoch 006: 2264 / 3002 loss=2.416, ppl=5.34, wps=5855.9, ups=0.09, wpb=64786, bsz=128, num_updates=17172, lr=9.98706e-05, gnorm=2.033, loss_scale=2, train_wall=11, gb_free=2.8, wall=197573
2021-06-21 01:31:50 | INFO | train_inner | epoch 006: 2265 / 3002 loss=2.567, ppl=5.92, wps=5884.4, ups=0.09, wpb=64817, bsz=128, num_updates=17173, lr=9.98706e-05, gnorm=2.005, loss_scale=2, train_wall=11, gb_free=2.8, wall=197584
2021-06-21 01:32:01 | INFO | train_inner | epoch 006: 2266 / 3002 loss=2.658, ppl=6.31, wps=5773.5, ups=0.09, wpb=64817, bsz=128, num_updates=17174, lr=9.98706e-05, gnorm=2.177, loss_scale=2, train_wall=11, gb_free=2.8, wall=197595
2021-06-21 01:32:12 | INFO | train_inner | epoch 006: 2267 / 3002 loss=2.363, ppl=5.14, wps=5835.1, ups=0.09, wpb=64804, bsz=128, num_updates=17175, lr=9.98706e-05, gnorm=1.975, loss_scale=2, train_wall=11, gb_free=2.8, wall=197606
2021-06-21 01:32:23 | INFO | train_inner | epoch 006: 2268 / 3002 loss=2.657, ppl=6.31, wps=5858.7, ups=0.09, wpb=64708, bsz=128, num_updates=17176, lr=9.98706e-05, gnorm=1.989, loss_scale=2, train_wall=11, gb_free=2.8, wall=197617
2021-06-21 01:32:34 | INFO | train_inner | epoch 006: 2269 / 3002 loss=2.453, ppl=5.47, wps=5813.4, ups=0.09, wpb=64809, bsz=128, num_updates=17177, lr=9.98706e-05, gnorm=2.011, loss_scale=2, train_wall=11, gb_free=2.8, wall=197629
2021-06-21 01:32:45 | INFO | train_inner | epoch 006: 2270 / 3002 loss=2.446, ppl=5.45, wps=5897.5, ups=0.09, wpb=64890, bsz=128, num_updates=17178, lr=9.98706e-05, gnorm=2.037, loss_scale=2, train_wall=11, gb_free=2.8, wall=197640
2021-06-21 01:32:56 | INFO | train_inner | epoch 006: 2271 / 3002 loss=2.508, ppl=5.69, wps=5770.5, ups=0.09, wpb=64845, bsz=128, num_updates=17179, lr=9.98706e-05, gnorm=2.33, loss_scale=2, train_wall=11, gb_free=2.8, wall=197651
2021-06-21 01:33:08 | INFO | train_inner | epoch 006: 2272 / 3002 loss=2.479, ppl=5.58, wps=5888.3, ups=0.09, wpb=64911, bsz=128, num_updates=17180, lr=9.98705e-05, gnorm=1.873, loss_scale=2, train_wall=11, gb_free=2.8, wall=197662
2021-06-21 01:33:19 | INFO | train_inner | epoch 006: 2273 / 3002 loss=2.489, ppl=5.62, wps=5752, ups=0.09, wpb=64827, bsz=128, num_updates=17181, lr=9.98705e-05, gnorm=2.083, loss_scale=2, train_wall=11, gb_free=2.8, wall=197673
2021-06-21 01:33:30 | INFO | train_inner | epoch 006: 2274 / 3002 loss=2.425, ppl=5.37, wps=5862, ups=0.09, wpb=64857, bsz=128, num_updates=17182, lr=9.98705e-05, gnorm=2.126, loss_scale=2, train_wall=11, gb_free=2.8, wall=197684
2021-06-21 01:33:41 | INFO | train_inner | epoch 006: 2275 / 3002 loss=2.629, ppl=6.19, wps=5860, ups=0.09, wpb=64860, bsz=128, num_updates=17183, lr=9.98705e-05, gnorm=1.998, loss_scale=2, train_wall=11, gb_free=2.8, wall=197695
2021-06-21 01:33:52 | INFO | train_inner | epoch 006: 2276 / 3002 loss=2.416, ppl=5.34, wps=5872.4, ups=0.09, wpb=64850, bsz=128, num_updates=17184, lr=9.98705e-05, gnorm=1.95, loss_scale=2, train_wall=11, gb_free=2.8, wall=197706
2021-06-21 01:34:03 | INFO | train_inner | epoch 006: 2277 / 3002 loss=2.529, ppl=5.77, wps=5959.5, ups=0.09, wpb=64785, bsz=128, num_updates=17185, lr=9.98705e-05, gnorm=1.93, loss_scale=2, train_wall=10, gb_free=2.8, wall=197717
2021-06-21 01:34:14 | INFO | train_inner | epoch 006: 2278 / 3002 loss=2.431, ppl=5.39, wps=5766.5, ups=0.09, wpb=64823, bsz=128, num_updates=17186, lr=9.98705e-05, gnorm=2.27, loss_scale=2, train_wall=11, gb_free=2.8, wall=197728
2021-06-21 01:34:25 | INFO | train_inner | epoch 006: 2279 / 3002 loss=2.486, ppl=5.6, wps=5860.9, ups=0.09, wpb=64770, bsz=128, num_updates=17187, lr=9.98705e-05, gnorm=1.87, loss_scale=2, train_wall=11, gb_free=2.8, wall=197739
2021-06-21 01:34:36 | INFO | train_inner | epoch 006: 2280 / 3002 loss=2.427, ppl=5.38, wps=5879.5, ups=0.09, wpb=64853, bsz=128, num_updates=17188, lr=9.98705e-05, gnorm=1.986, loss_scale=2, train_wall=11, gb_free=2.8, wall=197751
2021-06-21 01:34:47 | INFO | train_inner | epoch 006: 2281 / 3002 loss=2.561, ppl=5.9, wps=5871.1, ups=0.09, wpb=64887, bsz=128, num_updates=17189, lr=9.98705e-05, gnorm=1.968, loss_scale=2, train_wall=11, gb_free=2.8, wall=197762
2021-06-21 01:34:58 | INFO | train_inner | epoch 006: 2282 / 3002 loss=2.299, ppl=4.92, wps=5769.7, ups=0.09, wpb=64842, bsz=128, num_updates=17190, lr=9.98705e-05, gnorm=1.974, loss_scale=2, train_wall=11, gb_free=2.8, wall=197773
2021-06-21 01:35:10 | INFO | train_inner | epoch 006: 2283 / 3002 loss=2.39, ppl=5.24, wps=5875.5, ups=0.09, wpb=64939, bsz=128, num_updates=17191, lr=9.98705e-05, gnorm=1.964, loss_scale=2, train_wall=11, gb_free=2.8, wall=197784
2021-06-21 01:35:21 | INFO | train_inner | epoch 006: 2284 / 3002 loss=2.521, ppl=5.74, wps=5818.3, ups=0.09, wpb=64865, bsz=128, num_updates=17192, lr=9.98705e-05, gnorm=2.075, loss_scale=2, train_wall=11, gb_free=2.8, wall=197795
2021-06-21 01:35:31 | INFO | train_inner | epoch 006: 2285 / 3002 loss=2.485, ppl=5.6, wps=6016.1, ups=0.09, wpb=64856, bsz=128, num_updates=17193, lr=9.98704e-05, gnorm=2.687, loss_scale=2, train_wall=10, gb_free=2.8, wall=197806
2021-06-21 01:35:42 | INFO | train_inner | epoch 006: 2286 / 3002 loss=2.423, ppl=5.36, wps=5964.1, ups=0.09, wpb=64844, bsz=128, num_updates=17194, lr=9.98704e-05, gnorm=2.832, loss_scale=2, train_wall=10, gb_free=2.8, wall=197817
2021-06-21 01:35:54 | INFO | train_inner | epoch 006: 2287 / 3002 loss=2.443, ppl=5.44, wps=5755.7, ups=0.09, wpb=64818, bsz=128, num_updates=17195, lr=9.98704e-05, gnorm=1.867, loss_scale=2, train_wall=11, gb_free=2.8, wall=197828
2021-06-21 01:36:05 | INFO | train_inner | epoch 006: 2288 / 3002 loss=2.549, ppl=5.85, wps=5821.5, ups=0.09, wpb=64765, bsz=128, num_updates=17196, lr=9.98704e-05, gnorm=1.957, loss_scale=2, train_wall=11, gb_free=2.8, wall=197839
2021-06-21 01:36:16 | INFO | train_inner | epoch 006: 2289 / 3002 loss=2.546, ppl=5.84, wps=5991.5, ups=0.09, wpb=64871, bsz=128, num_updates=17197, lr=9.98704e-05, gnorm=1.948, loss_scale=2, train_wall=10, gb_free=2.8, wall=197850
2021-06-21 01:36:27 | INFO | train_inner | epoch 006: 2290 / 3002 loss=2.642, ppl=6.24, wps=5886.3, ups=0.09, wpb=64880, bsz=128, num_updates=17198, lr=9.98704e-05, gnorm=1.977, loss_scale=2, train_wall=11, gb_free=2.8, wall=197861
2021-06-21 01:36:38 | INFO | train_inner | epoch 006: 2291 / 3002 loss=2.531, ppl=5.78, wps=5841.8, ups=0.09, wpb=64829, bsz=128, num_updates=17199, lr=9.98704e-05, gnorm=2.037, loss_scale=2, train_wall=11, gb_free=2.8, wall=197872
2021-06-21 01:36:49 | INFO | train_inner | epoch 006: 2292 / 3002 loss=2.594, ppl=6.04, wps=5847.2, ups=0.09, wpb=64783, bsz=128, num_updates=17200, lr=9.98704e-05, gnorm=2.164, loss_scale=2, train_wall=11, gb_free=2.8, wall=197883
2021-06-21 01:37:00 | INFO | train_inner | epoch 006: 2293 / 3002 loss=2.401, ppl=5.28, wps=5779.2, ups=0.09, wpb=64813, bsz=128, num_updates=17201, lr=9.98704e-05, gnorm=1.943, loss_scale=2, train_wall=11, gb_free=2.8, wall=197894
2021-06-21 01:37:11 | INFO | train_inner | epoch 006: 2294 / 3002 loss=2.632, ppl=6.2, wps=5846.4, ups=0.09, wpb=64803, bsz=128, num_updates=17202, lr=9.98704e-05, gnorm=1.943, loss_scale=2, train_wall=11, gb_free=2.8, wall=197905
2021-06-21 01:37:22 | INFO | train_inner | epoch 006: 2295 / 3002 loss=2.57, ppl=5.94, wps=5810.2, ups=0.09, wpb=64863, bsz=128, num_updates=17203, lr=9.98704e-05, gnorm=1.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=197917
2021-06-21 01:37:33 | INFO | train_inner | epoch 006: 2296 / 3002 loss=2.543, ppl=5.83, wps=5936.1, ups=0.09, wpb=64837, bsz=128, num_updates=17204, lr=9.98704e-05, gnorm=2.034, loss_scale=2, train_wall=10, gb_free=2.8, wall=197927
2021-06-21 01:37:44 | INFO | train_inner | epoch 006: 2297 / 3002 loss=2.497, ppl=5.64, wps=6025.7, ups=0.09, wpb=64814, bsz=128, num_updates=17205, lr=9.98703e-05, gnorm=3.519, loss_scale=2, train_wall=10, gb_free=2.8, wall=197938
2021-06-21 01:37:55 | INFO | train_inner | epoch 006: 2298 / 3002 loss=2.591, ppl=6.02, wps=5832.7, ups=0.09, wpb=64838, bsz=128, num_updates=17206, lr=9.98703e-05, gnorm=1.971, loss_scale=2, train_wall=11, gb_free=2.8, wall=197949
2021-06-21 01:38:06 | INFO | train_inner | epoch 006: 2299 / 3002 loss=2.29, ppl=4.89, wps=5906.7, ups=0.09, wpb=64900, bsz=128, num_updates=17207, lr=9.98703e-05, gnorm=1.881, loss_scale=2, train_wall=11, gb_free=2.8, wall=197960
2021-06-21 01:38:17 | INFO | train_inner | epoch 006: 2300 / 3002 loss=2.469, ppl=5.54, wps=5857.3, ups=0.09, wpb=64845, bsz=128, num_updates=17208, lr=9.98703e-05, gnorm=1.898, loss_scale=2, train_wall=11, gb_free=2.8, wall=197971
2021-06-21 01:38:28 | INFO | train_inner | epoch 006: 2301 / 3002 loss=2.444, ppl=5.44, wps=5800.9, ups=0.09, wpb=64910, bsz=128, num_updates=17209, lr=9.98703e-05, gnorm=2.023, loss_scale=2, train_wall=11, gb_free=2.8, wall=197983
2021-06-21 01:38:39 | INFO | train_inner | epoch 006: 2302 / 3002 loss=2.503, ppl=5.67, wps=5930.9, ups=0.09, wpb=64876, bsz=128, num_updates=17210, lr=9.98703e-05, gnorm=2.079, loss_scale=2, train_wall=10, gb_free=2.8, wall=197994
2021-06-21 01:38:50 | INFO | train_inner | epoch 006: 2303 / 3002 loss=2.337, ppl=5.05, wps=5895, ups=0.09, wpb=64797, bsz=128, num_updates=17211, lr=9.98703e-05, gnorm=1.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=198005
2021-06-21 01:39:01 | INFO | train_inner | epoch 006: 2304 / 3002 loss=2.43, ppl=5.39, wps=5841.1, ups=0.09, wpb=64862, bsz=128, num_updates=17212, lr=9.98703e-05, gnorm=1.961, loss_scale=2, train_wall=11, gb_free=2.8, wall=198016
2021-06-21 01:39:12 | INFO | train_inner | epoch 006: 2305 / 3002 loss=2.509, ppl=5.69, wps=5820.5, ups=0.09, wpb=64810, bsz=128, num_updates=17213, lr=9.98703e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=198027
2021-06-21 01:39:23 | INFO | train_inner | epoch 006: 2306 / 3002 loss=2.565, ppl=5.92, wps=5893.2, ups=0.09, wpb=64764, bsz=128, num_updates=17214, lr=9.98703e-05, gnorm=1.959, loss_scale=2, train_wall=11, gb_free=2.8, wall=198038
2021-06-21 01:39:34 | INFO | train_inner | epoch 006: 2307 / 3002 loss=2.482, ppl=5.59, wps=5874.7, ups=0.09, wpb=64897, bsz=128, num_updates=17215, lr=9.98703e-05, gnorm=1.915, loss_scale=2, train_wall=11, gb_free=2.8, wall=198049
2021-06-21 01:39:46 | INFO | train_inner | epoch 006: 2308 / 3002 loss=2.468, ppl=5.53, wps=5749.3, ups=0.09, wpb=64797, bsz=128, num_updates=17216, lr=9.98703e-05, gnorm=1.884, loss_scale=2, train_wall=11, gb_free=2.8, wall=198060
2021-06-21 01:39:57 | INFO | train_inner | epoch 006: 2309 / 3002 loss=2.434, ppl=5.4, wps=5919.5, ups=0.09, wpb=64781, bsz=128, num_updates=17217, lr=9.98703e-05, gnorm=1.89, loss_scale=2, train_wall=10, gb_free=2.8, wall=198071
2021-06-21 01:40:08 | INFO | train_inner | epoch 006: 2310 / 3002 loss=2.477, ppl=5.57, wps=5841.3, ups=0.09, wpb=64873, bsz=128, num_updates=17218, lr=9.98702e-05, gnorm=1.942, loss_scale=2, train_wall=11, gb_free=2.8, wall=198082
2021-06-21 01:40:19 | INFO | train_inner | epoch 006: 2311 / 3002 loss=2.425, ppl=5.37, wps=5789.4, ups=0.09, wpb=64786, bsz=128, num_updates=17219, lr=9.98702e-05, gnorm=1.913, loss_scale=2, train_wall=11, gb_free=2.8, wall=198093
2021-06-21 01:40:30 | INFO | train_inner | epoch 006: 2312 / 3002 loss=2.444, ppl=5.44, wps=5863.4, ups=0.09, wpb=64862, bsz=128, num_updates=17220, lr=9.98702e-05, gnorm=2.045, loss_scale=2, train_wall=11, gb_free=2.8, wall=198104
2021-06-21 01:40:41 | INFO | train_inner | epoch 006: 2313 / 3002 loss=2.412, ppl=5.32, wps=6025.6, ups=0.09, wpb=64846, bsz=128, num_updates=17221, lr=9.98702e-05, gnorm=1.919, loss_scale=2, train_wall=10, gb_free=2.8, wall=198115
2021-06-21 01:40:52 | INFO | train_inner | epoch 006: 2314 / 3002 loss=2.476, ppl=5.57, wps=5891.5, ups=0.09, wpb=64814, bsz=128, num_updates=17222, lr=9.98702e-05, gnorm=1.934, loss_scale=2, train_wall=11, gb_free=2.8, wall=198126
2021-06-21 01:41:03 | INFO | train_inner | epoch 006: 2315 / 3002 loss=2.607, ppl=6.09, wps=5756.5, ups=0.09, wpb=64891, bsz=128, num_updates=17223, lr=9.98702e-05, gnorm=2.273, loss_scale=2, train_wall=11, gb_free=2.8, wall=198137
2021-06-21 01:41:14 | INFO | train_inner | epoch 006: 2316 / 3002 loss=2.479, ppl=5.58, wps=5813.5, ups=0.09, wpb=64847, bsz=128, num_updates=17224, lr=9.98702e-05, gnorm=19.984, loss_scale=2, train_wall=11, gb_free=2.8, wall=198149
2021-06-21 01:41:25 | INFO | train_inner | epoch 006: 2317 / 3002 loss=2.75, ppl=6.73, wps=5802.3, ups=0.09, wpb=64799, bsz=128, num_updates=17225, lr=9.98702e-05, gnorm=1.991, loss_scale=2, train_wall=11, gb_free=2.8, wall=198160
2021-06-21 01:41:36 | INFO | train_inner | epoch 006: 2318 / 3002 loss=2.434, ppl=5.41, wps=5853.4, ups=0.09, wpb=64850, bsz=128, num_updates=17226, lr=9.98702e-05, gnorm=1.928, loss_scale=2, train_wall=11, gb_free=2.8, wall=198171
2021-06-21 01:41:47 | INFO | train_inner | epoch 006: 2319 / 3002 loss=2.574, ppl=5.96, wps=5933.6, ups=0.09, wpb=64819, bsz=128, num_updates=17227, lr=9.98702e-05, gnorm=1.972, loss_scale=2, train_wall=10, gb_free=2.8, wall=198182
2021-06-21 01:41:58 | INFO | train_inner | epoch 006: 2320 / 3002 loss=2.566, ppl=5.92, wps=5886.1, ups=0.09, wpb=64786, bsz=128, num_updates=17228, lr=9.98702e-05, gnorm=2.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=198193
2021-06-21 01:42:09 | INFO | train_inner | epoch 006: 2321 / 3002 loss=2.483, ppl=5.59, wps=5870.1, ups=0.09, wpb=64782, bsz=128, num_updates=17229, lr=9.98702e-05, gnorm=2.013, loss_scale=2, train_wall=11, gb_free=2.8, wall=198204
2021-06-21 01:42:21 | INFO | train_inner | epoch 006: 2322 / 3002 loss=2.546, ppl=5.84, wps=5828.3, ups=0.09, wpb=64755, bsz=128, num_updates=17230, lr=9.98701e-05, gnorm=2.077, loss_scale=2, train_wall=11, gb_free=2.8, wall=198215
2021-06-21 01:42:32 | INFO | train_inner | epoch 006: 2323 / 3002 loss=2.403, ppl=5.29, wps=5852.3, ups=0.09, wpb=64893, bsz=128, num_updates=17231, lr=9.98701e-05, gnorm=1.987, loss_scale=2, train_wall=11, gb_free=2.8, wall=198226
2021-06-21 01:42:43 | INFO | train_inner | epoch 006: 2324 / 3002 loss=2.408, ppl=5.31, wps=5835.3, ups=0.09, wpb=64906, bsz=128, num_updates=17232, lr=9.98701e-05, gnorm=2.021, loss_scale=2, train_wall=11, gb_free=2.8, wall=198237
2021-06-21 01:42:54 | INFO | train_inner | epoch 006: 2325 / 3002 loss=2.468, ppl=5.53, wps=5780.6, ups=0.09, wpb=64745, bsz=128, num_updates=17233, lr=9.98701e-05, gnorm=2.043, loss_scale=2, train_wall=11, gb_free=2.8, wall=198248
2021-06-21 01:43:05 | INFO | train_inner | epoch 006: 2326 / 3002 loss=2.48, ppl=5.58, wps=5842.1, ups=0.09, wpb=64833, bsz=128, num_updates=17234, lr=9.98701e-05, gnorm=2.037, loss_scale=2, train_wall=11, gb_free=2.8, wall=198259
2021-06-21 01:43:16 | INFO | train_inner | epoch 006: 2327 / 3002 loss=2.352, ppl=5.11, wps=5952, ups=0.09, wpb=64963, bsz=128, num_updates=17235, lr=9.98701e-05, gnorm=1.953, loss_scale=4, train_wall=10, gb_free=2.8, wall=198270
2021-06-21 01:43:27 | INFO | train_inner | epoch 006: 2328 / 3002 loss=2.425, ppl=5.37, wps=5881.6, ups=0.09, wpb=64854, bsz=128, num_updates=17236, lr=9.98701e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=198281
2021-06-21 01:43:38 | INFO | train_inner | epoch 006: 2329 / 3002 loss=2.383, ppl=5.22, wps=5684.7, ups=0.09, wpb=64777, bsz=128, num_updates=17237, lr=9.98701e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=198293
2021-06-21 01:43:49 | INFO | train_inner | epoch 006: 2330 / 3002 loss=2.314, ppl=4.97, wps=5874.1, ups=0.09, wpb=64845, bsz=128, num_updates=17238, lr=9.98701e-05, gnorm=2.007, loss_scale=4, train_wall=11, gb_free=2.8, wall=198304
2021-06-21 01:44:01 | INFO | train_inner | epoch 006: 2331 / 3002 loss=2.577, ppl=5.97, wps=5779.7, ups=0.09, wpb=64864, bsz=128, num_updates=17239, lr=9.98701e-05, gnorm=2.332, loss_scale=4, train_wall=11, gb_free=2.8, wall=198315
2021-06-21 01:44:12 | INFO | train_inner | epoch 006: 2332 / 3002 loss=2.474, ppl=5.56, wps=5896.2, ups=0.09, wpb=64856, bsz=128, num_updates=17240, lr=9.98701e-05, gnorm=1.947, loss_scale=4, train_wall=11, gb_free=2.8, wall=198326
2021-06-21 01:44:23 | INFO | train_inner | epoch 006: 2333 / 3002 loss=2.365, ppl=5.15, wps=5817.7, ups=0.09, wpb=64882, bsz=128, num_updates=17241, lr=9.98701e-05, gnorm=1.914, loss_scale=4, train_wall=11, gb_free=2.8, wall=198337
2021-06-21 01:44:34 | INFO | train_inner | epoch 006: 2334 / 3002 loss=2.415, ppl=5.33, wps=5708.4, ups=0.09, wpb=64814, bsz=128, num_updates=17242, lr=9.98701e-05, gnorm=2.034, loss_scale=4, train_wall=11, gb_free=2.8, wall=198348
2021-06-21 01:44:45 | INFO | train_inner | epoch 006: 2335 / 3002 loss=2.7, ppl=6.5, wps=5867.6, ups=0.09, wpb=64817, bsz=128, num_updates=17243, lr=9.987e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=198360
2021-06-21 01:44:56 | INFO | train_inner | epoch 006: 2336 / 3002 loss=2.501, ppl=5.66, wps=5948.3, ups=0.09, wpb=64795, bsz=128, num_updates=17244, lr=9.987e-05, gnorm=2.013, loss_scale=4, train_wall=10, gb_free=2.8, wall=198370
2021-06-21 01:45:07 | INFO | train_inner | epoch 006: 2337 / 3002 loss=2.31, ppl=4.96, wps=5938.1, ups=0.09, wpb=64891, bsz=128, num_updates=17245, lr=9.987e-05, gnorm=1.938, loss_scale=4, train_wall=10, gb_free=2.8, wall=198381
2021-06-21 01:45:18 | INFO | train_inner | epoch 006: 2338 / 3002 loss=2.442, ppl=5.43, wps=5849.7, ups=0.09, wpb=64859, bsz=128, num_updates=17246, lr=9.987e-05, gnorm=2.336, loss_scale=4, train_wall=11, gb_free=2.8, wall=198392
2021-06-21 01:45:29 | INFO | train_inner | epoch 006: 2339 / 3002 loss=2.518, ppl=5.73, wps=6096.6, ups=0.09, wpb=64951, bsz=128, num_updates=17247, lr=9.987e-05, gnorm=1.98, loss_scale=4, train_wall=10, gb_free=2.8, wall=198403
2021-06-21 01:45:40 | INFO | train_inner | epoch 006: 2340 / 3002 loss=2.379, ppl=5.2, wps=5799.5, ups=0.09, wpb=64857, bsz=128, num_updates=17248, lr=9.987e-05, gnorm=2.443, loss_scale=4, train_wall=11, gb_free=2.8, wall=198414
2021-06-21 01:45:51 | INFO | train_inner | epoch 006: 2341 / 3002 loss=2.478, ppl=5.57, wps=5856.8, ups=0.09, wpb=64802, bsz=128, num_updates=17249, lr=9.987e-05, gnorm=5.134, loss_scale=4, train_wall=11, gb_free=2.8, wall=198425
2021-06-21 01:46:03 | INFO | train_inner | epoch 006: 2342 / 3002 loss=2.622, ppl=6.16, wps=5633.5, ups=0.09, wpb=64815, bsz=128, num_updates=17250, lr=9.987e-05, gnorm=2.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=198437
2021-06-21 01:46:14 | INFO | train_inner | epoch 006: 2343 / 3002 loss=2.432, ppl=5.4, wps=5871.9, ups=0.09, wpb=64847, bsz=128, num_updates=17251, lr=9.987e-05, gnorm=1.982, loss_scale=4, train_wall=11, gb_free=2.8, wall=198448
2021-06-21 01:46:25 | INFO | train_inner | epoch 006: 2344 / 3002 loss=2.537, ppl=5.8, wps=5867.4, ups=0.09, wpb=64928, bsz=128, num_updates=17252, lr=9.987e-05, gnorm=1.89, loss_scale=4, train_wall=11, gb_free=2.8, wall=198459
2021-06-21 01:46:36 | INFO | train_inner | epoch 006: 2345 / 3002 loss=2.474, ppl=5.56, wps=5882.3, ups=0.09, wpb=64848, bsz=128, num_updates=17253, lr=9.987e-05, gnorm=1.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=198470
2021-06-21 01:46:47 | INFO | train_inner | epoch 006: 2346 / 3002 loss=2.393, ppl=5.25, wps=5845.1, ups=0.09, wpb=64851, bsz=128, num_updates=17254, lr=9.987e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=198481
2021-06-21 01:46:57 | INFO | train_inner | epoch 006: 2347 / 3002 loss=2.454, ppl=5.48, wps=6092.9, ups=0.09, wpb=64831, bsz=128, num_updates=17255, lr=9.98699e-05, gnorm=1.921, loss_scale=4, train_wall=10, gb_free=2.8, wall=198492
2021-06-21 01:47:09 | INFO | train_inner | epoch 006: 2348 / 3002 loss=2.533, ppl=5.79, wps=5784.8, ups=0.09, wpb=64782, bsz=128, num_updates=17256, lr=9.98699e-05, gnorm=1.984, loss_scale=4, train_wall=11, gb_free=2.8, wall=198503
2021-06-21 01:47:20 | INFO | train_inner | epoch 006: 2349 / 3002 loss=2.399, ppl=5.27, wps=5782.1, ups=0.09, wpb=64752, bsz=128, num_updates=17257, lr=9.98699e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=198514
2021-06-21 01:47:31 | INFO | train_inner | epoch 006: 2350 / 3002 loss=2.493, ppl=5.63, wps=5811.7, ups=0.09, wpb=64797, bsz=128, num_updates=17258, lr=9.98699e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=198525
2021-06-21 01:47:42 | INFO | train_inner | epoch 006: 2351 / 3002 loss=2.407, ppl=5.3, wps=5839.9, ups=0.09, wpb=64829, bsz=128, num_updates=17259, lr=9.98699e-05, gnorm=1.954, loss_scale=4, train_wall=11, gb_free=2.8, wall=198536
2021-06-21 01:47:53 | INFO | train_inner | epoch 006: 2352 / 3002 loss=2.519, ppl=5.73, wps=5880.4, ups=0.09, wpb=64867, bsz=128, num_updates=17260, lr=9.98699e-05, gnorm=2.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=198547
2021-06-21 01:48:04 | INFO | train_inner | epoch 006: 2353 / 3002 loss=2.356, ppl=5.12, wps=5987.9, ups=0.09, wpb=64786, bsz=128, num_updates=17261, lr=9.98699e-05, gnorm=1.965, loss_scale=4, train_wall=10, gb_free=2.8, wall=198558
2021-06-21 01:48:15 | INFO | train_inner | epoch 006: 2354 / 3002 loss=2.369, ppl=5.17, wps=5831.4, ups=0.09, wpb=64815, bsz=128, num_updates=17262, lr=9.98699e-05, gnorm=1.938, loss_scale=4, train_wall=11, gb_free=2.8, wall=198569
2021-06-21 01:48:26 | INFO | train_inner | epoch 006: 2355 / 3002 loss=2.529, ppl=5.77, wps=5953.7, ups=0.09, wpb=64842, bsz=128, num_updates=17263, lr=9.98699e-05, gnorm=2.036, loss_scale=4, train_wall=10, gb_free=2.8, wall=198580
2021-06-21 01:48:37 | INFO | train_inner | epoch 006: 2356 / 3002 loss=2.385, ppl=5.22, wps=5925.9, ups=0.09, wpb=64823, bsz=128, num_updates=17264, lr=9.98699e-05, gnorm=2.027, loss_scale=4, train_wall=10, gb_free=2.8, wall=198591
2021-06-21 01:48:48 | INFO | train_inner | epoch 006: 2357 / 3002 loss=2.5, ppl=5.66, wps=6059.4, ups=0.09, wpb=64852, bsz=128, num_updates=17265, lr=9.98699e-05, gnorm=1.939, loss_scale=4, train_wall=10, gb_free=2.8, wall=198602
2021-06-21 01:48:59 | INFO | train_inner | epoch 006: 2358 / 3002 loss=2.456, ppl=5.49, wps=5826, ups=0.09, wpb=64786, bsz=128, num_updates=17266, lr=9.98699e-05, gnorm=1.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=198613
2021-06-21 01:49:10 | INFO | train_inner | epoch 006: 2359 / 3002 loss=2.437, ppl=5.41, wps=5886.5, ups=0.09, wpb=64835, bsz=128, num_updates=17267, lr=9.98699e-05, gnorm=1.882, loss_scale=4, train_wall=11, gb_free=2.8, wall=198624
2021-06-21 01:49:21 | INFO | train_inner | epoch 006: 2360 / 3002 loss=2.527, ppl=5.76, wps=5879.4, ups=0.09, wpb=64816, bsz=128, num_updates=17268, lr=9.98698e-05, gnorm=1.979, loss_scale=4, train_wall=11, gb_free=2.8, wall=198635
2021-06-21 01:49:32 | INFO | train_inner | epoch 006: 2361 / 3002 loss=2.513, ppl=5.71, wps=5817.8, ups=0.09, wpb=64837, bsz=128, num_updates=17269, lr=9.98698e-05, gnorm=1.972, loss_scale=4, train_wall=11, gb_free=2.8, wall=198646
2021-06-21 01:49:43 | INFO | train_inner | epoch 006: 2362 / 3002 loss=2.57, ppl=5.94, wps=5777.1, ups=0.09, wpb=64758, bsz=128, num_updates=17270, lr=9.98698e-05, gnorm=2.186, loss_scale=4, train_wall=11, gb_free=2.8, wall=198657
2021-06-21 01:49:54 | INFO | train_inner | epoch 006: 2363 / 3002 loss=2.393, ppl=5.25, wps=5931.1, ups=0.09, wpb=64789, bsz=128, num_updates=17271, lr=9.98698e-05, gnorm=2.08, loss_scale=4, train_wall=10, gb_free=2.8, wall=198668
2021-06-21 01:50:05 | INFO | train_inner | epoch 006: 2364 / 3002 loss=2.39, ppl=5.24, wps=5832.7, ups=0.09, wpb=64877, bsz=128, num_updates=17272, lr=9.98698e-05, gnorm=2.004, loss_scale=4, train_wall=11, gb_free=2.8, wall=198679
2021-06-21 01:50:16 | INFO | train_inner | epoch 006: 2365 / 3002 loss=2.453, ppl=5.47, wps=5834.4, ups=0.09, wpb=64749, bsz=128, num_updates=17273, lr=9.98698e-05, gnorm=1.975, loss_scale=4, train_wall=11, gb_free=2.8, wall=198691
2021-06-21 01:50:27 | INFO | train_inner | epoch 006: 2366 / 3002 loss=2.43, ppl=5.39, wps=5816.7, ups=0.09, wpb=64796, bsz=128, num_updates=17274, lr=9.98698e-05, gnorm=1.952, loss_scale=4, train_wall=11, gb_free=2.8, wall=198702
2021-06-21 01:50:38 | INFO | train_inner | epoch 006: 2367 / 3002 loss=2.558, ppl=5.89, wps=5806.6, ups=0.09, wpb=64790, bsz=128, num_updates=17275, lr=9.98698e-05, gnorm=1.996, loss_scale=4, train_wall=11, gb_free=2.8, wall=198713
2021-06-21 01:50:50 | INFO | train_inner | epoch 006: 2368 / 3002 loss=2.518, ppl=5.73, wps=5846.9, ups=0.09, wpb=64867, bsz=128, num_updates=17276, lr=9.98698e-05, gnorm=1.988, loss_scale=4, train_wall=11, gb_free=2.8, wall=198724
2021-06-21 01:51:01 | INFO | train_inner | epoch 006: 2369 / 3002 loss=2.72, ppl=6.59, wps=5809.7, ups=0.09, wpb=64834, bsz=128, num_updates=17277, lr=9.98698e-05, gnorm=2.095, loss_scale=4, train_wall=11, gb_free=2.8, wall=198735
2021-06-21 01:51:12 | INFO | train_inner | epoch 006: 2370 / 3002 loss=2.656, ppl=6.3, wps=5877.9, ups=0.09, wpb=64929, bsz=128, num_updates=17278, lr=9.98698e-05, gnorm=1.932, loss_scale=4, train_wall=11, gb_free=2.8, wall=198746
2021-06-21 01:51:23 | INFO | train_inner | epoch 006: 2371 / 3002 loss=2.438, ppl=5.42, wps=5903.7, ups=0.09, wpb=64950, bsz=128, num_updates=17279, lr=9.98698e-05, gnorm=1.969, loss_scale=4, train_wall=11, gb_free=2.8, wall=198757
2021-06-21 01:51:34 | INFO | train_inner | epoch 006: 2372 / 3002 loss=2.525, ppl=5.76, wps=5907.5, ups=0.09, wpb=64812, bsz=128, num_updates=17280, lr=9.98697e-05, gnorm=1.945, loss_scale=4, train_wall=11, gb_free=2.8, wall=198768
2021-06-21 01:51:45 | INFO | train_inner | epoch 006: 2373 / 3002 loss=2.549, ppl=5.85, wps=6002.9, ups=0.09, wpb=64813, bsz=128, num_updates=17281, lr=9.98697e-05, gnorm=1.904, loss_scale=4, train_wall=10, gb_free=2.8, wall=198779
2021-06-21 01:51:56 | INFO | train_inner | epoch 006: 2374 / 3002 loss=2.528, ppl=5.77, wps=5924.7, ups=0.09, wpb=64843, bsz=128, num_updates=17282, lr=9.98697e-05, gnorm=1.925, loss_scale=4, train_wall=11, gb_free=2.8, wall=198790
2021-06-21 01:52:06 | INFO | train_inner | epoch 006: 2375 / 3002 loss=2.409, ppl=5.31, wps=5909.3, ups=0.09, wpb=64792, bsz=128, num_updates=17283, lr=9.98697e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=198801
2021-06-21 01:52:18 | INFO | train_inner | epoch 006: 2376 / 3002 loss=2.553, ppl=5.87, wps=5827.9, ups=0.09, wpb=64786, bsz=128, num_updates=17284, lr=9.98697e-05, gnorm=1.985, loss_scale=4, train_wall=11, gb_free=2.8, wall=198812
2021-06-21 01:52:29 | INFO | train_inner | epoch 006: 2377 / 3002 loss=2.587, ppl=6.01, wps=5795.6, ups=0.09, wpb=64912, bsz=128, num_updates=17285, lr=9.98697e-05, gnorm=2.023, loss_scale=4, train_wall=11, gb_free=2.8, wall=198823
2021-06-21 01:52:40 | INFO | train_inner | epoch 006: 2378 / 3002 loss=2.518, ppl=5.73, wps=5771.7, ups=0.09, wpb=64753, bsz=128, num_updates=17286, lr=9.98697e-05, gnorm=1.925, loss_scale=4, train_wall=11, gb_free=2.8, wall=198834
2021-06-21 01:52:51 | INFO | train_inner | epoch 006: 2379 / 3002 loss=2.493, ppl=5.63, wps=5883.3, ups=0.09, wpb=64932, bsz=128, num_updates=17287, lr=9.98697e-05, gnorm=2.014, loss_scale=4, train_wall=11, gb_free=2.8, wall=198845
2021-06-21 01:53:02 | INFO | train_inner | epoch 006: 2380 / 3002 loss=2.441, ppl=5.43, wps=5927, ups=0.09, wpb=64881, bsz=128, num_updates=17288, lr=9.98697e-05, gnorm=3.029, loss_scale=4, train_wall=11, gb_free=2.8, wall=198856
2021-06-21 01:53:13 | INFO | train_inner | epoch 006: 2381 / 3002 loss=2.654, ppl=6.3, wps=5914, ups=0.09, wpb=64813, bsz=128, num_updates=17289, lr=9.98697e-05, gnorm=2.002, loss_scale=4, train_wall=11, gb_free=2.8, wall=198867
2021-06-21 01:53:24 | INFO | train_inner | epoch 006: 2382 / 3002 loss=2.388, ppl=5.24, wps=5772.7, ups=0.09, wpb=64901, bsz=128, num_updates=17290, lr=9.98697e-05, gnorm=1.96, loss_scale=4, train_wall=11, gb_free=2.8, wall=198879
2021-06-21 01:53:35 | INFO | train_inner | epoch 006: 2383 / 3002 loss=2.375, ppl=5.19, wps=5875.4, ups=0.09, wpb=64950, bsz=128, num_updates=17291, lr=9.98697e-05, gnorm=1.937, loss_scale=4, train_wall=11, gb_free=2.8, wall=198890
2021-06-21 01:53:46 | INFO | train_inner | epoch 006: 2384 / 3002 loss=2.528, ppl=5.77, wps=5913.7, ups=0.09, wpb=64805, bsz=128, num_updates=17292, lr=9.98697e-05, gnorm=1.942, loss_scale=4, train_wall=11, gb_free=2.8, wall=198901
2021-06-21 01:53:57 | INFO | train_inner | epoch 006: 2385 / 3002 loss=2.444, ppl=5.44, wps=5922.2, ups=0.09, wpb=64893, bsz=128, num_updates=17293, lr=9.98696e-05, gnorm=2.023, loss_scale=4, train_wall=10, gb_free=2.8, wall=198912
2021-06-21 01:54:08 | INFO | train_inner | epoch 006: 2386 / 3002 loss=2.422, ppl=5.36, wps=5801.4, ups=0.09, wpb=64867, bsz=128, num_updates=17294, lr=9.98696e-05, gnorm=1.907, loss_scale=4, train_wall=11, gb_free=2.8, wall=198923
2021-06-21 01:54:19 | INFO | train_inner | epoch 006: 2387 / 3002 loss=2.379, ppl=5.2, wps=5838.6, ups=0.09, wpb=64816, bsz=128, num_updates=17295, lr=9.98696e-05, gnorm=1.902, loss_scale=4, train_wall=11, gb_free=2.8, wall=198934
2021-06-21 01:54:31 | INFO | train_inner | epoch 006: 2388 / 3002 loss=2.526, ppl=5.76, wps=5755, ups=0.09, wpb=64748, bsz=128, num_updates=17296, lr=9.98696e-05, gnorm=2.064, loss_scale=4, train_wall=11, gb_free=2.8, wall=198945
2021-06-21 01:54:42 | INFO | train_inner | epoch 006: 2389 / 3002 loss=2.605, ppl=6.08, wps=5872.7, ups=0.09, wpb=64811, bsz=128, num_updates=17297, lr=9.98696e-05, gnorm=2.032, loss_scale=4, train_wall=11, gb_free=2.8, wall=198956
2021-06-21 01:54:53 | INFO | train_inner | epoch 006: 2390 / 3002 loss=2.402, ppl=5.29, wps=5846.8, ups=0.09, wpb=64858, bsz=128, num_updates=17298, lr=9.98696e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=198967
2021-06-21 01:55:04 | INFO | train_inner | epoch 006: 2391 / 3002 loss=2.463, ppl=5.51, wps=5770.7, ups=0.09, wpb=64792, bsz=128, num_updates=17299, lr=9.98696e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=198978
2021-06-21 01:55:15 | INFO | train_inner | epoch 006: 2392 / 3002 loss=2.594, ppl=6.04, wps=5907.5, ups=0.09, wpb=64864, bsz=128, num_updates=17300, lr=9.98696e-05, gnorm=1.861, loss_scale=4, train_wall=11, gb_free=2.8, wall=198989
2021-06-21 01:55:26 | INFO | train_inner | epoch 006: 2393 / 3002 loss=2.491, ppl=5.62, wps=5959.4, ups=0.09, wpb=64847, bsz=128, num_updates=17301, lr=9.98696e-05, gnorm=1.946, loss_scale=4, train_wall=10, gb_free=2.8, wall=199000
2021-06-21 01:55:37 | INFO | train_inner | epoch 006: 2394 / 3002 loss=2.52, ppl=5.74, wps=5875.2, ups=0.09, wpb=64791, bsz=128, num_updates=17302, lr=9.98696e-05, gnorm=2.128, loss_scale=4, train_wall=11, gb_free=2.8, wall=199011
2021-06-21 01:55:48 | INFO | train_inner | epoch 006: 2395 / 3002 loss=2.593, ppl=6.03, wps=5891.5, ups=0.09, wpb=64804, bsz=128, num_updates=17303, lr=9.98696e-05, gnorm=2.331, loss_scale=4, train_wall=11, gb_free=2.8, wall=199022
2021-06-21 01:55:59 | INFO | train_inner | epoch 006: 2396 / 3002 loss=2.354, ppl=5.11, wps=5870.9, ups=0.09, wpb=64831, bsz=128, num_updates=17304, lr=9.98696e-05, gnorm=1.989, loss_scale=4, train_wall=11, gb_free=2.8, wall=199033
2021-06-21 01:56:10 | INFO | train_inner | epoch 006: 2397 / 3002 loss=2.546, ppl=5.84, wps=5969.9, ups=0.09, wpb=64859, bsz=128, num_updates=17305, lr=9.98695e-05, gnorm=1.899, loss_scale=4, train_wall=10, gb_free=2.8, wall=199044
2021-06-21 01:56:21 | INFO | train_inner | epoch 006: 2398 / 3002 loss=2.345, ppl=5.08, wps=5864, ups=0.09, wpb=64793, bsz=128, num_updates=17306, lr=9.98695e-05, gnorm=2.121, loss_scale=4, train_wall=11, gb_free=2.8, wall=199055
2021-06-21 01:56:32 | INFO | train_inner | epoch 006: 2399 / 3002 loss=2.353, ppl=5.11, wps=5879.5, ups=0.09, wpb=64833, bsz=128, num_updates=17307, lr=9.98695e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=199066
2021-06-21 01:56:43 | INFO | train_inner | epoch 006: 2400 / 3002 loss=2.443, ppl=5.44, wps=5887.8, ups=0.09, wpb=64850, bsz=128, num_updates=17308, lr=9.98695e-05, gnorm=1.999, loss_scale=4, train_wall=11, gb_free=2.8, wall=199077
2021-06-21 01:56:54 | INFO | train_inner | epoch 006: 2401 / 3002 loss=2.435, ppl=5.41, wps=5953, ups=0.09, wpb=64741, bsz=128, num_updates=17309, lr=9.98695e-05, gnorm=1.942, loss_scale=4, train_wall=10, gb_free=2.8, wall=199088
2021-06-21 01:57:05 | INFO | train_inner | epoch 006: 2402 / 3002 loss=2.408, ppl=5.31, wps=5930.1, ups=0.09, wpb=64894, bsz=128, num_updates=17310, lr=9.98695e-05, gnorm=1.965, loss_scale=4, train_wall=10, gb_free=2.8, wall=199099
2021-06-21 01:57:16 | INFO | train_inner | epoch 006: 2403 / 3002 loss=2.244, ppl=4.74, wps=5756, ups=0.09, wpb=64773, bsz=128, num_updates=17311, lr=9.98695e-05, gnorm=1.921, loss_scale=4, train_wall=11, gb_free=2.8, wall=199110
2021-06-21 01:57:27 | INFO | train_inner | epoch 006: 2404 / 3002 loss=2.571, ppl=5.94, wps=5887.1, ups=0.09, wpb=64681, bsz=128, num_updates=17312, lr=9.98695e-05, gnorm=1.933, loss_scale=4, train_wall=11, gb_free=2.8, wall=199121
2021-06-21 01:57:38 | INFO | train_inner | epoch 006: 2405 / 3002 loss=2.463, ppl=5.51, wps=5956.5, ups=0.09, wpb=64870, bsz=128, num_updates=17313, lr=9.98695e-05, gnorm=2.178, loss_scale=4, train_wall=10, gb_free=2.8, wall=199132
2021-06-21 01:57:49 | INFO | train_inner | epoch 006: 2406 / 3002 loss=2.455, ppl=5.48, wps=5790.6, ups=0.09, wpb=64795, bsz=128, num_updates=17314, lr=9.98695e-05, gnorm=2.761, loss_scale=4, train_wall=11, gb_free=2.8, wall=199143
2021-06-21 01:58:00 | INFO | train_inner | epoch 006: 2407 / 3002 loss=2.549, ppl=5.85, wps=5834.7, ups=0.09, wpb=64809, bsz=128, num_updates=17315, lr=9.98695e-05, gnorm=2.061, loss_scale=4, train_wall=11, gb_free=2.8, wall=199155
2021-06-21 01:58:11 | INFO | train_inner | epoch 006: 2408 / 3002 loss=2.49, ppl=5.62, wps=5866, ups=0.09, wpb=64773, bsz=128, num_updates=17316, lr=9.98695e-05, gnorm=2.018, loss_scale=4, train_wall=11, gb_free=2.8, wall=199166
2021-06-21 01:58:22 | INFO | train_inner | epoch 006: 2409 / 3002 loss=2.408, ppl=5.31, wps=5920, ups=0.09, wpb=64804, bsz=128, num_updates=17317, lr=9.98695e-05, gnorm=1.945, loss_scale=4, train_wall=10, gb_free=2.8, wall=199177
2021-06-21 01:58:33 | INFO | train_inner | epoch 006: 2410 / 3002 loss=2.348, ppl=5.09, wps=5820.6, ups=0.09, wpb=64761, bsz=128, num_updates=17318, lr=9.98694e-05, gnorm=14.683, loss_scale=4, train_wall=11, gb_free=2.8, wall=199188
2021-06-21 01:58:44 | INFO | train_inner | epoch 006: 2411 / 3002 loss=2.464, ppl=5.52, wps=5819.3, ups=0.09, wpb=64873, bsz=128, num_updates=17319, lr=9.98694e-05, gnorm=2.08, loss_scale=4, train_wall=11, gb_free=2.8, wall=199199
2021-06-21 01:58:56 | INFO | train_inner | epoch 006: 2412 / 3002 loss=2.386, ppl=5.23, wps=5821.9, ups=0.09, wpb=64771, bsz=128, num_updates=17320, lr=9.98694e-05, gnorm=1.976, loss_scale=4, train_wall=11, gb_free=2.8, wall=199210
2021-06-21 01:59:07 | INFO | train_inner | epoch 006: 2413 / 3002 loss=2.507, ppl=5.68, wps=5833.7, ups=0.09, wpb=64811, bsz=128, num_updates=17321, lr=9.98694e-05, gnorm=1.931, loss_scale=4, train_wall=11, gb_free=2.8, wall=199221
2021-06-21 01:59:18 | INFO | train_inner | epoch 006: 2414 / 3002 loss=2.537, ppl=5.8, wps=5817.8, ups=0.09, wpb=64835, bsz=128, num_updates=17322, lr=9.98694e-05, gnorm=2.19, loss_scale=4, train_wall=11, gb_free=2.8, wall=199232
2021-06-21 01:59:29 | INFO | train_inner | epoch 006: 2415 / 3002 loss=2.539, ppl=5.81, wps=5826.6, ups=0.09, wpb=64780, bsz=128, num_updates=17323, lr=9.98694e-05, gnorm=2.75, loss_scale=4, train_wall=11, gb_free=2.8, wall=199243
2021-06-21 01:59:40 | INFO | train_inner | epoch 006: 2416 / 3002 loss=2.514, ppl=5.71, wps=5925.7, ups=0.09, wpb=64835, bsz=128, num_updates=17324, lr=9.98694e-05, gnorm=2.168, loss_scale=4, train_wall=10, gb_free=2.8, wall=199254
2021-06-21 01:59:51 | INFO | train_inner | epoch 006: 2417 / 3002 loss=2.553, ppl=5.87, wps=5842.6, ups=0.09, wpb=64791, bsz=128, num_updates=17325, lr=9.98694e-05, gnorm=2.095, loss_scale=4, train_wall=11, gb_free=2.8, wall=199265
2021-06-21 02:00:02 | INFO | train_inner | epoch 006: 2418 / 3002 loss=2.692, ppl=6.46, wps=5841, ups=0.09, wpb=64743, bsz=128, num_updates=17326, lr=9.98694e-05, gnorm=2.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=199276
2021-06-21 02:00:13 | INFO | train_inner | epoch 006: 2419 / 3002 loss=2.434, ppl=5.4, wps=5870.9, ups=0.09, wpb=64912, bsz=128, num_updates=17327, lr=9.98694e-05, gnorm=2.499, loss_scale=4, train_wall=11, gb_free=2.8, wall=199287
2021-06-21 02:00:24 | INFO | train_inner | epoch 006: 2420 / 3002 loss=2.513, ppl=5.71, wps=5929.1, ups=0.09, wpb=64851, bsz=128, num_updates=17328, lr=9.98694e-05, gnorm=9.314, loss_scale=4, train_wall=10, gb_free=2.8, wall=199298
2021-06-21 02:00:35 | INFO | train_inner | epoch 006: 2421 / 3002 loss=2.36, ppl=5.13, wps=5952.7, ups=0.09, wpb=64844, bsz=128, num_updates=17329, lr=9.98694e-05, gnorm=2.431, loss_scale=4, train_wall=10, gb_free=2.8, wall=199309
2021-06-21 02:00:46 | INFO | train_inner | epoch 006: 2422 / 3002 loss=2.616, ppl=6.13, wps=5803.5, ups=0.09, wpb=64827, bsz=128, num_updates=17330, lr=9.98693e-05, gnorm=2.4, loss_scale=4, train_wall=11, gb_free=2.8, wall=199320
2021-06-21 02:00:57 | INFO | train_inner | epoch 006: 2423 / 3002 loss=2.368, ppl=5.16, wps=5822.6, ups=0.09, wpb=64844, bsz=128, num_updates=17331, lr=9.98693e-05, gnorm=2.074, loss_scale=4, train_wall=11, gb_free=2.8, wall=199332
2021-06-21 02:01:08 | INFO | train_inner | epoch 006: 2424 / 3002 loss=2.651, ppl=6.28, wps=5802.3, ups=0.09, wpb=64787, bsz=128, num_updates=17332, lr=9.98693e-05, gnorm=2.185, loss_scale=4, train_wall=11, gb_free=2.8, wall=199343
2021-06-21 02:01:20 | INFO | train_inner | epoch 006: 2425 / 3002 loss=2.629, ppl=6.19, wps=5812, ups=0.09, wpb=64865, bsz=128, num_updates=17333, lr=9.98693e-05, gnorm=2.092, loss_scale=4, train_wall=11, gb_free=2.8, wall=199354
2021-06-21 02:01:31 | INFO | train_inner | epoch 006: 2426 / 3002 loss=2.598, ppl=6.05, wps=5875.5, ups=0.09, wpb=64751, bsz=128, num_updates=17334, lr=9.98693e-05, gnorm=1.981, loss_scale=4, train_wall=11, gb_free=2.8, wall=199365
2021-06-21 02:01:42 | INFO | train_inner | epoch 006: 2427 / 3002 loss=2.42, ppl=5.35, wps=5831.1, ups=0.09, wpb=64789, bsz=128, num_updates=17335, lr=9.98693e-05, gnorm=2.144, loss_scale=4, train_wall=11, gb_free=2.8, wall=199376
2021-06-21 02:01:53 | INFO | train_inner | epoch 006: 2428 / 3002 loss=2.64, ppl=6.23, wps=5824, ups=0.09, wpb=64889, bsz=128, num_updates=17336, lr=9.98693e-05, gnorm=2.012, loss_scale=4, train_wall=11, gb_free=2.8, wall=199387
2021-06-21 02:02:04 | INFO | train_inner | epoch 006: 2429 / 3002 loss=2.559, ppl=5.89, wps=5941.7, ups=0.09, wpb=64859, bsz=128, num_updates=17337, lr=9.98693e-05, gnorm=2.169, loss_scale=4, train_wall=10, gb_free=2.8, wall=199398
2021-06-21 02:02:15 | INFO | train_inner | epoch 006: 2430 / 3002 loss=2.516, ppl=5.72, wps=5799.6, ups=0.09, wpb=64839, bsz=128, num_updates=17338, lr=9.98693e-05, gnorm=2.014, loss_scale=4, train_wall=11, gb_free=2.8, wall=199409
2021-06-21 02:02:26 | INFO | train_inner | epoch 006: 2431 / 3002 loss=2.502, ppl=5.66, wps=5763, ups=0.09, wpb=64811, bsz=128, num_updates=17339, lr=9.98693e-05, gnorm=2.071, loss_scale=4, train_wall=11, gb_free=2.8, wall=199421
2021-06-21 02:02:37 | INFO | train_inner | epoch 006: 2432 / 3002 loss=2.393, ppl=5.25, wps=5811.1, ups=0.09, wpb=64818, bsz=128, num_updates=17340, lr=9.98693e-05, gnorm=2.199, loss_scale=4, train_wall=11, gb_free=2.8, wall=199432
2021-06-21 02:02:49 | INFO | train_inner | epoch 006: 2433 / 3002 loss=2.537, ppl=5.8, wps=5821.1, ups=0.09, wpb=64816, bsz=128, num_updates=17341, lr=9.98693e-05, gnorm=1.975, loss_scale=4, train_wall=11, gb_free=2.8, wall=199443
2021-06-21 02:03:00 | INFO | train_inner | epoch 006: 2434 / 3002 loss=2.62, ppl=6.15, wps=5809.4, ups=0.09, wpb=64779, bsz=128, num_updates=17342, lr=9.98693e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=199454
2021-06-21 02:03:11 | INFO | train_inner | epoch 006: 2435 / 3002 loss=2.558, ppl=5.89, wps=5739.6, ups=0.09, wpb=64752, bsz=128, num_updates=17343, lr=9.98692e-05, gnorm=1.883, loss_scale=4, train_wall=11, gb_free=2.8, wall=199465
2021-06-21 02:03:22 | INFO | train_inner | epoch 006: 2436 / 3002 loss=2.507, ppl=5.68, wps=6000.9, ups=0.09, wpb=64759, bsz=128, num_updates=17344, lr=9.98692e-05, gnorm=1.915, loss_scale=4, train_wall=10, gb_free=2.8, wall=199476
2021-06-21 02:03:33 | INFO | train_inner | epoch 006: 2437 / 3002 loss=2.388, ppl=5.23, wps=5927.5, ups=0.09, wpb=64880, bsz=128, num_updates=17345, lr=9.98692e-05, gnorm=2.023, loss_scale=4, train_wall=10, gb_free=2.8, wall=199487
2021-06-21 02:03:44 | INFO | train_inner | epoch 006: 2438 / 3002 loss=2.355, ppl=5.12, wps=5906.2, ups=0.09, wpb=64808, bsz=128, num_updates=17346, lr=9.98692e-05, gnorm=1.926, loss_scale=4, train_wall=10, gb_free=2.8, wall=199498
2021-06-21 02:03:55 | INFO | train_inner | epoch 006: 2439 / 3002 loss=2.436, ppl=5.41, wps=5889, ups=0.09, wpb=64924, bsz=128, num_updates=17347, lr=9.98692e-05, gnorm=2.188, loss_scale=4, train_wall=11, gb_free=2.8, wall=199509
2021-06-21 02:04:06 | INFO | train_inner | epoch 006: 2440 / 3002 loss=2.484, ppl=5.6, wps=5719.3, ups=0.09, wpb=64745, bsz=128, num_updates=17348, lr=9.98692e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=199520
2021-06-21 02:04:17 | INFO | train_inner | epoch 006: 2441 / 3002 loss=2.524, ppl=5.75, wps=5908.2, ups=0.09, wpb=64847, bsz=128, num_updates=17349, lr=9.98692e-05, gnorm=2.011, loss_scale=4, train_wall=11, gb_free=2.8, wall=199531
2021-06-21 02:04:28 | INFO | train_inner | epoch 006: 2442 / 3002 loss=2.451, ppl=5.47, wps=5881.3, ups=0.09, wpb=64798, bsz=128, num_updates=17350, lr=9.98692e-05, gnorm=15.028, loss_scale=4, train_wall=11, gb_free=2.8, wall=199542
2021-06-21 02:04:39 | INFO | train_inner | epoch 006: 2443 / 3002 loss=2.279, ppl=4.85, wps=5845.5, ups=0.09, wpb=64814, bsz=128, num_updates=17351, lr=9.98692e-05, gnorm=1.966, loss_scale=4, train_wall=11, gb_free=2.8, wall=199553
2021-06-21 02:04:50 | INFO | train_inner | epoch 006: 2444 / 3002 loss=2.343, ppl=5.07, wps=5791.4, ups=0.09, wpb=64837, bsz=128, num_updates=17352, lr=9.98692e-05, gnorm=2.249, loss_scale=4, train_wall=11, gb_free=2.8, wall=199565
2021-06-21 02:05:01 | INFO | train_inner | epoch 006: 2445 / 3002 loss=2.511, ppl=5.7, wps=5827.1, ups=0.09, wpb=64795, bsz=128, num_updates=17353, lr=9.98692e-05, gnorm=2.067, loss_scale=4, train_wall=11, gb_free=2.8, wall=199576
2021-06-21 02:05:12 | INFO | train_inner | epoch 006: 2446 / 3002 loss=2.477, ppl=5.57, wps=5846, ups=0.09, wpb=64794, bsz=128, num_updates=17354, lr=9.98692e-05, gnorm=1.905, loss_scale=4, train_wall=11, gb_free=2.8, wall=199587
2021-06-21 02:05:23 | INFO | train_inner | epoch 006: 2447 / 3002 loss=2.566, ppl=5.92, wps=5906.9, ups=0.09, wpb=64838, bsz=128, num_updates=17355, lr=9.98691e-05, gnorm=2.184, loss_scale=4, train_wall=10, gb_free=2.8, wall=199598
2021-06-21 02:05:35 | INFO | train_inner | epoch 006: 2448 / 3002 loss=2.561, ppl=5.9, wps=5854.3, ups=0.09, wpb=64863, bsz=128, num_updates=17356, lr=9.98691e-05, gnorm=2.021, loss_scale=4, train_wall=11, gb_free=2.8, wall=199609
2021-06-21 02:05:46 | INFO | train_inner | epoch 006: 2449 / 3002 loss=2.425, ppl=5.37, wps=5889, ups=0.09, wpb=64775, bsz=128, num_updates=17357, lr=9.98691e-05, gnorm=1.983, loss_scale=4, train_wall=11, gb_free=2.8, wall=199620
2021-06-21 02:05:56 | INFO | train_inner | epoch 006: 2450 / 3002 loss=2.608, ppl=6.1, wps=5972.9, ups=0.09, wpb=64767, bsz=128, num_updates=17358, lr=9.98691e-05, gnorm=2.55, loss_scale=4, train_wall=10, gb_free=2.8, wall=199631
2021-06-21 02:06:08 | INFO | train_inner | epoch 006: 2451 / 3002 loss=2.461, ppl=5.51, wps=5814.6, ups=0.09, wpb=64853, bsz=128, num_updates=17359, lr=9.98691e-05, gnorm=1.992, loss_scale=4, train_wall=11, gb_free=2.8, wall=199642
2021-06-21 02:06:19 | INFO | train_inner | epoch 006: 2452 / 3002 loss=2.345, ppl=5.08, wps=5831, ups=0.09, wpb=64847, bsz=128, num_updates=17360, lr=9.98691e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=199653
2021-06-21 02:06:30 | INFO | train_inner | epoch 006: 2453 / 3002 loss=2.47, ppl=5.54, wps=5831.6, ups=0.09, wpb=64788, bsz=128, num_updates=17361, lr=9.98691e-05, gnorm=2.168, loss_scale=4, train_wall=11, gb_free=2.8, wall=199664
2021-06-21 02:06:41 | INFO | train_inner | epoch 006: 2454 / 3002 loss=2.5, ppl=5.66, wps=5763.4, ups=0.09, wpb=64829, bsz=128, num_updates=17362, lr=9.98691e-05, gnorm=2.005, loss_scale=4, train_wall=11, gb_free=2.8, wall=199675
2021-06-21 02:06:52 | INFO | train_inner | epoch 006: 2455 / 3002 loss=2.309, ppl=4.95, wps=5939.7, ups=0.09, wpb=64915, bsz=128, num_updates=17363, lr=9.98691e-05, gnorm=1.936, loss_scale=8, train_wall=10, gb_free=2.8, wall=199686
2021-06-21 02:07:03 | INFO | train_inner | epoch 006: 2456 / 3002 loss=2.537, ppl=5.81, wps=5885, ups=0.09, wpb=64823, bsz=128, num_updates=17364, lr=9.98691e-05, gnorm=1.894, loss_scale=8, train_wall=11, gb_free=2.8, wall=199697
2021-06-21 02:07:14 | INFO | train_inner | epoch 006: 2457 / 3002 loss=2.572, ppl=5.95, wps=6001.1, ups=0.09, wpb=64801, bsz=128, num_updates=17365, lr=9.98691e-05, gnorm=1.905, loss_scale=8, train_wall=10, gb_free=2.8, wall=199708
2021-06-21 02:07:25 | INFO | train_inner | epoch 006: 2458 / 3002 loss=2.478, ppl=5.57, wps=5987.3, ups=0.09, wpb=64847, bsz=128, num_updates=17366, lr=9.98691e-05, gnorm=1.935, loss_scale=8, train_wall=10, gb_free=2.8, wall=199719
2021-06-21 02:07:36 | INFO | train_inner | epoch 006: 2459 / 3002 loss=2.41, ppl=5.31, wps=5804.8, ups=0.09, wpb=64792, bsz=128, num_updates=17367, lr=9.98691e-05, gnorm=1.992, loss_scale=8, train_wall=11, gb_free=2.8, wall=199730
2021-06-21 02:07:47 | INFO | train_inner | epoch 006: 2460 / 3002 loss=2.526, ppl=5.76, wps=5976, ups=0.09, wpb=64879, bsz=128, num_updates=17368, lr=9.9869e-05, gnorm=1.908, loss_scale=8, train_wall=10, gb_free=2.8, wall=199741
2021-06-21 02:07:58 | INFO | train_inner | epoch 006: 2461 / 3002 loss=2.419, ppl=5.35, wps=5927.2, ups=0.09, wpb=64906, bsz=128, num_updates=17369, lr=9.9869e-05, gnorm=2.212, loss_scale=8, train_wall=10, gb_free=2.8, wall=199752
2021-06-21 02:08:09 | INFO | train_inner | epoch 006: 2462 / 3002 loss=2.424, ppl=5.37, wps=5869.9, ups=0.09, wpb=64757, bsz=128, num_updates=17370, lr=9.9869e-05, gnorm=2.044, loss_scale=8, train_wall=11, gb_free=2.8, wall=199763
2021-06-21 02:08:20 | INFO | train_inner | epoch 006: 2463 / 3002 loss=2.512, ppl=5.7, wps=5880.6, ups=0.09, wpb=64816, bsz=128, num_updates=17371, lr=9.9869e-05, gnorm=1.969, loss_scale=8, train_wall=11, gb_free=2.8, wall=199774
2021-06-21 02:08:30 | INFO | train_inner | epoch 006: 2464 / 3002 loss=2.422, ppl=5.36, wps=6036.5, ups=0.09, wpb=64832, bsz=128, num_updates=17372, lr=9.9869e-05, gnorm=1.987, loss_scale=8, train_wall=10, gb_free=2.8, wall=199785
2021-06-21 02:08:41 | INFO | train_inner | epoch 006: 2465 / 3002 loss=2.482, ppl=5.58, wps=5870, ups=0.09, wpb=64835, bsz=128, num_updates=17373, lr=9.9869e-05, gnorm=1.984, loss_scale=8, train_wall=11, gb_free=2.8, wall=199796
2021-06-21 02:08:52 | INFO | train_inner | epoch 006: 2466 / 3002 loss=2.536, ppl=5.8, wps=5852.8, ups=0.09, wpb=64782, bsz=128, num_updates=17374, lr=9.9869e-05, gnorm=1.988, loss_scale=8, train_wall=11, gb_free=2.8, wall=199807
2021-06-21 02:09:04 | INFO | train_inner | epoch 006: 2467 / 3002 loss=2.572, ppl=5.94, wps=5820.5, ups=0.09, wpb=64749, bsz=128, num_updates=17375, lr=9.9869e-05, gnorm=2.12, loss_scale=8, train_wall=11, gb_free=2.8, wall=199818
2021-06-21 02:09:14 | INFO | train_inner | epoch 006: 2468 / 3002 loss=2.411, ppl=5.32, wps=6061.9, ups=0.09, wpb=64868, bsz=128, num_updates=17376, lr=9.9869e-05, gnorm=1.946, loss_scale=8, train_wall=10, gb_free=2.8, wall=199829
2021-06-21 02:09:25 | INFO | train_inner | epoch 006: 2469 / 3002 loss=2.499, ppl=5.65, wps=5845.1, ups=0.09, wpb=64802, bsz=128, num_updates=17377, lr=9.9869e-05, gnorm=2.566, loss_scale=8, train_wall=11, gb_free=2.8, wall=199840
2021-06-21 02:09:36 | INFO | train_inner | epoch 006: 2470 / 3002 loss=2.596, ppl=6.05, wps=5953.7, ups=0.09, wpb=64838, bsz=128, num_updates=17378, lr=9.9869e-05, gnorm=2.914, loss_scale=8, train_wall=10, gb_free=2.8, wall=199851
2021-06-21 02:09:48 | INFO | train_inner | epoch 006: 2471 / 3002 loss=2.547, ppl=5.84, wps=5744.3, ups=0.09, wpb=64827, bsz=128, num_updates=17379, lr=9.9869e-05, gnorm=1.977, loss_scale=8, train_wall=11, gb_free=2.8, wall=199862
2021-06-21 02:09:59 | INFO | train_inner | epoch 006: 2472 / 3002 loss=2.522, ppl=5.74, wps=5793.2, ups=0.09, wpb=64928, bsz=128, num_updates=17380, lr=9.98689e-05, gnorm=1.963, loss_scale=8, train_wall=11, gb_free=2.8, wall=199873
2021-06-21 02:10:10 | INFO | train_inner | epoch 006: 2473 / 3002 loss=2.51, ppl=5.7, wps=5752.3, ups=0.09, wpb=64859, bsz=128, num_updates=17381, lr=9.98689e-05, gnorm=2.045, loss_scale=8, train_wall=11, gb_free=2.8, wall=199884
2021-06-21 02:10:21 | INFO | train_inner | epoch 006: 2474 / 3002 loss=2.441, ppl=5.43, wps=5848.4, ups=0.09, wpb=64773, bsz=128, num_updates=17382, lr=9.98689e-05, gnorm=2.021, loss_scale=8, train_wall=11, gb_free=2.8, wall=199895
2021-06-21 02:10:32 | INFO | train_inner | epoch 006: 2475 / 3002 loss=2.499, ppl=5.65, wps=5870.3, ups=0.09, wpb=64839, bsz=128, num_updates=17383, lr=9.98689e-05, gnorm=1.947, loss_scale=8, train_wall=11, gb_free=2.8, wall=199906
2021-06-21 02:10:43 | INFO | train_inner | epoch 006: 2476 / 3002 loss=2.523, ppl=5.75, wps=5824.9, ups=0.09, wpb=64772, bsz=128, num_updates=17384, lr=9.98689e-05, gnorm=2.176, loss_scale=8, train_wall=11, gb_free=2.8, wall=199918
2021-06-21 02:10:54 | INFO | train_inner | epoch 006: 2477 / 3002 loss=2.486, ppl=5.6, wps=5918.4, ups=0.09, wpb=64870, bsz=128, num_updates=17385, lr=9.98689e-05, gnorm=1.92, loss_scale=8, train_wall=10, gb_free=2.8, wall=199929
2021-06-21 02:11:05 | INFO | train_inner | epoch 006: 2478 / 3002 loss=2.404, ppl=5.29, wps=5830.5, ups=0.09, wpb=64888, bsz=128, num_updates=17386, lr=9.98689e-05, gnorm=1.932, loss_scale=8, train_wall=11, gb_free=2.8, wall=199940
2021-06-21 02:11:16 | INFO | train_inner | epoch 006: 2479 / 3002 loss=2.492, ppl=5.63, wps=5971.2, ups=0.09, wpb=64905, bsz=128, num_updates=17387, lr=9.98689e-05, gnorm=2.382, loss_scale=8, train_wall=10, gb_free=2.8, wall=199951
2021-06-21 02:11:27 | INFO | train_inner | epoch 006: 2480 / 3002 loss=2.498, ppl=5.65, wps=5811.3, ups=0.09, wpb=64879, bsz=128, num_updates=17388, lr=9.98689e-05, gnorm=1.939, loss_scale=8, train_wall=11, gb_free=2.8, wall=199962
2021-06-21 02:11:38 | INFO | train_inner | epoch 006: 2481 / 3002 loss=2.487, ppl=5.61, wps=5944.6, ups=0.09, wpb=64798, bsz=128, num_updates=17389, lr=9.98689e-05, gnorm=2.174, loss_scale=8, train_wall=10, gb_free=2.8, wall=199973
2021-06-21 02:11:49 | INFO | train_inner | epoch 006: 2482 / 3002 loss=2.325, ppl=5.01, wps=5821.9, ups=0.09, wpb=64830, bsz=128, num_updates=17390, lr=9.98689e-05, gnorm=1.978, loss_scale=8, train_wall=11, gb_free=2.8, wall=199984
2021-06-21 02:12:00 | INFO | train_inner | epoch 006: 2483 / 3002 loss=2.404, ppl=5.29, wps=5897.3, ups=0.09, wpb=64816, bsz=128, num_updates=17391, lr=9.98689e-05, gnorm=2.9, loss_scale=8, train_wall=11, gb_free=2.8, wall=199995
2021-06-21 02:12:11 | INFO | train_inner | epoch 006: 2484 / 3002 loss=2.417, ppl=5.34, wps=5866, ups=0.09, wpb=64866, bsz=128, num_updates=17392, lr=9.98689e-05, gnorm=2.549, loss_scale=8, train_wall=11, gb_free=2.8, wall=200006
2021-06-21 02:12:22 | INFO | train_inner | epoch 006: 2485 / 3002 loss=2.448, ppl=5.46, wps=5921, ups=0.09, wpb=64842, bsz=128, num_updates=17393, lr=9.98688e-05, gnorm=2.074, loss_scale=8, train_wall=11, gb_free=2.8, wall=200017
2021-06-21 02:12:33 | INFO | train_inner | epoch 006: 2486 / 3002 loss=2.411, ppl=5.32, wps=5863.8, ups=0.09, wpb=64794, bsz=128, num_updates=17394, lr=9.98688e-05, gnorm=1.987, loss_scale=8, train_wall=11, gb_free=2.8, wall=200028
2021-06-21 02:12:44 | INFO | train_inner | epoch 006: 2487 / 3002 loss=2.563, ppl=5.91, wps=5895.4, ups=0.09, wpb=64886, bsz=128, num_updates=17395, lr=9.98688e-05, gnorm=1.961, loss_scale=8, train_wall=11, gb_free=2.8, wall=200039
2021-06-21 02:12:56 | INFO | train_inner | epoch 006: 2488 / 3002 loss=2.632, ppl=6.2, wps=5854.7, ups=0.09, wpb=64825, bsz=128, num_updates=17396, lr=9.98688e-05, gnorm=2.267, loss_scale=8, train_wall=11, gb_free=2.8, wall=200050
2021-06-21 02:13:07 | INFO | train_inner | epoch 006: 2489 / 3002 loss=2.36, ppl=5.13, wps=5905.9, ups=0.09, wpb=64845, bsz=128, num_updates=17397, lr=9.98688e-05, gnorm=2.001, loss_scale=8, train_wall=11, gb_free=2.8, wall=200061
2021-06-21 02:13:18 | INFO | train_inner | epoch 006: 2490 / 3002 loss=2.456, ppl=5.49, wps=5795.7, ups=0.09, wpb=64826, bsz=128, num_updates=17398, lr=9.98688e-05, gnorm=4.037, loss_scale=8, train_wall=11, gb_free=2.8, wall=200072
2021-06-21 02:13:29 | INFO | train_inner | epoch 006: 2491 / 3002 loss=2.501, ppl=5.66, wps=5854.2, ups=0.09, wpb=64845, bsz=128, num_updates=17399, lr=9.98688e-05, gnorm=2.321, loss_scale=8, train_wall=11, gb_free=2.8, wall=200083
2021-06-21 02:13:40 | INFO | train_inner | epoch 006: 2492 / 3002 loss=2.519, ppl=5.73, wps=5880.7, ups=0.09, wpb=64833, bsz=128, num_updates=17400, lr=9.98688e-05, gnorm=2.285, loss_scale=8, train_wall=11, gb_free=2.8, wall=200094
2021-06-21 02:13:51 | INFO | train_inner | epoch 006: 2493 / 3002 loss=2.387, ppl=5.23, wps=5791.2, ups=0.09, wpb=64843, bsz=128, num_updates=17401, lr=9.98688e-05, gnorm=2.03, loss_scale=8, train_wall=11, gb_free=2.8, wall=200105
2021-06-21 02:14:02 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0
2021-06-21 02:14:13 | INFO | train_inner | epoch 006: 2495 / 3002 loss=2.295, ppl=4.91, wps=2925.5, ups=0.05, wpb=64826, bsz=128, num_updates=17402, lr=9.98688e-05, gnorm=2.034, loss_scale=4, train_wall=21, gb_free=2.8, wall=200128
2021-06-21 02:14:24 | INFO | train_inner | epoch 006: 2496 / 3002 loss=2.527, ppl=5.76, wps=5834.9, ups=0.09, wpb=64812, bsz=128, num_updates=17403, lr=9.98688e-05, gnorm=1.963, loss_scale=4, train_wall=11, gb_free=2.8, wall=200139
2021-06-21 02:14:35 | INFO | train_inner | epoch 006: 2497 / 3002 loss=2.422, ppl=5.36, wps=5849.7, ups=0.09, wpb=64869, bsz=128, num_updates=17404, lr=9.98688e-05, gnorm=2.998, loss_scale=4, train_wall=11, gb_free=2.8, wall=200150
2021-06-21 02:14:46 | INFO | train_inner | epoch 006: 2498 / 3002 loss=2.381, ppl=5.21, wps=5896.6, ups=0.09, wpb=64806, bsz=128, num_updates=17405, lr=9.98687e-05, gnorm=2.037, loss_scale=4, train_wall=11, gb_free=2.8, wall=200161
2021-06-21 02:14:57 | INFO | train_inner | epoch 006: 2499 / 3002 loss=2.387, ppl=5.23, wps=5873, ups=0.09, wpb=64852, bsz=128, num_updates=17406, lr=9.98687e-05, gnorm=1.958, loss_scale=4, train_wall=11, gb_free=2.8, wall=200172
2021-06-21 02:15:09 | INFO | train_inner | epoch 006: 2500 / 3002 loss=2.445, ppl=5.44, wps=5815.9, ups=0.09, wpb=64771, bsz=128, num_updates=17407, lr=9.98687e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=200183
2021-06-21 02:15:20 | INFO | train_inner | epoch 006: 2501 / 3002 loss=2.451, ppl=5.47, wps=5813.6, ups=0.09, wpb=64830, bsz=128, num_updates=17408, lr=9.98687e-05, gnorm=2.13, loss_scale=4, train_wall=11, gb_free=2.8, wall=200194
2021-06-21 02:15:31 | INFO | train_inner | epoch 006: 2502 / 3002 loss=2.509, ppl=5.69, wps=5830, ups=0.09, wpb=64836, bsz=128, num_updates=17409, lr=9.98687e-05, gnorm=1.934, loss_scale=4, train_wall=11, gb_free=2.8, wall=200205
2021-06-21 02:15:42 | INFO | train_inner | epoch 006: 2503 / 3002 loss=2.551, ppl=5.86, wps=5814.4, ups=0.09, wpb=64777, bsz=128, num_updates=17410, lr=9.98687e-05, gnorm=2.163, loss_scale=4, train_wall=11, gb_free=2.8, wall=200216
2021-06-21 02:15:53 | INFO | train_inner | epoch 006: 2504 / 3002 loss=2.452, ppl=5.47, wps=5888.4, ups=0.09, wpb=64915, bsz=128, num_updates=17411, lr=9.98687e-05, gnorm=1.92, loss_scale=4, train_wall=11, gb_free=2.8, wall=200227
2021-06-21 02:16:04 | INFO | train_inner | epoch 006: 2505 / 3002 loss=2.515, ppl=5.72, wps=5862.1, ups=0.09, wpb=64811, bsz=128, num_updates=17412, lr=9.98687e-05, gnorm=1.903, loss_scale=4, train_wall=11, gb_free=2.8, wall=200238
2021-06-21 02:16:15 | INFO | train_inner | epoch 006: 2506 / 3002 loss=2.434, ppl=5.4, wps=5823.2, ups=0.09, wpb=64838, bsz=128, num_updates=17413, lr=9.98687e-05, gnorm=2.063, loss_scale=4, train_wall=11, gb_free=2.8, wall=200250
2021-06-21 02:16:26 | INFO | train_inner | epoch 006: 2507 / 3002 loss=2.388, ppl=5.23, wps=5839.4, ups=0.09, wpb=64728, bsz=128, num_updates=17414, lr=9.98687e-05, gnorm=5.922, loss_scale=4, train_wall=11, gb_free=2.8, wall=200261
2021-06-21 02:16:37 | INFO | train_inner | epoch 006: 2508 / 3002 loss=2.451, ppl=5.47, wps=5933.8, ups=0.09, wpb=64894, bsz=128, num_updates=17415, lr=9.98687e-05, gnorm=2.017, loss_scale=4, train_wall=10, gb_free=2.8, wall=200272
2021-06-21 02:16:48 | INFO | train_inner | epoch 006: 2509 / 3002 loss=2.505, ppl=5.68, wps=5897.6, ups=0.09, wpb=64786, bsz=128, num_updates=17416, lr=9.98687e-05, gnorm=1.962, loss_scale=4, train_wall=11, gb_free=2.8, wall=200283
2021-06-21 02:16:59 | INFO | train_inner | epoch 006: 2510 / 3002 loss=2.597, ppl=6.05, wps=5819.5, ups=0.09, wpb=64863, bsz=128, num_updates=17417, lr=9.98687e-05, gnorm=1.955, loss_scale=4, train_wall=11, gb_free=2.8, wall=200294
2021-06-21 02:17:10 | INFO | train_inner | epoch 006: 2511 / 3002 loss=2.564, ppl=5.91, wps=5859.6, ups=0.09, wpb=64845, bsz=128, num_updates=17418, lr=9.98686e-05, gnorm=1.98, loss_scale=4, train_wall=11, gb_free=2.8, wall=200305
2021-06-21 02:17:22 | INFO | train_inner | epoch 006: 2512 / 3002 loss=2.474, ppl=5.55, wps=5779.3, ups=0.09, wpb=64825, bsz=128, num_updates=17419, lr=9.98686e-05, gnorm=10.283, loss_scale=4, train_wall=11, gb_free=2.8, wall=200316
2021-06-21 02:17:33 | INFO | train_inner | epoch 006: 2513 / 3002 loss=2.566, ppl=5.92, wps=5803.1, ups=0.09, wpb=64825, bsz=128, num_updates=17420, lr=9.98686e-05, gnorm=1.932, loss_scale=4, train_wall=11, gb_free=2.8, wall=200327
2021-06-21 02:17:44 | INFO | train_inner | epoch 006: 2514 / 3002 loss=2.454, ppl=5.48, wps=5825.6, ups=0.09, wpb=64728, bsz=128, num_updates=17421, lr=9.98686e-05, gnorm=2.228, loss_scale=4, train_wall=11, gb_free=2.8, wall=200338
2021-06-21 02:17:55 | INFO | train_inner | epoch 006: 2515 / 3002 loss=2.462, ppl=5.51, wps=5842.5, ups=0.09, wpb=64834, bsz=128, num_updates=17422, lr=9.98686e-05, gnorm=3.636, loss_scale=4, train_wall=11, gb_free=2.8, wall=200349
2021-06-21 02:18:06 | INFO | train_inner | epoch 006: 2516 / 3002 loss=2.516, ppl=5.72, wps=5868.8, ups=0.09, wpb=64884, bsz=128, num_updates=17423, lr=9.98686e-05, gnorm=2.05, loss_scale=4, train_wall=11, gb_free=2.8, wall=200360
2021-06-21 02:18:17 | INFO | train_inner | epoch 006: 2517 / 3002 loss=2.631, ppl=6.2, wps=5873.3, ups=0.09, wpb=64846, bsz=128, num_updates=17424, lr=9.98686e-05, gnorm=2.276, loss_scale=4, train_wall=11, gb_free=2.8, wall=200371
2021-06-21 02:18:28 | INFO | train_inner | epoch 006: 2518 / 3002 loss=2.43, ppl=5.39, wps=5800.9, ups=0.09, wpb=64831, bsz=128, num_updates=17425, lr=9.98686e-05, gnorm=2.203, loss_scale=4, train_wall=11, gb_free=2.8, wall=200383
2021-06-21 02:18:39 | INFO | train_inner | epoch 006: 2519 / 3002 loss=2.427, ppl=5.38, wps=5873.3, ups=0.09, wpb=64899, bsz=128, num_updates=17426, lr=9.98686e-05, gnorm=2.078, loss_scale=4, train_wall=11, gb_free=2.8, wall=200394
2021-06-21 02:18:50 | INFO | train_inner | epoch 006: 2520 / 3002 loss=2.632, ppl=6.2, wps=5941.5, ups=0.09, wpb=64813, bsz=128, num_updates=17427, lr=9.98686e-05, gnorm=2.045, loss_scale=4, train_wall=10, gb_free=2.8, wall=200405
2021-06-21 02:19:01 | INFO | train_inner | epoch 006: 2521 / 3002 loss=2.465, ppl=5.52, wps=5795.8, ups=0.09, wpb=64877, bsz=128, num_updates=17428, lr=9.98686e-05, gnorm=1.965, loss_scale=4, train_wall=11, gb_free=2.8, wall=200416
2021-06-21 02:19:12 | INFO | train_inner | epoch 006: 2522 / 3002 loss=2.444, ppl=5.44, wps=5946.3, ups=0.09, wpb=64890, bsz=128, num_updates=17429, lr=9.98686e-05, gnorm=1.946, loss_scale=4, train_wall=10, gb_free=2.8, wall=200427
2021-06-21 02:19:23 | INFO | train_inner | epoch 006: 2523 / 3002 loss=2.497, ppl=5.64, wps=5878, ups=0.09, wpb=64753, bsz=128, num_updates=17430, lr=9.98685e-05, gnorm=1.964, loss_scale=4, train_wall=11, gb_free=2.8, wall=200438
2021-06-21 02:19:34 | INFO | train_inner | epoch 006: 2524 / 3002 loss=2.612, ppl=6.12, wps=5896.6, ups=0.09, wpb=64764, bsz=128, num_updates=17431, lr=9.98685e-05, gnorm=1.977, loss_scale=4, train_wall=11, gb_free=2.8, wall=200449
2021-06-21 02:19:46 | INFO | train_inner | epoch 006: 2525 / 3002 loss=2.392, ppl=5.25, wps=5749.3, ups=0.09, wpb=64763, bsz=128, num_updates=17432, lr=9.98685e-05, gnorm=2.046, loss_scale=4, train_wall=11, gb_free=2.8, wall=200460
2021-06-21 02:19:56 | INFO | train_inner | epoch 006: 2526 / 3002 loss=2.384, ppl=5.22, wps=5997.4, ups=0.09, wpb=64835, bsz=128, num_updates=17433, lr=9.98685e-05, gnorm=1.991, loss_scale=4, train_wall=10, gb_free=2.8, wall=200471
2021-06-21 02:20:07 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-21 02:20:18 | INFO | train_inner | epoch 006: 2528 / 3002 loss=2.437, ppl=5.42, wps=2937.4, ups=0.05, wpb=64812, bsz=128, num_updates=17434, lr=9.98685e-05, gnorm=1.944, loss_scale=2, train_wall=21, gb_free=2.8, wall=200493
2021-06-21 02:20:30 | INFO | train_inner | epoch 006: 2529 / 3002 loss=2.534, ppl=5.79, wps=5865.2, ups=0.09, wpb=64884, bsz=128, num_updates=17435, lr=9.98685e-05, gnorm=2.247, loss_scale=2, train_wall=11, gb_free=2.8, wall=200504
2021-06-21 02:20:40 | INFO | train_inner | epoch 006: 2530 / 3002 loss=2.464, ppl=5.52, wps=5926, ups=0.09, wpb=64792, bsz=128, num_updates=17436, lr=9.98685e-05, gnorm=2.032, loss_scale=2, train_wall=10, gb_free=2.8, wall=200515
2021-06-21 02:20:51 | INFO | train_inner | epoch 006: 2531 / 3002 loss=2.439, ppl=5.42, wps=5948.3, ups=0.09, wpb=64835, bsz=128, num_updates=17437, lr=9.98685e-05, gnorm=2.016, loss_scale=2, train_wall=10, gb_free=2.8, wall=200526
2021-06-21 02:21:02 | INFO | train_inner | epoch 006: 2532 / 3002 loss=2.572, ppl=5.94, wps=5857.3, ups=0.09, wpb=64838, bsz=128, num_updates=17438, lr=9.98685e-05, gnorm=2.147, loss_scale=2, train_wall=11, gb_free=2.8, wall=200537
2021-06-21 02:21:13 | INFO | train_inner | epoch 006: 2533 / 3002 loss=2.517, ppl=5.72, wps=5907.2, ups=0.09, wpb=64848, bsz=128, num_updates=17439, lr=9.98685e-05, gnorm=2.01, loss_scale=2, train_wall=11, gb_free=2.8, wall=200548
2021-06-21 02:21:24 | INFO | train_inner | epoch 006: 2534 / 3002 loss=2.418, ppl=5.34, wps=5895.1, ups=0.09, wpb=64937, bsz=128, num_updates=17440, lr=9.98685e-05, gnorm=1.943, loss_scale=2, train_wall=11, gb_free=2.8, wall=200559
2021-06-21 02:21:35 | INFO | train_inner | epoch 006: 2535 / 3002 loss=2.48, ppl=5.58, wps=5937.6, ups=0.09, wpb=64826, bsz=128, num_updates=17441, lr=9.98685e-05, gnorm=2.012, loss_scale=2, train_wall=10, gb_free=2.8, wall=200570
2021-06-21 02:21:46 | INFO | train_inner | epoch 006: 2536 / 3002 loss=2.382, ppl=5.21, wps=5875.3, ups=0.09, wpb=64809, bsz=128, num_updates=17442, lr=9.98685e-05, gnorm=1.866, loss_scale=2, train_wall=11, gb_free=2.8, wall=200581
2021-06-21 02:21:57 | INFO | train_inner | epoch 006: 2537 / 3002 loss=2.629, ppl=6.19, wps=5857.3, ups=0.09, wpb=64875, bsz=128, num_updates=17443, lr=9.98684e-05, gnorm=1.961, loss_scale=2, train_wall=11, gb_free=2.8, wall=200592
2021-06-21 02:22:09 | INFO | train_inner | epoch 006: 2538 / 3002 loss=2.44, ppl=5.43, wps=5791.1, ups=0.09, wpb=64777, bsz=128, num_updates=17444, lr=9.98684e-05, gnorm=1.864, loss_scale=2, train_wall=11, gb_free=2.8, wall=200603
2021-06-21 02:22:20 | INFO | train_inner | epoch 006: 2539 / 3002 loss=2.421, ppl=5.36, wps=5815.7, ups=0.09, wpb=64849, bsz=128, num_updates=17445, lr=9.98684e-05, gnorm=2.056, loss_scale=2, train_wall=11, gb_free=2.8, wall=200614
2021-06-21 02:22:31 | INFO | train_inner | epoch 006: 2540 / 3002 loss=2.365, ppl=5.15, wps=5869.1, ups=0.09, wpb=64931, bsz=128, num_updates=17446, lr=9.98684e-05, gnorm=1.944, loss_scale=2, train_wall=11, gb_free=2.8, wall=200625
2021-06-21 02:22:42 | INFO | train_inner | epoch 006: 2541 / 3002 loss=2.334, ppl=5.04, wps=5828.8, ups=0.09, wpb=64889, bsz=128, num_updates=17447, lr=9.98684e-05, gnorm=1.96, loss_scale=2, train_wall=11, gb_free=2.8, wall=200636
2021-06-21 02:22:53 | INFO | train_inner | epoch 006: 2542 / 3002 loss=2.449, ppl=5.46, wps=5878.6, ups=0.09, wpb=64871, bsz=128, num_updates=17448, lr=9.98684e-05, gnorm=4.393, loss_scale=2, train_wall=11, gb_free=2.8, wall=200647
2021-06-21 02:23:04 | INFO | train_inner | epoch 006: 2543 / 3002 loss=2.587, ppl=6.01, wps=5873.6, ups=0.09, wpb=64858, bsz=128, num_updates=17449, lr=9.98684e-05, gnorm=2.017, loss_scale=2, train_wall=11, gb_free=2.8, wall=200658
2021-06-21 02:23:15 | INFO | train_inner | epoch 006: 2544 / 3002 loss=2.569, ppl=5.93, wps=5983.4, ups=0.09, wpb=64848, bsz=128, num_updates=17450, lr=9.98684e-05, gnorm=2.337, loss_scale=2, train_wall=10, gb_free=2.8, wall=200669
2021-06-21 02:23:26 | INFO | train_inner | epoch 006: 2545 / 3002 loss=2.401, ppl=5.28, wps=5870.4, ups=0.09, wpb=64802, bsz=128, num_updates=17451, lr=9.98684e-05, gnorm=1.884, loss_scale=2, train_wall=11, gb_free=2.8, wall=200680
2021-06-21 02:23:37 | INFO | train_inner | epoch 006: 2546 / 3002 loss=2.496, ppl=5.64, wps=5812.7, ups=0.09, wpb=64858, bsz=128, num_updates=17452, lr=9.98684e-05, gnorm=2, loss_scale=2, train_wall=11, gb_free=2.8, wall=200691
2021-06-21 02:23:48 | INFO | train_inner | epoch 006: 2547 / 3002 loss=2.516, ppl=5.72, wps=5758.8, ups=0.09, wpb=64802, bsz=128, num_updates=17453, lr=9.98684e-05, gnorm=1.941, loss_scale=2, train_wall=11, gb_free=2.8, wall=200703
2021-06-21 02:24:00 | INFO | train_inner | epoch 006: 2548 / 3002 loss=2.443, ppl=5.44, wps=5809, ups=0.09, wpb=64860, bsz=128, num_updates=17454, lr=9.98684e-05, gnorm=1.976, loss_scale=2, train_wall=11, gb_free=2.8, wall=200714
2021-06-21 02:24:11 | INFO | train_inner | epoch 006: 2549 / 3002 loss=2.499, ppl=5.65, wps=5760.4, ups=0.09, wpb=64812, bsz=128, num_updates=17455, lr=9.98683e-05, gnorm=1.937, loss_scale=2, train_wall=11, gb_free=2.8, wall=200725
2021-06-21 02:24:22 | INFO | train_inner | epoch 006: 2550 / 3002 loss=2.509, ppl=5.69, wps=5846.4, ups=0.09, wpb=64804, bsz=128, num_updates=17456, lr=9.98683e-05, gnorm=3.284, loss_scale=2, train_wall=11, gb_free=2.8, wall=200736
2021-06-21 02:24:33 | INFO | train_inner | epoch 006: 2551 / 3002 loss=2.415, ppl=5.33, wps=5857.9, ups=0.09, wpb=64846, bsz=128, num_updates=17457, lr=9.98683e-05, gnorm=1.927, loss_scale=2, train_wall=11, gb_free=2.8, wall=200747
2021-06-21 02:24:44 | INFO | train_inner | epoch 006: 2552 / 3002 loss=2.379, ppl=5.2, wps=5937.8, ups=0.09, wpb=64857, bsz=128, num_updates=17458, lr=9.98683e-05, gnorm=1.922, loss_scale=2, train_wall=10, gb_free=2.8, wall=200758
2021-06-21 02:24:55 | INFO | train_inner | epoch 006: 2553 / 3002 loss=2.497, ppl=5.64, wps=5914.3, ups=0.09, wpb=64821, bsz=128, num_updates=17459, lr=9.98683e-05, gnorm=2.038, loss_scale=2, train_wall=10, gb_free=2.8, wall=200769
2021-06-21 02:25:06 | INFO | train_inner | epoch 006: 2554 / 3002 loss=2.55, ppl=5.86, wps=5809.7, ups=0.09, wpb=64818, bsz=128, num_updates=17460, lr=9.98683e-05, gnorm=3.451, loss_scale=2, train_wall=11, gb_free=2.8, wall=200780
2021-06-21 02:25:17 | INFO | train_inner | epoch 006: 2555 / 3002 loss=2.364, ppl=5.15, wps=5968.3, ups=0.09, wpb=64842, bsz=128, num_updates=17461, lr=9.98683e-05, gnorm=1.983, loss_scale=2, train_wall=10, gb_free=2.8, wall=200791
2021-06-21 02:25:28 | INFO | train_inner | epoch 006: 2556 / 3002 loss=2.533, ppl=5.79, wps=5780.8, ups=0.09, wpb=64801, bsz=128, num_updates=17462, lr=9.98683e-05, gnorm=1.947, loss_scale=2, train_wall=11, gb_free=2.8, wall=200802
2021-06-21 02:25:39 | INFO | train_inner | epoch 006: 2557 / 3002 loss=2.309, ppl=4.96, wps=5905.9, ups=0.09, wpb=64902, bsz=128, num_updates=17463, lr=9.98683e-05, gnorm=1.949, loss_scale=2, train_wall=11, gb_free=2.8, wall=200813
2021-06-21 02:25:50 | INFO | train_inner | epoch 006: 2558 / 3002 loss=2.506, ppl=5.68, wps=5822.2, ups=0.09, wpb=64773, bsz=128, num_updates=17464, lr=9.98683e-05, gnorm=1.934, loss_scale=2, train_wall=11, gb_free=2.8, wall=200825
2021-06-21 02:26:01 | INFO | train_inner | epoch 006: 2559 / 3002 loss=2.624, ppl=6.17, wps=6100.9, ups=0.09, wpb=64847, bsz=128, num_updates=17465, lr=9.98683e-05, gnorm=1.961, loss_scale=2, train_wall=10, gb_free=2.8, wall=200835
2021-06-21 02:26:12 | INFO | train_inner | epoch 006: 2560 / 3002 loss=2.489, ppl=5.61, wps=5850.6, ups=0.09, wpb=64742, bsz=128, num_updates=17466, lr=9.98683e-05, gnorm=1.838, loss_scale=2, train_wall=11, gb_free=2.8, wall=200846
2021-06-21 02:26:23 | INFO | train_inner | epoch 006: 2561 / 3002 loss=2.448, ppl=5.46, wps=5918.2, ups=0.09, wpb=64812, bsz=128, num_updates=17467, lr=9.98683e-05, gnorm=1.948, loss_scale=2, train_wall=11, gb_free=2.8, wall=200857
2021-06-21 02:26:34 | INFO | train_inner | epoch 006: 2562 / 3002 loss=2.477, ppl=5.57, wps=5875.8, ups=0.09, wpb=64767, bsz=128, num_updates=17468, lr=9.98682e-05, gnorm=2.044, loss_scale=2, train_wall=11, gb_free=2.8, wall=200868
2021-06-21 02:26:45 | INFO | train_inner | epoch 006: 2563 / 3002 loss=2.397, ppl=5.27, wps=5849.2, ups=0.09, wpb=64843, bsz=128, num_updates=17469, lr=9.98682e-05, gnorm=2.04, loss_scale=2, train_wall=11, gb_free=2.8, wall=200879
2021-06-21 02:26:56 | INFO | train_inner | epoch 006: 2564 / 3002 loss=2.545, ppl=5.84, wps=5780.2, ups=0.09, wpb=64745, bsz=128, num_updates=17470, lr=9.98682e-05, gnorm=2.082, loss_scale=2, train_wall=11, gb_free=2.8, wall=200890
2021-06-21 02:27:07 | INFO | train_inner | epoch 006: 2565 / 3002 loss=2.481, ppl=5.58, wps=5813.1, ups=0.09, wpb=64750, bsz=128, num_updates=17471, lr=9.98682e-05, gnorm=11.151, loss_scale=2, train_wall=11, gb_free=2.8, wall=200902
2021-06-21 02:27:18 | INFO | train_inner | epoch 006: 2566 / 3002 loss=2.311, ppl=4.96, wps=5830.9, ups=0.09, wpb=64847, bsz=128, num_updates=17472, lr=9.98682e-05, gnorm=6.167, loss_scale=2, train_wall=11, gb_free=2.8, wall=200913
2021-06-21 02:27:29 | INFO | train_inner | epoch 006: 2567 / 3002 loss=2.449, ppl=5.46, wps=5903.5, ups=0.09, wpb=64719, bsz=128, num_updates=17473, lr=9.98682e-05, gnorm=1.947, loss_scale=2, train_wall=11, gb_free=2.8, wall=200924
2021-06-21 02:27:40 | INFO | train_inner | epoch 006: 2568 / 3002 loss=2.609, ppl=6.1, wps=5855, ups=0.09, wpb=64848, bsz=128, num_updates=17474, lr=9.98682e-05, gnorm=2.173, loss_scale=2, train_wall=11, gb_free=2.8, wall=200935
2021-06-21 02:27:51 | INFO | train_inner | epoch 006: 2569 / 3002 loss=2.361, ppl=5.14, wps=6115.6, ups=0.09, wpb=64874, bsz=128, num_updates=17475, lr=9.98682e-05, gnorm=2.008, loss_scale=2, train_wall=10, gb_free=2.8, wall=200945
2021-06-21 02:28:02 | INFO | train_inner | epoch 006: 2570 / 3002 loss=2.325, ppl=5.01, wps=5838.9, ups=0.09, wpb=64790, bsz=128, num_updates=17476, lr=9.98682e-05, gnorm=1.988, loss_scale=2, train_wall=11, gb_free=2.8, wall=200956
2021-06-21 02:28:13 | INFO | train_inner | epoch 006: 2571 / 3002 loss=2.487, ppl=5.6, wps=5851.2, ups=0.09, wpb=64784, bsz=128, num_updates=17477, lr=9.98682e-05, gnorm=2.094, loss_scale=2, train_wall=11, gb_free=2.8, wall=200968
2021-06-21 02:28:24 | INFO | train_inner | epoch 006: 2572 / 3002 loss=2.503, ppl=5.67, wps=5935.6, ups=0.09, wpb=64926, bsz=128, num_updates=17478, lr=9.98682e-05, gnorm=4.368, loss_scale=2, train_wall=10, gb_free=2.8, wall=200978
2021-06-21 02:28:35 | INFO | train_inner | epoch 006: 2573 / 3002 loss=2.299, ppl=4.92, wps=5820.4, ups=0.09, wpb=64904, bsz=128, num_updates=17479, lr=9.98682e-05, gnorm=1.923, loss_scale=2, train_wall=11, gb_free=2.8, wall=200990
2021-06-21 02:28:46 | INFO | train_inner | epoch 006: 2574 / 3002 loss=2.51, ppl=5.7, wps=5904.5, ups=0.09, wpb=64870, bsz=128, num_updates=17480, lr=9.98681e-05, gnorm=2.145, loss_scale=2, train_wall=11, gb_free=2.8, wall=201001
2021-06-21 02:28:57 | INFO | train_inner | epoch 006: 2575 / 3002 loss=2.602, ppl=6.07, wps=5870.7, ups=0.09, wpb=64841, bsz=128, num_updates=17481, lr=9.98681e-05, gnorm=2.136, loss_scale=2, train_wall=11, gb_free=2.8, wall=201012
2021-06-21 02:29:08 | INFO | train_inner | epoch 006: 2576 / 3002 loss=2.359, ppl=5.13, wps=5941.3, ups=0.09, wpb=64833, bsz=128, num_updates=17482, lr=9.98681e-05, gnorm=2.151, loss_scale=2, train_wall=10, gb_free=2.8, wall=201023
2021-06-21 02:29:19 | INFO | train_inner | epoch 006: 2577 / 3002 loss=2.38, ppl=5.2, wps=5848.8, ups=0.09, wpb=64780, bsz=128, num_updates=17483, lr=9.98681e-05, gnorm=2.002, loss_scale=2, train_wall=11, gb_free=2.8, wall=201034
2021-06-21 02:29:30 | INFO | train_inner | epoch 006: 2578 / 3002 loss=2.406, ppl=5.3, wps=5868.4, ups=0.09, wpb=64849, bsz=128, num_updates=17484, lr=9.98681e-05, gnorm=2.107, loss_scale=2, train_wall=11, gb_free=2.8, wall=201045
2021-06-21 02:29:42 | INFO | train_inner | epoch 006: 2579 / 3002 loss=2.424, ppl=5.37, wps=5771.8, ups=0.09, wpb=64840, bsz=128, num_updates=17485, lr=9.98681e-05, gnorm=2.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=201056
2021-06-21 02:29:53 | INFO | train_inner | epoch 006: 2580 / 3002 loss=2.515, ppl=5.72, wps=5929.3, ups=0.09, wpb=64835, bsz=128, num_updates=17486, lr=9.98681e-05, gnorm=2.015, loss_scale=2, train_wall=10, gb_free=2.8, wall=201067
2021-06-21 02:30:04 | INFO | train_inner | epoch 006: 2581 / 3002 loss=2.624, ppl=6.17, wps=5741.2, ups=0.09, wpb=64809, bsz=128, num_updates=17487, lr=9.98681e-05, gnorm=2.591, loss_scale=2, train_wall=11, gb_free=2.8, wall=201078
2021-06-21 02:30:15 | INFO | train_inner | epoch 006: 2582 / 3002 loss=2.635, ppl=6.21, wps=5981.2, ups=0.09, wpb=64857, bsz=128, num_updates=17488, lr=9.98681e-05, gnorm=5.219, loss_scale=2, train_wall=10, gb_free=2.8, wall=201089
2021-06-21 02:30:26 | INFO | train_inner | epoch 006: 2583 / 3002 loss=2.532, ppl=5.78, wps=5906.1, ups=0.09, wpb=64812, bsz=128, num_updates=17489, lr=9.98681e-05, gnorm=1.944, loss_scale=2, train_wall=11, gb_free=2.8, wall=201100
2021-06-21 02:30:37 | INFO | train_inner | epoch 006: 2584 / 3002 loss=2.547, ppl=5.84, wps=5881.9, ups=0.09, wpb=64887, bsz=128, num_updates=17490, lr=9.98681e-05, gnorm=2.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=201111
2021-06-21 02:30:48 | INFO | train_inner | epoch 006: 2585 / 3002 loss=2.555, ppl=5.87, wps=5879.2, ups=0.09, wpb=64871, bsz=128, num_updates=17491, lr=9.98681e-05, gnorm=2.019, loss_scale=2, train_wall=11, gb_free=2.8, wall=201122
2021-06-21 02:30:59 | INFO | train_inner | epoch 006: 2586 / 3002 loss=2.432, ppl=5.4, wps=5921.9, ups=0.09, wpb=64918, bsz=128, num_updates=17492, lr=9.98681e-05, gnorm=1.975, loss_scale=2, train_wall=10, gb_free=2.8, wall=201133
2021-06-21 02:31:10 | INFO | train_inner | epoch 006: 2587 / 3002 loss=2.381, ppl=5.21, wps=5824, ups=0.09, wpb=64879, bsz=128, num_updates=17493, lr=9.9868e-05, gnorm=2.14, loss_scale=2, train_wall=11, gb_free=2.8, wall=201144
2021-06-21 02:31:21 | INFO | train_inner | epoch 006: 2588 / 3002 loss=2.471, ppl=5.55, wps=5804.8, ups=0.09, wpb=64880, bsz=128, num_updates=17494, lr=9.9868e-05, gnorm=5.84, loss_scale=2, train_wall=11, gb_free=2.8, wall=201155
2021-06-21 02:31:32 | INFO | train_inner | epoch 006: 2589 / 3002 loss=2.481, ppl=5.58, wps=5758.3, ups=0.09, wpb=64858, bsz=128, num_updates=17495, lr=9.9868e-05, gnorm=2.484, loss_scale=2, train_wall=11, gb_free=2.8, wall=201167
2021-06-21 02:31:43 | INFO | train_inner | epoch 006: 2590 / 3002 loss=2.526, ppl=5.76, wps=5980.1, ups=0.09, wpb=64900, bsz=128, num_updates=17496, lr=9.9868e-05, gnorm=1.997, loss_scale=2, train_wall=10, gb_free=2.8, wall=201177
2021-06-21 02:31:54 | INFO | train_inner | epoch 006: 2591 / 3002 loss=2.675, ppl=6.38, wps=5951.5, ups=0.09, wpb=64804, bsz=128, num_updates=17497, lr=9.9868e-05, gnorm=3.496, loss_scale=2, train_wall=10, gb_free=2.8, wall=201188
2021-06-21 02:32:05 | INFO | train_inner | epoch 006: 2592 / 3002 loss=2.416, ppl=5.34, wps=5829.3, ups=0.09, wpb=64768, bsz=128, num_updates=17498, lr=9.9868e-05, gnorm=2.399, loss_scale=2, train_wall=11, gb_free=2.8, wall=201199
2021-06-21 02:32:16 | INFO | train_inner | epoch 006: 2593 / 3002 loss=2.469, ppl=5.54, wps=5921.8, ups=0.09, wpb=64804, bsz=128, num_updates=17499, lr=9.9868e-05, gnorm=2.67, loss_scale=2, train_wall=10, gb_free=2.8, wall=201210
2021-06-21 02:32:27 | INFO | train_inner | epoch 006: 2594 / 3002 loss=2.489, ppl=5.61, wps=5890.7, ups=0.09, wpb=64809, bsz=128, num_updates=17500, lr=9.9868e-05, gnorm=14.66, loss_scale=2, train_wall=11, gb_free=2.8, wall=201221
2021-06-21 02:32:38 | INFO | train_inner | epoch 006: 2595 / 3002 loss=2.636, ppl=6.22, wps=5910.7, ups=0.09, wpb=64833, bsz=128, num_updates=17501, lr=9.9868e-05, gnorm=1.977, loss_scale=2, train_wall=11, gb_free=2.8, wall=201232
2021-06-21 02:32:49 | INFO | train_inner | epoch 006: 2596 / 3002 loss=2.501, ppl=5.66, wps=5844.9, ups=0.09, wpb=64892, bsz=128, num_updates=17502, lr=9.9868e-05, gnorm=7.298, loss_scale=2, train_wall=11, gb_free=2.8, wall=201243
2021-06-21 02:33:00 | INFO | train_inner | epoch 006: 2597 / 3002 loss=2.538, ppl=5.81, wps=5946, ups=0.09, wpb=64674, bsz=128, num_updates=17503, lr=9.9868e-05, gnorm=2.509, loss_scale=2, train_wall=10, gb_free=2.8, wall=201254
2021-06-21 02:33:11 | INFO | train_inner | epoch 006: 2598 / 3002 loss=2.502, ppl=5.66, wps=5940.1, ups=0.09, wpb=64887, bsz=128, num_updates=17504, lr=9.9868e-05, gnorm=9.989, loss_scale=2, train_wall=10, gb_free=2.8, wall=201265
2021-06-21 02:33:22 | INFO | train_inner | epoch 006: 2599 / 3002 loss=2.502, ppl=5.66, wps=5889.8, ups=0.09, wpb=64775, bsz=128, num_updates=17505, lr=9.98679e-05, gnorm=2.523, loss_scale=2, train_wall=11, gb_free=2.8, wall=201276
2021-06-21 02:33:33 | INFO | train_inner | epoch 006: 2600 / 3002 loss=2.564, ppl=5.91, wps=5881.5, ups=0.09, wpb=64757, bsz=128, num_updates=17506, lr=9.98679e-05, gnorm=4.279, loss_scale=2, train_wall=11, gb_free=2.8, wall=201287
2021-06-21 02:33:44 | INFO | train_inner | epoch 006: 2601 / 3002 loss=2.471, ppl=5.54, wps=5881.8, ups=0.09, wpb=64904, bsz=128, num_updates=17507, lr=9.98679e-05, gnorm=2.379, loss_scale=2, train_wall=11, gb_free=2.8, wall=201298
2021-06-21 02:33:55 | INFO | train_inner | epoch 006: 2602 / 3002 loss=2.329, ppl=5.02, wps=5840.2, ups=0.09, wpb=64836, bsz=128, num_updates=17508, lr=9.98679e-05, gnorm=2.949, loss_scale=2, train_wall=11, gb_free=2.8, wall=201309
2021-06-21 02:34:06 | INFO | train_inner | epoch 006: 2603 / 3002 loss=2.59, ppl=6.02, wps=5801.3, ups=0.09, wpb=64803, bsz=128, num_updates=17509, lr=9.98679e-05, gnorm=2.334, loss_scale=2, train_wall=11, gb_free=2.8, wall=201321
2021-06-21 02:34:18 | INFO | train_inner | epoch 006: 2604 / 3002 loss=2.588, ppl=6.01, wps=5736.2, ups=0.09, wpb=64784, bsz=128, num_updates=17510, lr=9.98679e-05, gnorm=2.188, loss_scale=2, train_wall=11, gb_free=2.8, wall=201332
2021-06-21 02:34:28 | INFO | train_inner | epoch 006: 2605 / 3002 loss=2.454, ppl=5.48, wps=5957.2, ups=0.09, wpb=64892, bsz=128, num_updates=17511, lr=9.98679e-05, gnorm=2.997, loss_scale=2, train_wall=10, gb_free=2.8, wall=201343
2021-06-21 02:34:40 | INFO | train_inner | epoch 006: 2606 / 3002 loss=2.464, ppl=5.52, wps=5781.3, ups=0.09, wpb=64843, bsz=128, num_updates=17512, lr=9.98679e-05, gnorm=2.692, loss_scale=2, train_wall=11, gb_free=2.8, wall=201354
2021-06-21 02:34:51 | INFO | train_inner | epoch 006: 2607 / 3002 loss=2.391, ppl=5.24, wps=5823.2, ups=0.09, wpb=64816, bsz=128, num_updates=17513, lr=9.98679e-05, gnorm=11.301, loss_scale=2, train_wall=11, gb_free=2.8, wall=201365
2021-06-21 02:35:02 | INFO | train_inner | epoch 006: 2608 / 3002 loss=2.601, ppl=6.07, wps=5741.5, ups=0.09, wpb=64778, bsz=128, num_updates=17514, lr=9.98679e-05, gnorm=4.659, loss_scale=2, train_wall=11, gb_free=2.8, wall=201376
2021-06-21 02:35:13 | INFO | train_inner | epoch 006: 2609 / 3002 loss=2.488, ppl=5.61, wps=5753.6, ups=0.09, wpb=64764, bsz=128, num_updates=17515, lr=9.98679e-05, gnorm=2.216, loss_scale=2, train_wall=11, gb_free=2.8, wall=201388
2021-06-21 02:35:24 | INFO | train_inner | epoch 006: 2610 / 3002 loss=2.433, ppl=5.4, wps=5945.2, ups=0.09, wpb=64909, bsz=128, num_updates=17516, lr=9.98679e-05, gnorm=2.382, loss_scale=2, train_wall=10, gb_free=2.8, wall=201399
2021-06-21 02:35:35 | INFO | train_inner | epoch 006: 2611 / 3002 loss=2.446, ppl=5.45, wps=5883.8, ups=0.09, wpb=64827, bsz=128, num_updates=17517, lr=9.98679e-05, gnorm=2.092, loss_scale=2, train_wall=11, gb_free=2.8, wall=201410
2021-06-21 02:35:47 | INFO | train_inner | epoch 006: 2612 / 3002 loss=2.429, ppl=5.39, wps=5693.1, ups=0.09, wpb=64765, bsz=128, num_updates=17518, lr=9.98678e-05, gnorm=2.444, loss_scale=2, train_wall=11, gb_free=2.8, wall=201421
2021-06-21 02:35:58 | INFO | train_inner | epoch 006: 2613 / 3002 loss=2.346, ppl=5.09, wps=5834.5, ups=0.09, wpb=64808, bsz=128, num_updates=17519, lr=9.98678e-05, gnorm=2.21, loss_scale=2, train_wall=11, gb_free=2.8, wall=201432
2021-06-21 02:36:09 | INFO | train_inner | epoch 006: 2614 / 3002 loss=2.417, ppl=5.34, wps=5814.4, ups=0.09, wpb=64880, bsz=128, num_updates=17520, lr=9.98678e-05, gnorm=4.11, loss_scale=2, train_wall=11, gb_free=2.8, wall=201443
2021-06-21 02:36:20 | INFO | train_inner | epoch 006: 2615 / 3002 loss=2.516, ppl=5.72, wps=5906.8, ups=0.09, wpb=64908, bsz=128, num_updates=17521, lr=9.98678e-05, gnorm=2.793, loss_scale=2, train_wall=11, gb_free=2.8, wall=201454
2021-06-21 02:36:31 | INFO | train_inner | epoch 006: 2616 / 3002 loss=2.489, ppl=5.62, wps=5742.1, ups=0.09, wpb=64889, bsz=128, num_updates=17522, lr=9.98678e-05, gnorm=1.941, loss_scale=2, train_wall=11, gb_free=2.8, wall=201466
2021-06-21 02:36:42 | INFO | train_inner | epoch 006: 2617 / 3002 loss=2.502, ppl=5.67, wps=5928.7, ups=0.09, wpb=64860, bsz=128, num_updates=17523, lr=9.98678e-05, gnorm=2.074, loss_scale=2, train_wall=11, gb_free=2.8, wall=201476
2021-06-21 02:36:53 | INFO | train_inner | epoch 006: 2618 / 3002 loss=2.44, ppl=5.43, wps=5795.9, ups=0.09, wpb=64774, bsz=128, num_updates=17524, lr=9.98678e-05, gnorm=2.416, loss_scale=2, train_wall=11, gb_free=2.8, wall=201488
2021-06-21 02:37:04 | INFO | train_inner | epoch 006: 2619 / 3002 loss=2.402, ppl=5.29, wps=5888.7, ups=0.09, wpb=64859, bsz=128, num_updates=17525, lr=9.98678e-05, gnorm=5.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=201499
2021-06-21 02:37:15 | INFO | train_inner | epoch 006: 2620 / 3002 loss=2.599, ppl=6.06, wps=5795.5, ups=0.09, wpb=64731, bsz=128, num_updates=17526, lr=9.98678e-05, gnorm=2.492, loss_scale=2, train_wall=11, gb_free=2.8, wall=201510
2021-06-21 02:37:27 | INFO | train_inner | epoch 006: 2621 / 3002 loss=2.385, ppl=5.22, wps=5883.7, ups=0.09, wpb=64918, bsz=128, num_updates=17527, lr=9.98678e-05, gnorm=4.929, loss_scale=2, train_wall=11, gb_free=2.8, wall=201521
2021-06-21 02:37:38 | INFO | train_inner | epoch 006: 2622 / 3002 loss=2.493, ppl=5.63, wps=5824.9, ups=0.09, wpb=64780, bsz=128, num_updates=17528, lr=9.98678e-05, gnorm=2.256, loss_scale=2, train_wall=11, gb_free=2.8, wall=201532
2021-06-21 02:37:49 | INFO | train_inner | epoch 006: 2623 / 3002 loss=2.566, ppl=5.92, wps=5754.3, ups=0.09, wpb=64831, bsz=128, num_updates=17529, lr=9.98678e-05, gnorm=1.964, loss_scale=2, train_wall=11, gb_free=2.8, wall=201543
2021-06-21 02:38:00 | INFO | train_inner | epoch 006: 2624 / 3002 loss=2.528, ppl=5.77, wps=5888.7, ups=0.09, wpb=64924, bsz=128, num_updates=17530, lr=9.98677e-05, gnorm=2.686, loss_scale=2, train_wall=11, gb_free=2.8, wall=201554
2021-06-21 02:38:11 | INFO | train_inner | epoch 006: 2625 / 3002 loss=2.347, ppl=5.09, wps=5876.3, ups=0.09, wpb=64847, bsz=128, num_updates=17531, lr=9.98677e-05, gnorm=2.057, loss_scale=2, train_wall=11, gb_free=2.8, wall=201565
2021-06-21 02:38:22 | INFO | train_inner | epoch 006: 2626 / 3002 loss=2.602, ppl=6.07, wps=5907.3, ups=0.09, wpb=64817, bsz=128, num_updates=17532, lr=9.98677e-05, gnorm=2.55, loss_scale=2, train_wall=11, gb_free=2.8, wall=201576
2021-06-21 02:38:33 | INFO | train_inner | epoch 006: 2627 / 3002 loss=2.636, ppl=6.22, wps=6007.1, ups=0.09, wpb=64778, bsz=128, num_updates=17533, lr=9.98677e-05, gnorm=21.152, loss_scale=2, train_wall=10, gb_free=2.8, wall=201587
2021-06-21 02:38:44 | INFO | train_inner | epoch 006: 2628 / 3002 loss=2.512, ppl=5.7, wps=5798.8, ups=0.09, wpb=64785, bsz=128, num_updates=17534, lr=9.98677e-05, gnorm=7.077, loss_scale=2, train_wall=11, gb_free=2.8, wall=201598
2021-06-21 02:38:55 | INFO | train_inner | epoch 006: 2629 / 3002 loss=2.571, ppl=5.94, wps=5791.5, ups=0.09, wpb=64814, bsz=128, num_updates=17535, lr=9.98677e-05, gnorm=2.162, loss_scale=2, train_wall=11, gb_free=2.8, wall=201609
2021-06-21 02:39:06 | INFO | train_inner | epoch 006: 2630 / 3002 loss=2.491, ppl=5.62, wps=5839.4, ups=0.09, wpb=64839, bsz=128, num_updates=17536, lr=9.98677e-05, gnorm=2.081, loss_scale=2, train_wall=11, gb_free=2.8, wall=201621
2021-06-21 02:39:17 | INFO | train_inner | epoch 006: 2631 / 3002 loss=2.64, ppl=6.23, wps=5796, ups=0.09, wpb=64793, bsz=128, num_updates=17537, lr=9.98677e-05, gnorm=2.465, loss_scale=2, train_wall=11, gb_free=2.8, wall=201632
2021-06-21 02:39:28 | INFO | train_inner | epoch 006: 2632 / 3002 loss=2.59, ppl=6.02, wps=5832.6, ups=0.09, wpb=64761, bsz=128, num_updates=17538, lr=9.98677e-05, gnorm=9.498, loss_scale=2, train_wall=11, gb_free=2.8, wall=201643
2021-06-21 02:39:39 | INFO | train_inner | epoch 006: 2633 / 3002 loss=2.447, ppl=5.45, wps=5934.2, ups=0.09, wpb=64781, bsz=128, num_updates=17539, lr=9.98677e-05, gnorm=2.833, loss_scale=2, train_wall=10, gb_free=2.8, wall=201654
2021-06-21 02:39:50 | INFO | train_inner | epoch 006: 2634 / 3002 loss=2.514, ppl=5.71, wps=5856.6, ups=0.09, wpb=64812, bsz=128, num_updates=17540, lr=9.98677e-05, gnorm=3.543, loss_scale=2, train_wall=11, gb_free=2.8, wall=201665
2021-06-21 02:40:02 | INFO | train_inner | epoch 006: 2635 / 3002 loss=2.611, ppl=6.11, wps=5747.6, ups=0.09, wpb=64722, bsz=128, num_updates=17541, lr=9.98677e-05, gnorm=2.182, loss_scale=2, train_wall=11, gb_free=2.8, wall=201676
2021-06-21 02:40:13 | INFO | train_inner | epoch 006: 2636 / 3002 loss=2.46, ppl=5.5, wps=5918.2, ups=0.09, wpb=64873, bsz=128, num_updates=17542, lr=9.98677e-05, gnorm=3.064, loss_scale=2, train_wall=11, gb_free=2.8, wall=201687
2021-06-21 02:40:24 | INFO | train_inner | epoch 006: 2637 / 3002 loss=2.407, ppl=5.3, wps=5801, ups=0.09, wpb=64839, bsz=128, num_updates=17543, lr=9.98676e-05, gnorm=3.312, loss_scale=2, train_wall=11, gb_free=2.8, wall=201698
2021-06-21 02:40:35 | INFO | train_inner | epoch 006: 2638 / 3002 loss=2.32, ppl=4.99, wps=5905.6, ups=0.09, wpb=64925, bsz=128, num_updates=17544, lr=9.98676e-05, gnorm=1.991, loss_scale=2, train_wall=11, gb_free=2.8, wall=201709
2021-06-21 02:40:46 | INFO | train_inner | epoch 006: 2639 / 3002 loss=2.425, ppl=5.37, wps=5913.3, ups=0.09, wpb=64881, bsz=128, num_updates=17545, lr=9.98676e-05, gnorm=2.21, loss_scale=2, train_wall=10, gb_free=2.8, wall=201720
2021-06-21 02:40:57 | INFO | train_inner | epoch 006: 2640 / 3002 loss=2.486, ppl=5.6, wps=5734.3, ups=0.09, wpb=64862, bsz=128, num_updates=17546, lr=9.98676e-05, gnorm=25.834, loss_scale=2, train_wall=11, gb_free=2.8, wall=201731
2021-06-21 02:41:08 | INFO | train_inner | epoch 006: 2641 / 3002 loss=2.612, ppl=6.11, wps=5894.6, ups=0.09, wpb=64855, bsz=128, num_updates=17547, lr=9.98676e-05, gnorm=22.968, loss_scale=2, train_wall=11, gb_free=2.8, wall=201742
2021-06-21 02:41:19 | INFO | train_inner | epoch 006: 2642 / 3002 loss=2.549, ppl=5.85, wps=5871.1, ups=0.09, wpb=64835, bsz=128, num_updates=17548, lr=9.98676e-05, gnorm=2.43, loss_scale=2, train_wall=11, gb_free=2.8, wall=201754
2021-06-21 02:41:30 | INFO | train_inner | epoch 006: 2643 / 3002 loss=2.583, ppl=5.99, wps=5799.6, ups=0.09, wpb=64792, bsz=128, num_updates=17549, lr=9.98676e-05, gnorm=2.335, loss_scale=2, train_wall=11, gb_free=2.8, wall=201765
2021-06-21 02:41:42 | INFO | train_inner | epoch 006: 2644 / 3002 loss=2.378, ppl=5.2, wps=5773.4, ups=0.09, wpb=64806, bsz=128, num_updates=17550, lr=9.98676e-05, gnorm=7.509, loss_scale=2, train_wall=11, gb_free=2.8, wall=201776
2021-06-21 02:41:53 | INFO | train_inner | epoch 006: 2645 / 3002 loss=2.491, ppl=5.62, wps=5916.7, ups=0.09, wpb=64762, bsz=128, num_updates=17551, lr=9.98676e-05, gnorm=4.091, loss_scale=2, train_wall=10, gb_free=2.8, wall=201787
2021-06-21 02:42:04 | INFO | train_inner | epoch 006: 2646 / 3002 loss=2.52, ppl=5.74, wps=5794.4, ups=0.09, wpb=64873, bsz=128, num_updates=17552, lr=9.98676e-05, gnorm=6.231, loss_scale=2, train_wall=11, gb_free=2.8, wall=201798
2021-06-21 02:42:15 | INFO | train_inner | epoch 006: 2647 / 3002 loss=2.518, ppl=5.73, wps=5776, ups=0.09, wpb=64798, bsz=128, num_updates=17553, lr=9.98676e-05, gnorm=2.308, loss_scale=2, train_wall=11, gb_free=2.8, wall=201809
2021-06-21 02:42:26 | INFO | train_inner | epoch 006: 2648 / 3002 loss=2.324, ppl=5.01, wps=5732.1, ups=0.09, wpb=64842, bsz=128, num_updates=17554, lr=9.98676e-05, gnorm=3.176, loss_scale=2, train_wall=11, gb_free=2.8, wall=201821
2021-06-21 02:42:37 | INFO | train_inner | epoch 006: 2649 / 3002 loss=2.495, ppl=5.64, wps=5843.1, ups=0.09, wpb=64888, bsz=128, num_updates=17555, lr=9.98675e-05, gnorm=2.288, loss_scale=2, train_wall=11, gb_free=2.8, wall=201832
2021-06-21 02:42:49 | INFO | train_inner | epoch 006: 2650 / 3002 loss=2.446, ppl=5.45, wps=5800.7, ups=0.09, wpb=64804, bsz=128, num_updates=17556, lr=9.98675e-05, gnorm=2.89, loss_scale=2, train_wall=11, gb_free=2.8, wall=201843
2021-06-21 02:43:00 | INFO | train_inner | epoch 006: 2651 / 3002 loss=2.609, ppl=6.1, wps=5823, ups=0.09, wpb=64814, bsz=128, num_updates=17557, lr=9.98675e-05, gnorm=3.937, loss_scale=2, train_wall=11, gb_free=2.8, wall=201854
2021-06-21 02:43:11 | INFO | train_inner | epoch 006: 2652 / 3002 loss=2.563, ppl=5.91, wps=5820.3, ups=0.09, wpb=64859, bsz=128, num_updates=17558, lr=9.98675e-05, gnorm=4.498, loss_scale=2, train_wall=11, gb_free=2.8, wall=201865
2021-06-21 02:43:22 | INFO | train_inner | epoch 006: 2653 / 3002 loss=2.548, ppl=5.85, wps=5823.3, ups=0.09, wpb=64897, bsz=128, num_updates=17559, lr=9.98675e-05, gnorm=2.604, loss_scale=2, train_wall=11, gb_free=2.8, wall=201876
2021-06-21 02:43:33 | INFO | train_inner | epoch 006: 2654 / 3002 loss=2.513, ppl=5.71, wps=5844.8, ups=0.09, wpb=64834, bsz=128, num_updates=17560, lr=9.98675e-05, gnorm=2.352, loss_scale=2, train_wall=11, gb_free=2.8, wall=201887
2021-06-21 02:43:44 | INFO | train_inner | epoch 006: 2655 / 3002 loss=2.566, ppl=5.92, wps=5779.9, ups=0.09, wpb=64845, bsz=128, num_updates=17561, lr=9.98675e-05, gnorm=4.874, loss_scale=4, train_wall=11, gb_free=2.8, wall=201899
2021-06-21 02:43:55 | INFO | train_inner | epoch 006: 2656 / 3002 loss=2.503, ppl=5.67, wps=5846.3, ups=0.09, wpb=64859, bsz=128, num_updates=17562, lr=9.98675e-05, gnorm=2.412, loss_scale=4, train_wall=11, gb_free=2.8, wall=201910
2021-06-21 02:44:06 | INFO | train_inner | epoch 006: 2657 / 3002 loss=2.562, ppl=5.91, wps=5823.9, ups=0.09, wpb=64779, bsz=128, num_updates=17563, lr=9.98675e-05, gnorm=2.419, loss_scale=4, train_wall=11, gb_free=2.8, wall=201921
2021-06-21 02:44:18 | INFO | train_inner | epoch 006: 2658 / 3002 loss=2.519, ppl=5.73, wps=5875.8, ups=0.09, wpb=64830, bsz=128, num_updates=17564, lr=9.98675e-05, gnorm=2.341, loss_scale=4, train_wall=11, gb_free=2.8, wall=201932
2021-06-21 02:44:28 | INFO | train_inner | epoch 006: 2659 / 3002 loss=2.325, ppl=5.01, wps=5928.3, ups=0.09, wpb=64879, bsz=128, num_updates=17565, lr=9.98675e-05, gnorm=2.194, loss_scale=4, train_wall=10, gb_free=2.8, wall=201943
2021-06-21 02:44:39 | INFO | train_inner | epoch 006: 2660 / 3002 loss=2.475, ppl=5.56, wps=5941, ups=0.09, wpb=64789, bsz=128, num_updates=17566, lr=9.98675e-05, gnorm=2.316, loss_scale=4, train_wall=10, gb_free=2.8, wall=201954
2021-06-21 02:44:51 | INFO | train_inner | epoch 006: 2661 / 3002 loss=2.617, ppl=6.13, wps=5784.5, ups=0.09, wpb=64800, bsz=128, num_updates=17567, lr=9.98675e-05, gnorm=2.894, loss_scale=4, train_wall=11, gb_free=2.8, wall=201965
2021-06-21 02:45:02 | INFO | train_inner | epoch 006: 2662 / 3002 loss=2.533, ppl=5.79, wps=5840, ups=0.09, wpb=64737, bsz=128, num_updates=17568, lr=9.98674e-05, gnorm=2.342, loss_scale=4, train_wall=11, gb_free=2.8, wall=201976
2021-06-21 02:45:13 | INFO | train_inner | epoch 006: 2663 / 3002 loss=2.618, ppl=6.14, wps=5896.3, ups=0.09, wpb=64805, bsz=128, num_updates=17569, lr=9.98674e-05, gnorm=2.116, loss_scale=4, train_wall=11, gb_free=2.8, wall=201987
2021-06-21 02:45:24 | INFO | train_inner | epoch 006: 2664 / 3002 loss=2.626, ppl=6.17, wps=5815.7, ups=0.09, wpb=64803, bsz=128, num_updates=17570, lr=9.98674e-05, gnorm=2.242, loss_scale=4, train_wall=11, gb_free=2.8, wall=201998
2021-06-21 02:45:35 | INFO | train_inner | epoch 006: 2665 / 3002 loss=2.583, ppl=5.99, wps=5857.5, ups=0.09, wpb=64847, bsz=128, num_updates=17571, lr=9.98674e-05, gnorm=2.768, loss_scale=4, train_wall=11, gb_free=2.8, wall=202009
2021-06-21 02:45:46 | INFO | train_inner | epoch 006: 2666 / 3002 loss=2.582, ppl=5.99, wps=5954.4, ups=0.09, wpb=64747, bsz=128, num_updates=17572, lr=9.98674e-05, gnorm=2.141, loss_scale=4, train_wall=10, gb_free=2.8, wall=202020
2021-06-21 02:45:57 | INFO | train_inner | epoch 006: 2667 / 3002 loss=2.483, ppl=5.59, wps=5951.1, ups=0.09, wpb=64843, bsz=128, num_updates=17573, lr=9.98674e-05, gnorm=4.99, loss_scale=4, train_wall=10, gb_free=2.8, wall=202031
2021-06-21 02:46:08 | INFO | train_inner | epoch 006: 2668 / 3002 loss=2.57, ppl=5.94, wps=5909.9, ups=0.09, wpb=64915, bsz=128, num_updates=17574, lr=9.98674e-05, gnorm=2.051, loss_scale=4, train_wall=11, gb_free=2.8, wall=202042
2021-06-21 02:46:19 | INFO | train_inner | epoch 006: 2669 / 3002 loss=2.577, ppl=5.97, wps=5805.1, ups=0.09, wpb=64833, bsz=128, num_updates=17575, lr=9.98674e-05, gnorm=2.307, loss_scale=4, train_wall=11, gb_free=2.8, wall=202053
2021-06-21 02:46:30 | INFO | train_inner | epoch 006: 2670 / 3002 loss=2.452, ppl=5.47, wps=5804.3, ups=0.09, wpb=64826, bsz=128, num_updates=17576, lr=9.98674e-05, gnorm=2.077, loss_scale=4, train_wall=11, gb_free=2.8, wall=202064
2021-06-21 02:46:41 | INFO | train_inner | epoch 006: 2671 / 3002 loss=2.368, ppl=5.16, wps=5718.2, ups=0.09, wpb=64905, bsz=128, num_updates=17577, lr=9.98674e-05, gnorm=2.155, loss_scale=4, train_wall=11, gb_free=2.8, wall=202076
2021-06-21 02:46:53 | INFO | train_inner | epoch 006: 2672 / 3002 loss=2.608, ppl=6.1, wps=5768.4, ups=0.09, wpb=64907, bsz=128, num_updates=17578, lr=9.98674e-05, gnorm=2.155, loss_scale=4, train_wall=11, gb_free=2.8, wall=202087
2021-06-21 02:47:03 | INFO | train_inner | epoch 006: 2673 / 3002 loss=2.47, ppl=5.54, wps=5967.2, ups=0.09, wpb=64865, bsz=128, num_updates=17579, lr=9.98674e-05, gnorm=3.094, loss_scale=4, train_wall=10, gb_free=2.8, wall=202098
2021-06-21 02:47:15 | INFO | train_inner | epoch 006: 2674 / 3002 loss=2.576, ppl=5.96, wps=5848, ups=0.09, wpb=64815, bsz=128, num_updates=17580, lr=9.98673e-05, gnorm=2.194, loss_scale=4, train_wall=11, gb_free=2.8, wall=202109
2021-06-21 02:47:26 | INFO | train_inner | epoch 006: 2675 / 3002 loss=2.51, ppl=5.7, wps=5865.4, ups=0.09, wpb=64803, bsz=128, num_updates=17581, lr=9.98673e-05, gnorm=2.283, loss_scale=4, train_wall=11, gb_free=2.8, wall=202120
2021-06-21 02:47:37 | INFO | train_inner | epoch 006: 2676 / 3002 loss=2.534, ppl=5.79, wps=5850.6, ups=0.09, wpb=64802, bsz=128, num_updates=17582, lr=9.98673e-05, gnorm=3.584, loss_scale=4, train_wall=11, gb_free=2.8, wall=202131
2021-06-21 02:47:48 | INFO | train_inner | epoch 006: 2677 / 3002 loss=2.491, ppl=5.62, wps=5885.4, ups=0.09, wpb=64815, bsz=128, num_updates=17583, lr=9.98673e-05, gnorm=2.099, loss_scale=4, train_wall=11, gb_free=2.8, wall=202142
2021-06-21 02:47:59 | INFO | train_inner | epoch 006: 2678 / 3002 loss=2.471, ppl=5.54, wps=5973.9, ups=0.09, wpb=64839, bsz=128, num_updates=17584, lr=9.98673e-05, gnorm=2.106, loss_scale=4, train_wall=10, gb_free=2.8, wall=202153
2021-06-21 02:48:10 | INFO | train_inner | epoch 006: 2679 / 3002 loss=2.393, ppl=5.25, wps=5863, ups=0.09, wpb=64802, bsz=128, num_updates=17585, lr=9.98673e-05, gnorm=4.153, loss_scale=4, train_wall=11, gb_free=2.8, wall=202164
2021-06-21 02:48:21 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0
2021-06-21 02:48:31 | INFO | train_inner | epoch 006: 2681 / 3002 loss=2.433, ppl=5.4, wps=2964.5, ups=0.05, wpb=64819, bsz=128, num_updates=17586, lr=9.98673e-05, gnorm=2.385, loss_scale=2, train_wall=21, gb_free=2.8, wall=202186
2021-06-21 02:48:42 | INFO | train_inner | epoch 006: 2682 / 3002 loss=2.396, ppl=5.26, wps=5952.6, ups=0.09, wpb=64762, bsz=128, num_updates=17587, lr=9.98673e-05, gnorm=2.031, loss_scale=2, train_wall=10, gb_free=2.8, wall=202197
2021-06-21 02:48:53 | INFO | train_inner | epoch 006: 2683 / 3002 loss=2.404, ppl=5.29, wps=5843.6, ups=0.09, wpb=64768, bsz=128, num_updates=17588, lr=9.98673e-05, gnorm=2.54, loss_scale=2, train_wall=11, gb_free=2.8, wall=202208
2021-06-21 02:49:04 | INFO | train_inner | epoch 006: 2684 / 3002 loss=2.468, ppl=5.53, wps=5869.7, ups=0.09, wpb=64802, bsz=128, num_updates=17589, lr=9.98673e-05, gnorm=2.583, loss_scale=2, train_wall=11, gb_free=2.8, wall=202219
2021-06-21 02:49:15 | INFO | train_inner | epoch 006: 2685 / 3002 loss=2.491, ppl=5.62, wps=5898, ups=0.09, wpb=64841, bsz=128, num_updates=17590, lr=9.98673e-05, gnorm=2.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=202230
2021-06-21 02:49:27 | INFO | train_inner | epoch 006: 2686 / 3002 loss=2.399, ppl=5.27, wps=5777.9, ups=0.09, wpb=64805, bsz=128, num_updates=17591, lr=9.98673e-05, gnorm=2.116, loss_scale=2, train_wall=11, gb_free=2.8, wall=202241
2021-06-21 02:49:38 | INFO | train_inner | epoch 006: 2687 / 3002 loss=2.543, ppl=5.83, wps=5753, ups=0.09, wpb=64827, bsz=128, num_updates=17592, lr=9.98673e-05, gnorm=13.96, loss_scale=2, train_wall=11, gb_free=2.8, wall=202252
2021-06-21 02:49:49 | INFO | train_inner | epoch 006: 2688 / 3002 loss=2.457, ppl=5.49, wps=5978.1, ups=0.09, wpb=64879, bsz=128, num_updates=17593, lr=9.98672e-05, gnorm=9.345, loss_scale=2, train_wall=10, gb_free=2.8, wall=202263
2021-06-21 02:50:00 | INFO | train_inner | epoch 006: 2689 / 3002 loss=2.605, ppl=6.09, wps=5788.9, ups=0.09, wpb=64764, bsz=128, num_updates=17594, lr=9.98672e-05, gnorm=2.196, loss_scale=2, train_wall=11, gb_free=2.8, wall=202274
2021-06-21 02:50:11 | INFO | train_inner | epoch 006: 2690 / 3002 loss=2.545, ppl=5.84, wps=5927.8, ups=0.09, wpb=64760, bsz=128, num_updates=17595, lr=9.98672e-05, gnorm=2.745, loss_scale=2, train_wall=10, gb_free=2.8, wall=202285
2021-06-21 02:50:22 | INFO | train_inner | epoch 006: 2691 / 3002 loss=2.365, ppl=5.15, wps=5926.3, ups=0.09, wpb=64883, bsz=128, num_updates=17596, lr=9.98672e-05, gnorm=4.403, loss_scale=2, train_wall=10, gb_free=2.8, wall=202296
2021-06-21 02:50:33 | INFO | train_inner | epoch 006: 2692 / 3002 loss=2.602, ppl=6.07, wps=5817.4, ups=0.09, wpb=64844, bsz=128, num_updates=17597, lr=9.98672e-05, gnorm=3.03, loss_scale=2, train_wall=11, gb_free=2.8, wall=202307
2021-06-21 02:50:44 | INFO | train_inner | epoch 006: 2693 / 3002 loss=2.472, ppl=5.55, wps=5889.9, ups=0.09, wpb=64876, bsz=128, num_updates=17598, lr=9.98672e-05, gnorm=2.208, loss_scale=2, train_wall=11, gb_free=2.8, wall=202318
2021-06-21 02:50:55 | INFO | train_inner | epoch 006: 2694 / 3002 loss=2.563, ppl=5.91, wps=5907.8, ups=0.09, wpb=64872, bsz=128, num_updates=17599, lr=9.98672e-05, gnorm=4.899, loss_scale=2, train_wall=11, gb_free=2.8, wall=202329
2021-06-21 02:51:06 | INFO | train_inner | epoch 006: 2695 / 3002 loss=2.613, ppl=6.12, wps=5814.1, ups=0.09, wpb=64801, bsz=128, num_updates=17600, lr=9.98672e-05, gnorm=2.395, loss_scale=2, train_wall=11, gb_free=2.8, wall=202340
2021-06-21 02:51:17 | INFO | train_inner | epoch 006: 2696 / 3002 loss=2.462, ppl=5.51, wps=5940.8, ups=0.09, wpb=64862, bsz=128, num_updates=17601, lr=9.98672e-05, gnorm=2.356, loss_scale=2, train_wall=10, gb_free=2.8, wall=202351
2021-06-21 02:51:28 | INFO | train_inner | epoch 006: 2697 / 3002 loss=2.588, ppl=6.01, wps=5911, ups=0.09, wpb=64884, bsz=128, num_updates=17602, lr=9.98672e-05, gnorm=2.339, loss_scale=2, train_wall=11, gb_free=2.8, wall=202362
2021-06-21 02:51:39 | INFO | train_inner | epoch 006: 2698 / 3002 loss=2.685, ppl=6.43, wps=5689, ups=0.09, wpb=64864, bsz=128, num_updates=17603, lr=9.98672e-05, gnorm=4.553, loss_scale=2, train_wall=11, gb_free=2.8, wall=202374
2021-06-21 02:51:50 | INFO | train_inner | epoch 006: 2699 / 3002 loss=2.536, ppl=5.8, wps=5864.7, ups=0.09, wpb=64873, bsz=128, num_updates=17604, lr=9.98672e-05, gnorm=2.323, loss_scale=2, train_wall=11, gb_free=2.8, wall=202385
2021-06-21 02:52:02 | INFO | train_inner | epoch 006: 2700 / 3002 loss=2.671, ppl=6.37, wps=5844.2, ups=0.09, wpb=64733, bsz=128, num_updates=17605, lr=9.98671e-05, gnorm=2.743, loss_scale=2, train_wall=11, gb_free=2.8, wall=202396
2021-06-21 02:52:13 | INFO | train_inner | epoch 006: 2701 / 3002 loss=2.52, ppl=5.74, wps=5897.7, ups=0.09, wpb=64869, bsz=128, num_updates=17606, lr=9.98671e-05, gnorm=3.276, loss_scale=2, train_wall=11, gb_free=2.8, wall=202407
2021-06-21 02:52:24 | INFO | train_inner | epoch 006: 2702 / 3002 loss=2.548, ppl=5.85, wps=5813.4, ups=0.09, wpb=64815, bsz=128, num_updates=17607, lr=9.98671e-05, gnorm=4.147, loss_scale=2, train_wall=11, gb_free=2.8, wall=202418
2021-06-21 02:52:35 | INFO | train_inner | epoch 006: 2703 / 3002 loss=2.483, ppl=5.59, wps=5937.2, ups=0.09, wpb=64848, bsz=128, num_updates=17608, lr=9.98671e-05, gnorm=7.738, loss_scale=2, train_wall=10, gb_free=2.8, wall=202429
2021-06-21 02:52:46 | INFO | train_inner | epoch 006: 2704 / 3002 loss=2.504, ppl=5.67, wps=5782.9, ups=0.09, wpb=64759, bsz=128, num_updates=17609, lr=9.98671e-05, gnorm=4.388, loss_scale=2, train_wall=11, gb_free=2.8, wall=202440
2021-06-21 02:52:57 | INFO | train_inner | epoch 006: 2705 / 3002 loss=2.467, ppl=5.53, wps=5869.2, ups=0.09, wpb=64730, bsz=128, num_updates=17610, lr=9.98671e-05, gnorm=4.575, loss_scale=2, train_wall=11, gb_free=2.8, wall=202451
2021-06-21 02:53:08 | INFO | train_inner | epoch 006: 2706 / 3002 loss=2.441, ppl=5.43, wps=5933.6, ups=0.09, wpb=64761, bsz=128, num_updates=17611, lr=9.98671e-05, gnorm=2.056, loss_scale=2, train_wall=10, gb_free=2.8, wall=202462
2021-06-21 02:53:19 | INFO | train_inner | epoch 006: 2707 / 3002 loss=2.436, ppl=5.41, wps=5974.4, ups=0.09, wpb=64859, bsz=128, num_updates=17612, lr=9.98671e-05, gnorm=2.14, loss_scale=2, train_wall=10, gb_free=2.8, wall=202473
2021-06-21 02:53:30 | INFO | train_inner | epoch 006: 2708 / 3002 loss=2.535, ppl=5.8, wps=5848.6, ups=0.09, wpb=64848, bsz=128, num_updates=17613, lr=9.98671e-05, gnorm=2.503, loss_scale=2, train_wall=11, gb_free=2.8, wall=202484
2021-06-21 02:53:41 | INFO | train_inner | epoch 006: 2709 / 3002 loss=2.523, ppl=5.75, wps=5880.3, ups=0.09, wpb=64675, bsz=128, num_updates=17614, lr=9.98671e-05, gnorm=2.148, loss_scale=2, train_wall=11, gb_free=2.8, wall=202495
2021-06-21 02:53:52 | INFO | train_inner | epoch 006: 2710 / 3002 loss=2.673, ppl=6.38, wps=5856, ups=0.09, wpb=64794, bsz=128, num_updates=17615, lr=9.98671e-05, gnorm=12.997, loss_scale=2, train_wall=11, gb_free=2.8, wall=202506
2021-06-21 02:54:03 | INFO | train_inner | epoch 006: 2711 / 3002 loss=2.686, ppl=6.43, wps=5842.4, ups=0.09, wpb=64843, bsz=128, num_updates=17616, lr=9.98671e-05, gnorm=3.337, loss_scale=2, train_wall=11, gb_free=2.8, wall=202517
2021-06-21 02:54:14 | INFO | train_inner | epoch 006: 2712 / 3002 loss=2.507, ppl=5.68, wps=5952.7, ups=0.09, wpb=64902, bsz=128, num_updates=17617, lr=9.98671e-05, gnorm=2.051, loss_scale=2, train_wall=10, gb_free=2.8, wall=202528
2021-06-21 02:54:25 | INFO | train_inner | epoch 006: 2713 / 3002 loss=2.393, ppl=5.25, wps=5890, ups=0.09, wpb=64847, bsz=128, num_updates=17618, lr=9.9867e-05, gnorm=2.999, loss_scale=2, train_wall=11, gb_free=2.8, wall=202539
2021-06-21 02:54:36 | INFO | train_inner | epoch 006: 2714 / 3002 loss=2.449, ppl=5.46, wps=5800.2, ups=0.09, wpb=64775, bsz=128, num_updates=17619, lr=9.9867e-05, gnorm=2.146, loss_scale=2, train_wall=11, gb_free=2.8, wall=202550
2021-06-21 02:54:47 | INFO | train_inner | epoch 006: 2715 / 3002 loss=2.537, ppl=5.8, wps=5949.1, ups=0.09, wpb=64843, bsz=128, num_updates=17620, lr=9.9867e-05, gnorm=2.102, loss_scale=2, train_wall=10, gb_free=2.8, wall=202561
2021-06-21 02:54:58 | INFO | train_inner | epoch 006: 2716 / 3002 loss=2.527, ppl=5.76, wps=5715.4, ups=0.09, wpb=64829, bsz=128, num_updates=17621, lr=9.9867e-05, gnorm=2.236, loss_scale=2, train_wall=11, gb_free=2.8, wall=202573
2021-06-21 02:55:09 | INFO | train_inner | epoch 006: 2717 / 3002 loss=2.561, ppl=5.9, wps=5823.5, ups=0.09, wpb=64840, bsz=128, num_updates=17622, lr=9.9867e-05, gnorm=14.238, loss_scale=2, train_wall=11, gb_free=2.8, wall=202584
2021-06-21 02:55:20 | INFO | train_inner | epoch 006: 2718 / 3002 loss=2.57, ppl=5.94, wps=5819.2, ups=0.09, wpb=64804, bsz=128, num_updates=17623, lr=9.9867e-05, gnorm=2.246, loss_scale=2, train_wall=11, gb_free=2.8, wall=202595
2021-06-21 02:55:32 | INFO | train_inner | epoch 006: 2719 / 3002 loss=2.545, ppl=5.83, wps=5788, ups=0.09, wpb=64823, bsz=128, num_updates=17624, lr=9.9867e-05, gnorm=2.329, loss_scale=2, train_wall=11, gb_free=2.8, wall=202606
2021-06-21 02:55:43 | INFO | train_inner | epoch 006: 2720 / 3002 loss=2.453, ppl=5.48, wps=5860.1, ups=0.09, wpb=64859, bsz=128, num_updates=17625, lr=9.9867e-05, gnorm=1.993, loss_scale=2, train_wall=11, gb_free=2.8, wall=202617
2021-06-21 02:55:54 | INFO | train_inner | epoch 006: 2721 / 3002 loss=2.593, ppl=6.03, wps=5907.5, ups=0.09, wpb=64905, bsz=128, num_updates=17626, lr=9.9867e-05, gnorm=3.159, loss_scale=2, train_wall=11, gb_free=2.8, wall=202628
2021-06-21 02:56:05 | INFO | train_inner | epoch 006: 2722 / 3002 loss=2.477, ppl=5.57, wps=5911.3, ups=0.09, wpb=64792, bsz=128, num_updates=17627, lr=9.9867e-05, gnorm=15.99, loss_scale=2, train_wall=11, gb_free=2.8, wall=202639
2021-06-21 02:56:16 | INFO | train_inner | epoch 006: 2723 / 3002 loss=2.454, ppl=5.48, wps=5925.9, ups=0.09, wpb=64873, bsz=128, num_updates=17628, lr=9.9867e-05, gnorm=2.108, loss_scale=2, train_wall=10, gb_free=2.8, wall=202650
2021-06-21 02:56:27 | INFO | train_inner | epoch 006: 2724 / 3002 loss=2.456, ppl=5.49, wps=5833, ups=0.09, wpb=64797, bsz=128, num_updates=17629, lr=9.9867e-05, gnorm=14.405, loss_scale=2, train_wall=11, gb_free=2.8, wall=202661
2021-06-21 02:56:38 | INFO | train_inner | epoch 006: 2725 / 3002 loss=2.66, ppl=6.32, wps=5814, ups=0.09, wpb=64877, bsz=128, num_updates=17630, lr=9.98669e-05, gnorm=2.202, loss_scale=2, train_wall=11, gb_free=2.8, wall=202672
2021-06-21 02:56:49 | INFO | train_inner | epoch 006: 2726 / 3002 loss=2.453, ppl=5.47, wps=5791.9, ups=0.09, wpb=64865, bsz=128, num_updates=17631, lr=9.98669e-05, gnorm=8.327, loss_scale=2, train_wall=11, gb_free=2.8, wall=202683
2021-06-21 02:57:00 | INFO | train_inner | epoch 006: 2727 / 3002 loss=2.601, ppl=6.07, wps=5790.1, ups=0.09, wpb=64926, bsz=128, num_updates=17632, lr=9.98669e-05, gnorm=2.296, loss_scale=2, train_wall=11, gb_free=2.8, wall=202695
2021-06-21 02:57:11 | INFO | train_inner | epoch 006: 2728 / 3002 loss=2.504, ppl=5.67, wps=5853.5, ups=0.09, wpb=64897, bsz=128, num_updates=17633, lr=9.98669e-05, gnorm=2.655, loss_scale=2, train_wall=11, gb_free=2.8, wall=202706
2021-06-21 02:57:23 | INFO | train_inner | epoch 006: 2729 / 3002 loss=2.497, ppl=5.65, wps=5836.1, ups=0.09, wpb=64902, bsz=128, num_updates=17634, lr=9.98669e-05, gnorm=3.052, loss_scale=2, train_wall=11, gb_free=2.8, wall=202717
2021-06-21 02:57:34 | INFO | train_inner | epoch 006: 2730 / 3002 loss=2.441, ppl=5.43, wps=5818.3, ups=0.09, wpb=64833, bsz=128, num_updates=17635, lr=9.98669e-05, gnorm=2.187, loss_scale=2, train_wall=11, gb_free=2.8, wall=202728
2021-06-21 02:57:45 | INFO | train_inner | epoch 006: 2731 / 3002 loss=2.608, ppl=6.1, wps=5795, ups=0.09, wpb=64715, bsz=128, num_updates=17636, lr=9.98669e-05, gnorm=2.088, loss_scale=2, train_wall=11, gb_free=2.8, wall=202739
2021-06-21 02:57:56 | INFO | train_inner | epoch 006: 2732 / 3002 loss=2.492, ppl=5.62, wps=5886, ups=0.09, wpb=64846, bsz=128, num_updates=17637, lr=9.98669e-05, gnorm=2.487, loss_scale=2, train_wall=11, gb_free=2.8, wall=202750
2021-06-21 02:58:07 | INFO | train_inner | epoch 006: 2733 / 3002 loss=2.583, ppl=5.99, wps=5842.7, ups=0.09, wpb=64762, bsz=128, num_updates=17638, lr=9.98669e-05, gnorm=2.288, loss_scale=2, train_wall=11, gb_free=2.8, wall=202761
2021-06-21 02:58:18 | INFO | train_inner | epoch 006: 2734 / 3002 loss=2.636, ppl=6.21, wps=5720.4, ups=0.09, wpb=64819, bsz=128, num_updates=17639, lr=9.98669e-05, gnorm=2.751, loss_scale=2, train_wall=11, gb_free=2.8, wall=202773
2021-06-21 02:58:29 | INFO | train_inner | epoch 006: 2735 / 3002 loss=2.541, ppl=5.82, wps=5885.4, ups=0.09, wpb=64898, bsz=128, num_updates=17640, lr=9.98669e-05, gnorm=2.064, loss_scale=2, train_wall=11, gb_free=2.8, wall=202784
2021-06-21 02:58:40 | INFO | train_inner | epoch 006: 2736 / 3002 loss=2.631, ppl=6.19, wps=5951.9, ups=0.09, wpb=64799, bsz=128, num_updates=17641, lr=9.98669e-05, gnorm=2.313, loss_scale=2, train_wall=10, gb_free=2.8, wall=202795
2021-06-21 02:58:51 | INFO | train_inner | epoch 006: 2737 / 3002 loss=2.522, ppl=5.74, wps=5977.8, ups=0.09, wpb=64883, bsz=128, num_updates=17642, lr=9.98669e-05, gnorm=2.052, loss_scale=2, train_wall=10, gb_free=2.8, wall=202805
2021-06-21 02:59:02 | INFO | train_inner | epoch 006: 2738 / 3002 loss=2.489, ppl=5.61, wps=5770.2, ups=0.09, wpb=64849, bsz=128, num_updates=17643, lr=9.98668e-05, gnorm=2.058, loss_scale=2, train_wall=11, gb_free=2.8, wall=202817
2021-06-21 02:59:13 | INFO | train_inner | epoch 006: 2739 / 3002 loss=2.491, ppl=5.62, wps=5950.7, ups=0.09, wpb=64806, bsz=128, num_updates=17644, lr=9.98668e-05, gnorm=2.047, loss_scale=2, train_wall=10, gb_free=2.8, wall=202827
2021-06-21 02:59:24 | INFO | train_inner | epoch 006: 2740 / 3002 loss=2.528, ppl=5.77, wps=5804.6, ups=0.09, wpb=64728, bsz=128, num_updates=17645, lr=9.98668e-05, gnorm=5.507, loss_scale=2, train_wall=11, gb_free=2.8, wall=202839
2021-06-21 02:59:35 | INFO | train_inner | epoch 006: 2741 / 3002 loss=2.418, ppl=5.34, wps=5900.1, ups=0.09, wpb=64852, bsz=128, num_updates=17646, lr=9.98668e-05, gnorm=2.496, loss_scale=2, train_wall=11, gb_free=2.8, wall=202850
2021-06-21 02:59:46 | INFO | train_inner | epoch 006: 2742 / 3002 loss=2.552, ppl=5.87, wps=5847, ups=0.09, wpb=64471, bsz=128, num_updates=17647, lr=9.98668e-05, gnorm=2.069, loss_scale=2, train_wall=11, gb_free=2.8, wall=202861
2021-06-21 02:59:57 | INFO | train_inner | epoch 006: 2743 / 3002 loss=2.488, ppl=5.61, wps=5904.1, ups=0.09, wpb=64799, bsz=128, num_updates=17648, lr=9.98668e-05, gnorm=2.077, loss_scale=2, train_wall=11, gb_free=2.8, wall=202872